// posts - 120, comments - 19, trackbacks - 0
          import java.io.BufferedReader;
          import java.io.IOException;
          import java.io.InputStreamReader;
          import java.net.HttpURLConnection;
          import java.net.URL;

          import org.htmlparser.Node;
          import org.htmlparser.NodeFilter;
          import org.htmlparser.Parser;
          import org.htmlparser.filters.TagNameFilter;
          import org.htmlparser.tags.TableTag;
          import org.htmlparser.util.NodeList;

          /**
          ?*?<br>
          ?*?標題:?<br>
          ?*?功能概要:?<br>
          ?*?版權:?cityyouth.cn?(c)?2005?<br>
          ?*?公司:上海城市青年網?<br>
          ?*?創建時間:2005-12-21?<br>
          ?*?修改時間:?<br>
          ?*?修改原因:
          ?*?
          ?*?
          @author ?張偉
          ?*?
          @version ?1.0
          ?
          */
          public ? class ?TestYahoo?{
          ????
          public ? static ? void ?testHtml()?{
          ????????
          try ?{
          ????????????String?sCurrentLine;
          ????????????String?sTotalString;
          ????????????sCurrentLine?
          = ? "" ;
          ????????????sTotalString?
          = ? "" ;
          ????????????java.io.InputStream?l_urlStream;
          ????????????java.net.URL?l_url?
          = ? new ?java.net.URL(
          ????????????????????
          " http://sports.sina.com.cn/iframe/nba/live/ " );
          ????????????java.net.HttpURLConnection?l_connection?
          = ?(java.net.HttpURLConnection)?l_url
          ????????????????????.openConnection();
          ????????????l_connection.connect();
          ????????????l_urlStream?
          = ?l_connection.getInputStream();
          ????????????java.io.BufferedReader?l_reader?
          = ? new ?java.io.BufferedReader(
          ????????????????????
          new ?java.io.InputStreamReader(l_urlStream));
          ????????????
          while ?((sCurrentLine? = ?l_reader.readLine())? != ? null )?{
          ????????????????sTotalString?
          += ?sCurrentLine;
          ????????????}
          ????????????System.out.println(sTotalString);

          ????????????System.out.println(
          " ==================== " );
          ????????????String?testText?
          = ?extractText(sTotalString);
          ????????????System.out.println(testText);
          ????????}?
          catch ?(Exception?e)?{
          ????????????e.printStackTrace();
          ????????}

          ????}

          ????
          /**
          ?????*?抽取純文本信息
          ?????*?
          ?????*?
          @param ?inputHtml
          ?????*?
          @return
          ?????
          */
          ????
          public ? static ?String?extractText(String?inputHtml)? throws ?Exception?{
          ????????StringBuffer?text?
          = ? new ?StringBuffer();

          ????????Parser?parser?
          = ?Parser.createParser( new ?String(inputHtml.getBytes(),
          ????????????????
          " 8859_1 " ),? " 8859-1 " );
          ????????
          // ?遍歷所有的節點
          ????????NodeList?nodes? = ?parser.extractAllNodesThatMatch( new ?NodeFilter()?{
          ????????????
          public ? boolean ?accept(Node?node)?{
          ????????????????
          return ? true ;
          ????????????}
          ????????});
          ????????Node?node?
          = ?nodes.elementAt( 0 );
          ????????text.append(
          new ?String(node.toPlainTextString().getBytes( " 8859_1 " )));
          ????????
          return ?text.toString();
          ????}

          ????
          /**
          ?????*?讀取文件的方式來分析內容.?filePath也可以是一個Url.
          ?????*?
          ?????*?
          @param ?resource
          ?????*????????????文件/Url
          ?????
          */
          ????
          public ? static ? void ?test5(String?resource)? throws ?Exception?{
          ????????Parser?myParser?
          = ? new ?Parser(resource);

          ????????
          // ?設置編碼
          ????????myParser.setEncoding( " GBK " );
          ????????String?filterStr?
          = ? " table " ;
          ????????NodeFilter?filter?
          = ? new ?TagNameFilter(filterStr);
          ????????NodeList?nodeList?
          = ?myParser.extractAllNodesThatMatch(filter);
          ????????TableTag?tabletag?
          = ?(TableTag)?nodeList.elementAt( 11 );
          ????????????
          ????????????System.out.println(tabletag.toHtml());
          ????????????
          ????????????System.out.println(
          " ============== " );

          ????}

          ????
          /*
          ?????*?public?static?void?main(String[]?args)?{?TestYahoo?testYahoo?=?new
          ?????*?TestYahoo();?testYahoo.testHtml();?}
          ?????
          */
          ????
          public ? static ? void ?main(String[]?args)? throws ?Exception?{
          ????????test5(
          " http://sports.yahoo.com/nba/scoreboard " );
          ????}
          }


          // posted on 2006-09-15 10:04 阿成 閱讀(3960) 評論(0)  編輯  收藏 所屬分類: Open source
          // 主站蜘蛛池模板: 抚顺市| 山阴县| 凤城市| 武强县| 龙泉市| 凤冈县| 太康县| 和顺县| 论坛| 苍梧县| 始兴县| 德昌县| 龙海市| 舞钢市| 绥德县| 封开县| 尼勒克县| 囊谦县| 赣榆县| 元谋县| 平山县| 泰和县| 拉萨市| 安新县| 肇东市| 揭阳市| 沙坪坝区| 汾阳市| 深泽县| 泰兴市| 厦门市| 文成县| 辽中县| 彰武县| 新宾| 怀来县| 广元市| 衡阳县| 云霄县| 英超| 嵩明县|