konhon

          忘掉過去,展望未來。找回自我,超越自我。
          逃避不一定躲的過, 面對不一定最難過, 孤單不一定不快樂, 得到不一定能長久, 失去不一定不再擁有, 可能因為某個理由而傷心難過, 但我卻能找個理由讓自己快樂.

          Google

          BlogJava 首頁 新隨筆 聯繫 聚合 管理
            203 Posts :: 0 Stories :: 61 Comments :: 0 Trackbacks

          ?

          import java.io.BufferedReader;
          import java.io.IOException;
          import java.io.InputStream;
          import java.io.InputStreamReader;
          import java.net.HttpURLConnection;
          import java.net.URL;
          import java.nio.charset.Charset;

          import org.htmlparser.Node;
          import org.htmlparser.NodeFilter;
          import org.htmlparser.Parser;
          import org.htmlparser.filters.TagNameFilter;
          import org.htmlparser.tags.TableTag;
          import org.htmlparser.util.NodeIterator;
          import org.htmlparser.util.NodeList;

          /**
          ?*?<br>
          ?*?標(biāo)題:?<br>
          ?*?功能概要:?<br>
          ?*?版權(quán):?cityyouth.cn?(c)?2005?<br>
          ?*?公司:上海城市青年網(wǎng)?<br>
          ?*?創(chuàng)建時間:2005-12-21?<br>
          ?*?修改時間:?<br>
          ?*?修改原因:
          ?*?
          ?*?
          @author ?張偉
          ?*?
          @version ?1.0
          ?
          */
          public ? class ?TestYahoo?{
          ????
          public ? static ? void ?testHtml()?{
          ????????
          try ?{
          ????????????String?sCurrentLine;
          ????????????String?sTotalString;
          ????????????sCurrentLine?
          = ? "" ;
          ????????????sTotalString?
          = ? "" ;
          ????????????java.io.InputStream?l_urlStream;
          ????????????java.net.URL?l_url?
          = ? new ?java.net.URL(
          ????????????????????
          " http://sports.sina.com.cn/iframe/nba/live/ " );
          ????????????java.net.HttpURLConnection?l_connection?
          = ?(java.net.HttpURLConnection)?l_url
          ????????????????????.openConnection();
          ????????????l_connection.connect();
          ????????????l_urlStream?
          = ?l_connection.getInputStream();
          ????????????java.io.BufferedReader?l_reader?
          = ? new ?java.io.BufferedReader(
          ????????????????????
          new ?java.io.InputStreamReader(l_urlStream));
          ????????????
          while ?((sCurrentLine? = ?l_reader.readLine())? != ? null )?{
          ????????????????sTotalString?
          += ?sCurrentLine;
          ????????????}
          ????????????System.out.println(sTotalString);

          ????????????System.out.println(
          " ==================== " );
          ????????????String?testText?
          = ?extractText(sTotalString);
          ????????????System.out.println(testText);
          ????????}?
          catch ?(Exception?e)?{
          ????????????e.printStackTrace();
          ????????}

          ????}

          ????
          /**
          ?????*?抽取純文本信息
          ?????*?
          ?????*?
          @param ?inputHtml
          ?????*?
          @return
          ?????
          */
          ????
          public ? static ?String?extractText(String?inputHtml)? throws ?Exception?{
          ????????StringBuffer?text?
          = ? new ?StringBuffer();

          ????????Parser?parser?
          = ?Parser.createParser( new ?String(inputHtml.getBytes(),
          ????????????????
          " 8859_1 " ),? " 8859-1 " );
          ????????
          // ?遍歷所有的節(jié)點(diǎn)
          ????????NodeList?nodes? = ?parser.extractAllNodesThatMatch( new ?NodeFilter()?{
          ????????????
          public ? boolean ?accept(Node?node)?{
          ????????????????
          return ? true ;
          ????????????}
          ????????});
          ????????Node?node?
          = ?nodes.elementAt( 0 );
          ????????text.append(
          new ?String(node.toPlainTextString().getBytes( " 8859_1 " )));
          ????????
          return ?text.toString();
          ????}

          ????
          /**
          ?????*?讀取文件的方式來分析內(nèi)容.?filePath也可以是一個Url.
          ?????*?
          ?????*?
          @param ?resource
          ?????*????????????文件/Url
          ?????
          */
          ????
          public ? static ? void ?test5(String?resource)? throws ?Exception?{
          ????????Parser?myParser?
          = ? new ?Parser(resource);

          ????????
          // ?設(shè)置編碼
          ????????myParser.setEncoding( " GBK " );
          ????????String?filterStr?
          = ? " table " ;
          ????????NodeFilter?filter?
          = ? new ?TagNameFilter(filterStr);
          ????????NodeList?nodeList?
          = ?myParser.extractAllNodesThatMatch(filter);
          ????????TableTag?tabletag?
          = ?(TableTag)?nodeList.elementAt( 11 );
          ????????????
          ????????????System.out.println(tabletag.toHtml());
          ????????????
          ????????????System.out.println(
          " ============== " );

          ????}

          ????
          /*
          ?????*?public?static?void?main(String[]?args)?{?TestYahoo?testYahoo?=?new
          ?????*?TestYahoo();?testYahoo.testHtml();?}
          ?????
          */
          ????
          public ? static ? void ?main(String[]?args)? throws ?Exception?{
          ????????test5(
          " http://sports.yahoo.com/nba/scoreboard " );
          ????}
          }
          posted on 2007-03-08 06:50 konhon 優華 閱讀(1597) 評論(0)  編輯  收藏

          只有注冊用戶登錄后才能發(fā)表評論。


          網站導航:
           
          主站蜘蛛池模板: 葫芦岛市| 尚义县| 晴隆县| 南和县| 奇台县| 车险| 尼勒克县| 汤原县| 滦平县| 双辽市| 伊吾县| 乃东县| 桂林市| 莆田市| 乌兰浩特市| 凤城市| 巴塘县| 准格尔旗| 皋兰县| 松溪县| 东城区| 平度市| 东兰县| 榆林市| 方正县| 开江县| 灵武市| 和田市| 凤山市| 房产| 浦东新区| 黑山县| 西乌珠穆沁旗| 舞钢市| 驻马店市| 定陶县| 呼伦贝尔市| 双城市| 乌鲁木齐县| 葵青区| 商南县|