konhon

          忘掉過去,展望未來。找回自我,超越自我。
          逃避不一定躲的過, 面對不一定最難過, 孤單不一定不快樂, 得到不一定能長久, 失去不一定不再擁有, 可能因為某個理由而傷心難過, 但我卻能找個理由讓自己快樂.

          Google

          BlogJava 首頁 新隨筆 聯繫 聚合 管理
            203 Posts :: 0 Stories :: 61 Comments :: 0 Trackbacks

          ?

          import org.htmlparser.Node;
          import org.htmlparser.NodeFilter;
          import org.htmlparser.Parser;
          import org.htmlparser.filters.TagNameFilter;
          import org.htmlparser.tags.TableTag;
          import org.htmlparser.util.NodeList;

          /**
          ?*?<br>
          ?*?標題:?<br>
          ?*?功能概要:?<br>
          ?*?版權:?cityyouth.cn?(c)?2005?<br>
          ?*?公司:上海城市青年網(wǎng)?<br>
          ?*?創(chuàng)建時間:2005-12-21?<br>
          ?*?修改時間:?<br>
          ?*?修改原因:
          ?*?
          ?*?
          @author ?張偉
          ?*?
          @version ?1.0
          ?
          */
          public ? class ?TestYahoo?{
          ????
          public ? static ? void ?testHtml()?{
          ????????
          try ?{
          ????????????String?sCurrentLine;
          ????????????String?sTotalString;
          ????????????sCurrentLine?
          = ? "" ;
          ????????????sTotalString?
          = ? "" ;
          ????????????java.io.InputStream?l_urlStream;
          ????????????java.net.URL?l_url?
          = ? new ?java.net.URL(
          ????????????????????
          " http://sports.sina.com.cn/iframe/nba/live/ " );
          ????????????java.net.HttpURLConnection?l_connection?
          = ?(java.net.HttpURLConnection)?l_url
          ????????????????????.openConnection();
          ????????????l_connection.connect();
          ????????????l_urlStream?
          = ?l_connection.getInputStream();
          ????????????java.io.BufferedReader?l_reader?
          = ? new ?java.io.BufferedReader(
          ????????????????????
          new ?java.io.InputStreamReader(l_urlStream));
          ????????????
          while ?((sCurrentLine? = ?l_reader.readLine())? != ? null )?{
          ????????????????sTotalString?
          += ?sCurrentLine;
          ????????????}
          ????????????System.out.println(sTotalString);

          ????????????System.out.println(
          " ==================== " );
          ????????????String?testText?
          = ?extractText(sTotalString);
          ????????????System.out.println(testText);
          ????????}?
          catch ?(Exception?e)?{
          ????????????e.printStackTrace();
          ????????}

          ????}

          ????
          /**
          ?????*?抽取純文本信息
          ?????*?
          ?????*?
          @param ?inputHtml
          ?????*?
          @return
          ?????
          */
          ????
          public ? static ?String?extractText(String?inputHtml)? throws ?Exception?{
          ????????StringBuffer?text?
          = ? new ?StringBuffer();

          ????????Parser?parser?
          = ?Parser.createParser( new ?String(inputHtml.getBytes(),
          ????????????????
          " 8859_1 " ),? " 8859-1 " );
          ????????
          // ?遍歷所有的節(jié)點
          ????????NodeList?nodes? = ?parser.extractAllNodesThatMatch( new ?NodeFilter()?{
          ????????????
          public ? boolean ?accept(Node?node)?{
          ????????????????
          return ? true ;
          ????????????}
          ????????});
          ????????Node?node?
          = ?nodes.elementAt( 0 );
          ????????text.append(
          new ?String(node.toPlainTextString().getBytes( " 8859_1 " )));
          ????????
          return ?text.toString();
          ????}

          ????
          /**
          ?????*?讀取文件的方式來分析內容.?filePath也可以是一個Url.
          ?????*?
          ?????*?
          @param ?resource
          ?????*????????????文件/Url
          ?????
          */
          ????
          public ? static ? void ?test5(String?resource)? throws ?Exception?{
          ????????Parser?myParser?
          = ? new ?Parser(resource);

          ????????
          // ?設置編碼
          ????????myParser.setEncoding( " GBK " );
          ????????String?filterStr?
          = ? " table " ;
          ????????NodeFilter?filter?
          = ? new ?TagNameFilter(filterStr);
          ????????NodeList?nodeList?
          = ?myParser.extractAllNodesThatMatch(filter);
          ????????TableTag?tabletag?
          = ?(TableTag)?nodeList.elementAt( 11 );
          ????????????
          ????????????System.out.println(tabletag.toHtml());
          ????????????
          ????????????System.out.println(
          " ============== " );

          ????}

          ????
          /*
          ?????*?public?static?void?main(String[]?args)?{?TestYahoo?testYahoo?=?new
          ?????*?TestYahoo();?testYahoo.testHtml();?}
          ?????
          */
          ????
          public ? static ? void ?main(String[]?args)? throws ?Exception?{
          ????????test5(
          " http://sports.yahoo.com/nba/scoreboard " );
          ????}
          }
          posted on 2007-03-08 06:50 konhon 優華 閱讀(1597) 評論(0)  編輯  收藏

          只有註冊用戶登錄後才能發表評論。


          網站導航:
           
          主站蜘蛛池模板: 石屏县| 金阳县| 监利县| 上饶市| 油尖旺区| 桑植县| 永福县| 南部县| 达州市| 阆中市| 赣州市| 黑河市| 昂仁县| 保定市| 合川市| 珠海市| 宣化县| 福安市| 平安县| 宁海县| 应城市| 旬阳县| 宣化县| 密山市| 临沭县| 株洲县| 遵义县| 疏附县| 乌海市| 舒兰市| 岳西县| 临邑县| 旌德县| 高州市| 长葛市| 卢氏县| 泗水县| 大冶市| 台湾省| 耒阳市| 五大连池市|