import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
/**
?*?<br>
?*?標題:?<br>
?*?功能概要:?<br>
?*?版權:?cityyouth.cn?(c)?2005?<br>
?*?公司:上海城市青年網(wǎng)?<br>
?*?創(chuàng)建時間:2005-12-21?<br>
?*?修改時間:?<br>
?*?修改原因:
?*?
?*? @author ?張偉
?*? @version ?1.0
? */
public ? class ?TestYahoo?{
???? public ? static ? void ?testHtml()?{
???????? try ?{
????????????String?sCurrentLine;
????????????String?sTotalString;
????????????sCurrentLine? = ? "" ;
????????????sTotalString? = ? "" ;
????????????java.io.InputStream?l_urlStream;
????????????java.net.URL?l_url? = ? new ?java.net.URL(
???????????????????? " http://sports.sina.com.cn/iframe/nba/live/ " );
????????????java.net.HttpURLConnection?l_connection? = ?(java.net.HttpURLConnection)?l_url
????????????????????.openConnection();
????????????l_connection.connect();
????????????l_urlStream? = ?l_connection.getInputStream();
????????????java.io.BufferedReader?l_reader? = ? new ?java.io.BufferedReader(
???????????????????? new ?java.io.InputStreamReader(l_urlStream));
???????????? while ?((sCurrentLine? = ?l_reader.readLine())? != ? null )?{
????????????????sTotalString? += ?sCurrentLine;
????????????}
????????????System.out.println(sTotalString);
????????????System.out.println( " ==================== " );
????????????String?testText? = ?extractText(sTotalString);
????????????System.out.println(testText);
????????}? catch ?(Exception?e)?{
????????????e.printStackTrace();
????????}
????}
???? /**
?????*?抽取純文本信息
?????*?
?????*? @param ?inputHtml
?????*? @return
????? */
???? public ? static ?String?extractText(String?inputHtml)? throws ?Exception?{
????????StringBuffer?text? = ? new ?StringBuffer();
????????Parser?parser? = ?Parser.createParser( new ?String(inputHtml.getBytes(),
???????????????? " 8859_1 " ),? " 8859-1 " );
???????? // ?遍歷所有的節(jié)點
????????NodeList?nodes? = ?parser.extractAllNodesThatMatch( new ?NodeFilter()?{
???????????? public ? boolean ?accept(Node?node)?{
???????????????? return ? true ;
????????????}
????????});
????????Node?node? = ?nodes.elementAt( 0 );
????????text.append( new ?String(node.toPlainTextString().getBytes( " 8859_1 " )));
???????? return ?text.toString();
????}
???? /**
?????*?讀取文件的方式來分析內容.?filePath也可以是一個Url.
?????*?
?????*? @param ?resource
?????*????????????文件/Url
????? */
???? public ? static ? void ?test5(String?resource)? throws ?Exception?{
????????Parser?myParser? = ? new ?Parser(resource);
???????? // ?設置編碼
????????myParser.setEncoding( " GBK " );
????????String?filterStr? = ? " table " ;
????????NodeFilter?filter? = ? new ?TagNameFilter(filterStr);
????????NodeList?nodeList? = ?myParser.extractAllNodesThatMatch(filter);
????????TableTag?tabletag? = ?(TableTag)?nodeList.elementAt( 11 );
????????????
????????????System.out.println(tabletag.toHtml());
????????????
????????????System.out.println( " ============== " );
????}
???? /*
?????*?public?static?void?main(String[]?args)?{?TestYahoo?testYahoo?=?new
?????*?TestYahoo();?testYahoo.testHtml();?}
????? */
???? public ? static ? void ?main(String[]?args)? throws ?Exception?{
????????test5( " http://sports.yahoo.com/nba/scoreboard " );
????}
}
import ?org.htmlparser.NodeFilter;
import ?org.htmlparser.Parser;
import ?org.htmlparser.filters.TagNameFilter;
import ?org.htmlparser.tags.TableTag;
import ?org.htmlparser.util.NodeList;
/**
?*?<br>
?*?標題:?<br>
?*?功能概要:?<br>
?*?版權:?cityyouth.cn?(c)?2005?<br>
?*?公司:上海城市青年網(wǎng)?<br>
?*?創(chuàng)建時間:2005-12-21?<br>
?*?修改時間:?<br>
?*?修改原因:
?*?
?*? @author ?張偉
?*? @version ?1.0
? */
public ? class ?TestYahoo?{
???? public ? static ? void ?testHtml()?{
???????? try ?{
????????????String?sCurrentLine;
????????????String?sTotalString;
????????????sCurrentLine? = ? "" ;
????????????sTotalString? = ? "" ;
????????????java.io.InputStream?l_urlStream;
????????????java.net.URL?l_url? = ? new ?java.net.URL(
???????????????????? " http://sports.sina.com.cn/iframe/nba/live/ " );
????????????java.net.HttpURLConnection?l_connection? = ?(java.net.HttpURLConnection)?l_url
????????????????????.openConnection();
????????????l_connection.connect();
????????????l_urlStream? = ?l_connection.getInputStream();
????????????java.io.BufferedReader?l_reader? = ? new ?java.io.BufferedReader(
???????????????????? new ?java.io.InputStreamReader(l_urlStream));
???????????? while ?((sCurrentLine? = ?l_reader.readLine())? != ? null )?{
????????????????sTotalString? += ?sCurrentLine;
????????????}
????????????System.out.println(sTotalString);
????????????System.out.println( " ==================== " );
????????????String?testText? = ?extractText(sTotalString);
????????????System.out.println(testText);
????????}? catch ?(Exception?e)?{
????????????e.printStackTrace();
????????}
????}
???? /**
?????*?抽取純文本信息
?????*?
?????*? @param ?inputHtml
?????*? @return
????? */
???? public ? static ?String?extractText(String?inputHtml)? throws ?Exception?{
????????StringBuffer?text? = ? new ?StringBuffer();
????????Parser?parser? = ?Parser.createParser( new ?String(inputHtml.getBytes(),
???????????????? " 8859_1 " ),? " 8859-1 " );
???????? // ?遍歷所有的節(jié)點
????????NodeList?nodes? = ?parser.extractAllNodesThatMatch( new ?NodeFilter()?{
???????????? public ? boolean ?accept(Node?node)?{
???????????????? return ? true ;
????????????}
????????});
????????Node?node? = ?nodes.elementAt( 0 );
????????text.append( new ?String(node.toPlainTextString().getBytes( " 8859_1 " )));
???????? return ?text.toString();
????}
???? /**
?????*?讀取文件的方式來分析內容.?filePath也可以是一個Url.
?????*?
?????*? @param ?resource
?????*????????????文件/Url
????? */
???? public ? static ? void ?test5(String?resource)? throws ?Exception?{
????????Parser?myParser? = ? new ?Parser(resource);
???????? // ?設置編碼
????????myParser.setEncoding( " GBK " );
????????String?filterStr? = ? " table " ;
????????NodeFilter?filter? = ? new ?TagNameFilter(filterStr);
????????NodeList?nodeList? = ?myParser.extractAllNodesThatMatch(filter);
????????TableTag?tabletag? = ?(TableTag)?nodeList.elementAt( 11 );
????????????
????????????System.out.println(tabletag.toHtml());
????????????
????????????System.out.println( " ============== " );
????}
???? /*
?????*?public?static?void?main(String[]?args)?{?TestYahoo?testYahoo?=?new
?????*?TestYahoo();?testYahoo.testHtml();?}
????? */
???? public ? static ? void ?main(String[]?args)? throws ?Exception?{
????????test5( " http://sports.yahoo.com/nba/scoreboard " );
????}
}