/*
html : html內容
identifier: 搜索標識
*/
private?Object[]?extractText(String?html,?String?identifier)

????????????throws?Exception?
{
????????List<String>?resultTextList?=?new?ArrayList<String>();

????????Parser?parser?=?new?Parser();
????????parser.setInputHTML(html);

????????NodeFilter?filter?=?new?HasAttributeFilter("class",?identifier);
?/* NodeFilter?filter 就是要解析的過濾器,實現有好多種,我采用的屬性過濾,其他more api*/
????????NodeList?nodeList?=?parser.extractAllNodesThatMatch(filter);
/* extractAllNodesThatAre(class)已經不被推薦使用,在1.6版本中,我感到更加體形了靈活性.更好的適用了自定義的tag */
????????if?(nodeList?==?null)
????????????return?null;
????????if(nodeList.size()?==?0)
????????????return?null;

????????//?System.out.println("start?==============?,size?=?"
????????//?+?nodeList.size());
????????Node[]?nodes?=?nodeList.toNodeArray();
????????String?line?=?"";

????????for?(int?i?=?0;?i?<?nodes.length;?i++)?
{
????????????Node?node?=?nodes[i];???/*得到所以符合的節點,類型化做對應的標簽類*/

????????if?(node?instanceof?Span)?
{
????????????????Span?spanTag?=?(Span)?node;
????????????????line?=?spanTag.toPlainTextString();

????????????}?else?if?(node?instanceof?TableColumn)?
{
????????????????TableColumn?tableTag?=?(TableColumn)?node;
????????????????line?=?tableTag.toPlainTextString();

????????????}?else?if?(node?instanceof?Div)?
{
????????????????Div?divTag?=?(Div)?node;
????????????????line?=?divTag.toPlainTextString();
????????????}

????????????if?(StringUtil.isTrimEmpty(line))
{
????????????????continue;

????????????}else
{
????????????????resultTextList.add(line);
????????????}
????????????
????????}
????????return?resultTextList.toArray();
????}
/*
html : html內容
identifier: 搜索標識
*/

private Object[] extractText(String html, String identifier)









/* NodeFilter filter 就是要解析的過濾器,實現有好多種,我采用的是屬性過濾,其他請參考 API */

/* extractAllNodesThatAre(class) 已經不被推薦使用;在 1.6 版本中,我感到 extractAllNodesThatMatch 更加體現了靈活性,更好地適用於自定義的 tag */










































StringUtil 常用類












































posted on 2006-05-22 17:30 地獄男爵(hellboys) 閱讀(2900) 評論(1) 編輯 收藏 所屬分類: 編程語言(c/c++ java python sql ......)