注銷

import java.util.ArrayList;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Title: Document
 *
 * Description: Parses XML with regular expressions; the stated goal is better performance.
 *
 * Copyright: Copyright (c) 2005
 *
 * Company: org.axman
 *
 * @author Axman
 * @version 1.0
 */
public class Document {
  // NOTE: matching is purely regex based (lazy, non-recursive), so an element that
  // contains a nested element of the same name is cut at the first closing tag.

  /** One path segment: an element name followed by an optional {@code [index]}. */
  private static final Pattern PATH_SEGMENT =
      Pattern.compile("([\\w:.\\-]+)(\\[(\\d+)\\])?");

  /** The first tag (opening or self-closing) found in an element string. */
  private static final Pattern FIRST_TAG = Pattern.compile("<[^>]+>");

  /** {@code name="value"} or {@code name='value'}; the value may be empty. */
  private static final Pattern ATTRIBUTE =
      Pattern.compile("([\\w:.\\-]+)\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)')");

  /** The first run of characters located between a {@code >} and the next {@code <}. */
  private static final Pattern ELEMENT_TEXT = Pattern.compile(">([^<>]*)<");

  private final String xmlString;

  /**
   * Creates a document over the given XML text. Callers holding an InputStream or
   * Reader must convert its content to a String first.
   *
   * @param xmlString the XML content
   * @throws IllegalArgumentException if {@code xmlString} is null or empty
   */
  public Document(String xmlString) throws IllegalArgumentException {
    if (xmlString == null || xmlString.length() == 0) {
      throw new IllegalArgumentException("Input string error!");
    }
    this.xmlString = xmlString;
  }

  /**
   * Searches the whole document for elements with the given tag name.
   *
   * @param tagName element name to look for (treated literally, not as a regex)
   * @return every matching element, in document order; empty when none match
   */
  public String[] getElementsByTag(String tagName) {
    return getElementsByTag(this.xmlString, tagName);
  }

  /**
   * Extracts an element by a {@code #}-separated path, e.g. {@code ROOT#PARENT#CHILD}
   * selects a CHILD element inside a PARENT element inside a ROOT element.
   *
   * @param singlePath the {@code #}-separated element names
   * @return the matched element, or an empty string when nothing matches
   */
  public String getElementBySinglePath(String singlePath) {
    return getElementBySinglePath(this.xmlString, singlePath);
  }

  /**
   * Extracts an element by a {@code #}-separated path whose segments may carry an
   * index, e.g. {@code ROOT#PARENT[2]#CHILD[1]}. A segment without index means 0.
   *
   * @param singlePath the {@code #}-separated, optionally indexed element names
   * @return the selected element, or {@code null} when the path cannot be resolved
   */
  public String getElementByMultiPath(String singlePath) {
    return getElementByMultiPath(this.xmlString, singlePath);
  }

  /**
   * Searches the given element text for elements with the given tag name. Chaining
   * calls (search A, then search C inside the result) restricts the match to a
   * particular parent when the same name occurs at several levels.
   *
   * @param parentElementString text to search in
   * @param tagName element name to look for (treated literally, not as a regex)
   * @return every matching element, in order of appearance; empty when none match
   */
  public static String[] getElementsByTag(String parentElementString, String tagName) {
    // DOTALL lets element content span several lines.
    Pattern pattern = Pattern.compile(elementRegex(tagName), Pattern.DOTALL);
    Matcher matcher = pattern.matcher(parentElementString);
    ArrayList<String> found = new ArrayList<String>();
    while (matcher.find()) {
      found.add(matcher.group());
    }
    return found.toArray(new String[found.size()]);
  }

  /**
   * Extracts an element from the given text by a {@code #}-separated path, e.g.
   * {@code ROOT#PARENT#CHILD}. Ancestor tags may carry attributes. The ancestors are
   * matched greedily, so when several candidates exist the last one is returned.
   *
   * @param parentElementString text to search in
   * @param singlePath the {@code #}-separated element names
   * @return the matched element, or an empty string when nothing matches
   */
  public static String getElementBySinglePath(String parentElementString, String singlePath) {
    String[] path = singlePath.split("#");
    if (path.length == 0) {
      // A path made only of separators names no element at all.
      return "";
    }
    // The innermost element may be <x>v</x> or <x/>; it is capture group 1.
    String regex = "(" + elementRegex(path[path.length - 1]) + ")";
    for (int i = path.length - 2; i >= 0; i--) {
      String ancestor = Pattern.quote(path[i]);
      // Wrap with the ancestor's opening and closing tags; no capturing groups are
      // added here so that group 1 keeps pointing at the innermost element.
      regex = "<" + ancestor + "(?=[\\s>])[^>]*>.*" + regex + ".*</" + ancestor + "\\s*>";
    }
    Matcher matcher = Pattern.compile(regex, Pattern.DOTALL).matcher(parentElementString);
    return matcher.find() ? matcher.group(1) : "";
  }

  /**
   * Extracts an element from the given text by a {@code #}-separated path whose
   * segments may carry an index, e.g. {@code ROOT#PARENT[2]#CHILD[1]}. A segment
   * without index means 0. Segments that contain no element name are skipped.
   *
   * @param parentElementString text to search in
   * @param singlePath the {@code #}-separated, optionally indexed element names
   * @return the selected element, or {@code null} when an argument is null, an
   *         index is out of range, or a segment matches nothing
   */
  public static String getElementByMultiPath(String parentElementString, String singlePath) {
    if (parentElementString == null || singlePath == null) {
      return null;
    }
    String current = parentElementString;
    String[] path = singlePath.split("#");
    for (int i = 0; i < path.length; i++) {
      Matcher segment = PATH_SEGMENT.matcher(path[i]);
      if (!segment.find()) {
        continue;
      }
      int index = 0;
      if (segment.group(3) != null) {
        try {
          index = Integer.parseInt(segment.group(3));
        } catch (NumberFormatException tooLarge) {
          // Digits only, so this can only be an overflow: no such element exists.
          return null;
        }
      }
      String[] candidates = getElementsByTag(current, segment.group(1));
      if (index >= candidates.length) {
        return null;
      }
      current = candidates[index];
    }
    return current;
  }

  /**
   * Collects all attributes of the first tag of the given element. The element is
   * expected to come from {@code getElementsByTag}.
   *
   * @param elementString element text whose first tag is inspected
   * @return attribute names mapped to their trimmed values; empty when there are none
   */
  public HashMap<String, String> getAttributes(String elementString) {
    HashMap<String, String> attributes = new HashMap<String, String>();
    Matcher matcher = ATTRIBUTE.matcher(firstTag(elementString));
    while (matcher.find()) {
      attributes.put(matcher.group(1).trim(), attributeValue(matcher));
    }
    return attributes;
  }

  /**
   * Reads one attribute of the first tag of the given element. The element is
   * expected to come from {@code getElementsByTag}.
   *
   * @param elementString element text whose first tag is inspected
   * @param attributeName name of the attribute to read
   * @return the trimmed attribute value, or an empty string when it is absent
   */
  public static String getAttribute(String elementString, String attributeName) {
    Matcher matcher = ATTRIBUTE.matcher(firstTag(elementString));
    while (matcher.find()) {
      if (matcher.group(1).trim().equals(attributeName)) {
        return attributeValue(matcher);
      }
    }
    return "";
  }

  /**
   * Returns the text content of the given element: the first run of characters
   * found between a {@code >} and the following {@code <}.
   *
   * @param elementString element text
   * @return the text, or an empty string when there is none
   */
  public static String getElementText(String elementString) {
    Matcher matcher = ELEMENT_TEXT.matcher(elementString);
    return matcher.find() ? matcher.group(1) : "";
  }

  /** Small demonstration: resolves an indexed path and prints the selected element. */
  public static void main(String[] args) {
    String child =
        new Document("<ROOT>sss <PARENT>sss <CHILD>aaaa</CHILD>ss </PARENT>sss </ROOT>")
            .getElementByMultiPath("ROOT[0]#PARENT#CHILD");
    System.out.println(child);
  }

  /**
   * Builds the regex for one element named {@code tagName}, either
   * {@code <x ...>...</x>} or {@code <x .../>}. The name is quoted so regex
   * metacharacters in it are literal, and the lookahead stops {@code a} from
   * matching {@code <ab>}.
   */
  private static String elementRegex(String tagName) {
    String name = Pattern.quote(tagName);
    return "<" + name + "(?=[\\s/>])[^>]*?((>.*?</" + name + "\\s*>)|(/>))";
  }

  /** Returns the first tag of the element text, or an empty string when there is none. */
  private static String firstTag(String elementString) {
    Matcher matcher = FIRST_TAG.matcher(elementString);
    return matcher.find() ? matcher.group() : "";
  }

  /** Returns the trimmed value of the attribute the matcher currently points at. */
  private static String attributeValue(Matcher attributeMatch) {
    // Group 2 holds a double-quoted value, group 3 a single-quoted one.
    String raw = attributeMatch.group(2) != null ? attributeMatch.group(2) : attributeMatch.group(3);
    return raw.trim();
  }

}

posted on 2006-11-15 17:41 注銷..... 閱讀(1286) 評論(0) 編輯收藏所屬分類: .net摘要

新用戶注冊刷新評論列表


只有注冊用戶登錄后才能發表評論。




網站導航: 博客園 IT新聞 Chat2DB C++博客博問管理
相關文章: 串口操作使用異步客戶端套接字反編譯說說大型高并發高負載網站的系統架構資源文件國外C#開源系統一覽表 C#的前途如何？ vss教程 .net中客戶端驗證可以用微軟自帶的驗證控件動態加載DLL

注銷

常用鏈接

留言簿(2)

隨筆分類

隨筆檔案

文章分類

文章檔案

相冊

搜索

積分與排名

最新評論

閱讀排行榜

評論排行榜