本站不再更新,歡迎光臨 java開發技術網
          隨筆-230  評論-230  文章-8  trackbacks-0

          把最近工作中所寫的代碼貼一下,以備后用,如能給予你幫助我萬分高興

          package com.easylotto.omas.util;


          import java.util.*;
          import java.io.*;
          import org.apache.commons.lang.*;
          /**
           * 替換HTMl里面的字符 e.g.: < > " å И 水
           * 
           * 
          @author 趙學(xué)慶
           * modify peidw 2008-06-20
           * 
           
          */
          public class HTMLDecoder {

            
          public static final HashMap<String, Character> charTable;

            
          public static String decode(String s) {
              String t;
              Character ch;
              
          int tmpPos, i;

              
          int maxPos = s.length();
              StringBuffer sb 
          = new StringBuffer(maxPos);
              
          int curPos = 0;
              
          while (curPos < maxPos) {
                
          char c = s.charAt(curPos++);
                
          if (c == '&') {
                  tmpPos 
          = curPos;
                  
          if (tmpPos < maxPos) {
                    
          char d = s.charAt(tmpPos++);
                    
          if (d == '#') {
                      
          if (tmpPos < maxPos) {
                        d 
          = s.charAt(tmpPos++);
                        
          if ((d == 'x'|| (d == 'X')) {
                          
          if (tmpPos < maxPos) {
                            d 
          = s.charAt(tmpPos++);
                            
          if (isHexDigit(d)) {
                              
          while (tmpPos < maxPos) {
                                d 
          = s.charAt(tmpPos++);
                                
          if (!isHexDigit(d)) {
                                  
          if (d == ';') {
                                    t 
          = s.substring(curPos + 2, tmpPos - 1);
                                    
          try {
                                      i 
          = Integer.parseInt(t, 16);
                                      
          if ((i >= 0&& (i < 65536)) {
                                        c 
          = (char) i;
                                        curPos 
          = tmpPos;
                                      }
                                    } 
          catch (NumberFormatException e) {
                                    }
                                  }
                                  
          break;
                                }
                              }
                            }
                          }
                        } 
          else if (isDigit(d)) {
                          
          while (tmpPos < maxPos) {
                            d 
          = s.charAt(tmpPos++);
                            
          if (!isDigit(d)) {
                              
          if (d == ';') {
                                t 
          = s.substring(curPos + 1, tmpPos - 1);
                                
          try {
                                  i 
          = Integer.parseInt(t);
                                  
          if ((i >= 0&& (i < 65536)) {
                                    c 
          = (char) i;
                                    curPos 
          = tmpPos;
                                  }
                                } 
          catch (NumberFormatException e) {
                                }
                              }
                              
          break;
                            }
                          }
                        }
                      }
                    } 
          else if (isLetter(d)) {
                      
          while (tmpPos < maxPos) {
                        d 
          = s.charAt(tmpPos++);
                        
          if (!isLetterOrDigit(d)) {
                          
          if (d == ';') {
                            t 
          = s.substring(curPos, tmpPos - 1);
                  
                            ch 
          = (Character) charTable.get(t);
                            
          if (ch != null) {
                              c 
          = ch.charValue();
                              curPos 
          = tmpPos;
                            }
                          }
                          
          break;
                        }
                      }
                    }
                  }
                }
                sb.append(c);
              }
              
          return sb.toString();
            }

            
          private static boolean isLetterOrDigit(char c) {
              
          return isLetter(c) || isDigit(c);
            }

            
          private static boolean isHexDigit(char c) {
              
          return isHexLetter(c) || isDigit(c);
            }

            
          private static boolean isLetter(char c) {
              
          return ((c >= 'a'&& (c <= 'z')) || ((c >= 'A'&& (c <= 'Z'));
            }

            
          private static boolean isHexLetter(char c) {
              
          return ((c >= 'a'&& (c <= 'f')) || ((c >= 'A'&& (c <= 'F'));
            }

            
          private static boolean isDigit(char c) {
              
          return (c >= '0'&& (c <= '9');
            }

            
          public static String compact(String s) {
              
          int maxPos = s.length();
              StringBuffer sb 
          = new StringBuffer(maxPos);
              
          int curPos = 0;
              
          while (curPos < maxPos) {
                
          char c = s.charAt(curPos++);
                
          if (isWhitespace(c)) {
                  
          while ((curPos < maxPos) && isWhitespace(s.charAt(curPos))) {
                    curPos
          ++;
                  }
                  c 
          = '\u0020';
                }
                sb.append(c);
              }
              
          return sb.toString();
            }

            
          // HTML is very particular about what constitutes white space.
            public static boolean isWhitespace(char ch) {
              
          return (ch == '\u0020'|| (ch == '\r'|| (ch == '\n'|| (ch == '\u0009'|| (ch == '\u000c'|| (ch == '\u200b');
            }

            
          static {
              charTable 
          = new HashMap<String, Character>();
              charTable.put(
          "quot"new Character((char34));
              charTable.put(
          "amp"new Character((char38));
              charTable.put(
          "apos"new Character((char39));
              charTable.put(
          "lt"new Character((char60));
              charTable.put(
          "gt"new Character((char62));
              charTable.put(
          "nbsp"new Character((char32));
              charTable.put(
          "iexcl"new Character((char161));
              charTable.put(
          "cent"new Character((char162));
              charTable.put(
          "pound"new Character((char163));
              charTable.put(
          "curren"new Character((char164));
              charTable.put(
          "yen"new Character((char165));
              charTable.put(
          "brvbar"new Character((char166));
              charTable.put(
          "sect"new Character((char167));
              charTable.put(
          "uml"new Character((char168));
              charTable.put(
          "copy"new Character((char169));
              charTable.put(
          "ordf"new Character((char170));
              charTable.put(
          "laquo"new Character((char171));
              charTable.put(
          "not"new Character((char172));
              charTable.put(
          "shy"new Character((char173));
              charTable.put(
          "reg"new Character((char174));
              charTable.put(
          "macr"new Character((char175));
              charTable.put(
          "deg"new Character((char176));
              charTable.put(
          "plusmn"new Character((char177));
              charTable.put(
          "sup2"new Character((char178));
              charTable.put(
          "sup3"new Character((char179));
              charTable.put(
          "acute"new Character((char180));
              charTable.put(
          "micro"new Character((char181));
              charTable.put(
          "para"new Character((char182));
              charTable.put(
          "middot"new Character((char183));
              charTable.put(
          "cedil"new Character((char184));
              charTable.put(
          "sup1"new Character((char185));
              charTable.put(
          "ordm"new Character((char186));
              charTable.put(
          "raquo"new Character((char187));
              charTable.put(
          "frac14"new Character((char188));
              charTable.put(
          "frac12"new Character((char189));
              charTable.put(
          "frac34"new Character((char190));
              charTable.put(
          "iquest"new Character((char191));
              charTable.put(
          "Agrave"new Character((char192));
              charTable.put(
          "Aacute"new Character((char193));
              charTable.put(
          "Acirc"new Character((char194));
              charTable.put(
          "Atilde"new Character((char195));
              charTable.put(
          "Auml"new Character((char196));
              charTable.put(
          "Aring"new Character((char197));
              charTable.put(
          "AElig"new Character((char198));
              charTable.put(
          "Ccedil"new Character((char199));
              charTable.put(
          "Egrave"new Character((char200));
              charTable.put(
          "Eacute"new Character((char201));
              charTable.put(
          "Ecirc"new Character((char202));
              charTable.put(
          "Euml"new Character((char203));
              charTable.put(
          "Igrave"new Character((char204));
              charTable.put(
          "Iacute"new Character((char205));
              charTable.put(
          "Icirc"new Character((char206));
              charTable.put(
          "Iuml"new Character((char207));
              charTable.put(
          "ETH"new Character((char208));
              charTable.put(
          "Ntilde"new Character((char209));
              charTable.put(
          "Ograve"new Character((char210));
              charTable.put(
          "Oacute"new Character((char211));
              charTable.put(
          "Ocirc"new Character((char212));
              charTable.put(
          "Otilde"new Character((char213));
              charTable.put(
          "Ouml"new Character((char214));
              charTable.put(
          "times"new Character((char215));
              charTable.put(
          "Oslash"new Character((char216));
              charTable.put(
          "Ugrave"new Character((char217));
              charTable.put(
          "Uacute"new Character((char218));
              charTable.put(
          "Ucirc"new Character((char219));
              charTable.put(
          "Uuml"new Character((char220));
              charTable.put(
          "Yacute"new Character((char221));
              charTable.put(
          "THORN"new Character((char222));
              charTable.put(
          "szlig"new Character((char223));
              charTable.put(
          "agrave"new Character((char224));
              charTable.put(
          "aacute"new Character((char225));
              charTable.put(
          "acirc"new Character((char226));
              charTable.put(
          "atilde"new Character((char227));
              charTable.put(
          "auml"new Character((char228));
              charTable.put(
          "aring"new Character((char229));
              charTable.put(
          "aelig"new Character((char230));
              charTable.put(
          "ccedil"new Character((char231));
              charTable.put(
          "egrave"new Character((char232));
              charTable.put(
          "eacute"new Character((char233));
              charTable.put(
          "ecirc"new Character((char234));
              charTable.put(
          "euml"new Character((char235));
              charTable.put(
          "igrave"new Character((char236));
              charTable.put(
          "iacute"new Character((char237));
              charTable.put(
          "icirc"new Character((char238));
              charTable.put(
          "iuml"new Character((char239));
              charTable.put(
          "eth"new Character((char240));
              charTable.put(
          "ntilde"new Character((char241));
              charTable.put(
          "ograve"new Character((char242));
              charTable.put(
          "oacute"new Character((char243));
              charTable.put(
          "ocirc"new Character((char244));
              charTable.put(
          "otilde"new Character((char245));
              charTable.put(
          "ouml"new Character((char246));
              charTable.put(
          "divide"new Character((char247));
              charTable.put(
          "oslash"new Character((char248));
              charTable.put(
          "ugrave"new Character((char249));
              charTable.put(
          "uacute"new Character((char250));
              charTable.put(
          "ucirc"new Character((char251));
              charTable.put(
          "uuml"new Character((char252));
              charTable.put(
          "yacute"new Character((char253));
              charTable.put(
          "thorn"new Character((char254));
              charTable.put(
          "yuml"new Character((char255));
              charTable.put(
          "OElig"new Character((char338));
              charTable.put(
          "oelig"new Character((char339));
              charTable.put(
          "Scaron"new Character((char352));
              charTable.put(
          "scaron"new Character((char353));
              charTable.put(
          "fnof"new Character((char402));
              charTable.put(
          "circ"new Character((char710));
              charTable.put(
          "tilde"new Character((char732));
              charTable.put(
          "Alpha"new Character((char913));
              charTable.put(
          "Beta"new Character((char914));
              charTable.put(
          "Gamma"new Character((char915));
              charTable.put(
          "Delta"new Character((char916));
              charTable.put(
          "Epsilon"new Character((char917));
              charTable.put(
          "Zeta"new Character((char918));
              charTable.put(
          "Eta"new Character((char919));
              charTable.put(
          "Theta"new Character((char920));
              charTable.put(
          "Iota"new Character((char921));
              charTable.put(
          "Kappa"new Character((char922));
              charTable.put(
          "Lambda"new Character((char923));
              charTable.put(
          "Mu"new Character((char924));
              charTable.put(
          "Nu"new Character((char925));
              charTable.put(
          "Xi"new Character((char926));
              charTable.put(
          "Omicron"new Character((char927));
              charTable.put(
          "Pi"new Character((char928));
              charTable.put(
          "Rho"new Character((char929));
              charTable.put(
          "Sigma"new Character((char931));
              charTable.put(
          "Tau"new Character((char932));
              charTable.put(
          "Upsilon"new Character((char933));
              charTable.put(
          "Phi"new Character((char934));
              charTable.put(
          "Chi"new Character((char935));
              charTable.put(
          "Psi"new Character((char936));
              charTable.put(
          "Omega"new Character((char937));
              charTable.put(
          "alpha"new Character((char945));
              charTable.put(
          "beta"new Character((char946));
              charTable.put(
          "gamma"new Character((char947));
              charTable.put(
          "delta"new Character((char948));
              charTable.put(
          "epsilon"new Character((char949));
              charTable.put(
          "zeta"new Character((char950));
              charTable.put(
          "eta"new Character((char951));
              charTable.put(
          "theta"new Character((char952));
              charTable.put(
          "iota"new Character((char953));
              charTable.put(
          "kappa"new Character((char954));
              charTable.put(
          "lambda"new Character((char955));
              charTable.put(
          "mu"new Character((char956));
              charTable.put(
          "nu"new Character((char957));
              charTable.put(
          "xi"new Character((char958));
              charTable.put(
          "omicron"new Character((char959));
              charTable.put(
          "pi"new Character((char960));
              charTable.put(
          "rho"new Character((char961));
              charTable.put(
          "sigmaf"new Character((char962));
              charTable.put(
          "sigma"new Character((char963));
              charTable.put(
          "tau"new Character((char964));
              charTable.put(
          "upsilon"new Character((char965));
              charTable.put(
          "phi"new Character((char966));
              charTable.put(
          "chi"new Character((char967));
              charTable.put(
          "psi"new Character((char968));
              charTable.put(
          "omega"new Character((char969));
              charTable.put(
          "thetasym"new Character((char977));
              charTable.put(
          "upsih"new Character((char978));
              charTable.put(
          "piv"new Character((char982));
              charTable.put(
          "ensp"new Character((char8194));
              charTable.put(
          "emsp"new Character((char8195));
              charTable.put(
          "thinsp"new Character((char8201));
              charTable.put(
          "zwnj"new Character((char8204));
              charTable.put(
          "zwj"new Character((char8205));
              charTable.put(
          "lrm"new Character((char8206));
              charTable.put(
          "rlm"new Character((char8207));
              charTable.put(
          "ndash"new Character((char8211));
              charTable.put(
          "mdash"new Character((char8212));
              charTable.put(
          "lsquo"new Character((char8216));
              charTable.put(
          "rsquo"new Character((char8217));
              charTable.put(
          "sbquo"new Character((char8218));
              charTable.put(
          "ldquo"new Character((char8220));
              charTable.put(
          "rdquo"new Character((char8221));
              charTable.put(
          "bdquo"new Character((char8222));
              charTable.put(
          "dagger"new Character((char8224));
              charTable.put(
          "Dagger"new Character((char8225));
              charTable.put(
          "bull"new Character((char8226));
              charTable.put(
          "hellip"new Character((char8230));
              charTable.put(
          "permil"new Character((char8240));
              charTable.put(
          "prime"new Character((char8242));
              charTable.put(
          "Prime"new Character((char8243));
              charTable.put(
          "lsaquo"new Character((char8249));
              charTable.put(
          "rsaquo"new Character((char8250));
              charTable.put(
          "oline"new Character((char8254));
              charTable.put(
          "frasl"new Character((char8260));
              charTable.put(
          "euro"new Character((char8364));
              charTable.put(
          "image"new Character((char8465));
              charTable.put(
          "weierp"new Character((char8472));
              charTable.put(
          "real"new Character((char8476));
              charTable.put(
          "trade"new Character((char8482));
              charTable.put(
          "alefsym"new Character((char8501));
              charTable.put(
          "larr"new Character((char8592));
              charTable.put(
          "uarr"new Character((char8593));
              charTable.put(
          "rarr"new Character((char8594));
              charTable.put(
          "darr"new Character((char8595));
              charTable.put(
          "harr"new Character((char8596));
              charTable.put(
          "crarr"new Character((char8629));
              charTable.put(
          "lArr"new Character((char8656));
              charTable.put(
          "uArr"new Character((char8657));
              charTable.put(
          "rArr"new Character((char8658));
              charTable.put(
          "dArr"new Character((char8659));
              charTable.put(
          "hArr"new Character((char8660));
              charTable.put(
          "forall"new Character((char8704));
              charTable.put(
          "part"new Character((char8706));
              charTable.put(
          "exist"new Character((char8707));
              charTable.put(
          "empty"new Character((char8709));
              charTable.put(
          "nabla"new Character((char8711));
              charTable.put(
          "isin"new Character((char8712));
              charTable.put(
          "notin"new Character((char8713));
              charTable.put(
          "ni"new Character((char8715));
              charTable.put(
          "prod"new Character((char8719));
              charTable.put(
          "sum"new Character((char8721));
              charTable.put(
          "minus"new Character((char8722));
              charTable.put(
          "lowast"new Character((char8727));
              charTable.put(
          "radic"new Character((char8730));
              charTable.put(
          "prop"new Character((char8733));
              charTable.put(
          "infin"new Character((char8734));
              charTable.put(
          "ang"new Character((char8736));
              charTable.put(
          "and"new Character((char8743));
              charTable.put(
          "or"new Character((char8744));
              charTable.put(
          "cap"new Character((char8745));
              charTable.put(
          "cup"new Character((char8746));
              charTable.put(
          "int"new Character((char8747));
              charTable.put(
          "there4"new Character((char8756));
              charTable.put(
          "sim"new Character((char8764));
              charTable.put(
          "cong"new Character((char8773));
              charTable.put(
          "asymp"new Character((char8776));
              charTable.put(
          "ne"new Character((char8800));
              charTable.put(
          "equiv"new Character((char8801));
              charTable.put(
          "le"new Character((char8804));
              charTable.put(
          "ge"new Character((char8805));
              charTable.put(
          "sub"new Character((char8834));
              charTable.put(
          "sup"new Character((char8835));
              charTable.put(
          "nsub"new Character((char8836));
              charTable.put(
          "sube"new Character((char8838));
              charTable.put(
          "supe"new Character((char8839));
              charTable.put(
          "oplus"new Character((char8853));
              charTable.put(
          "otimes"new Character((char8855));
              charTable.put(
          "perp"new Character((char8869));
              charTable.put(
          "sdot"new Character((char8901));
              charTable.put(
          "lceil"new Character((char8968));
              charTable.put(
          "rceil"new Character((char8969));
              charTable.put(
          "lfloor"new Character((char8970));
              charTable.put(
          "rfloor"new Character((char8971));
              charTable.put(
          "lang"new Character((char9001));
              charTable.put(
          "rang"new Character((char9002));
              charTable.put(
          "loz"new Character((char9674));
              charTable.put(
          "spades"new Character((char9824));
              charTable.put(
          "clubs"new Character((char9827));
              charTable.put(
          "hearts"new Character((char9829));
              charTable.put(
          "diams"new Character((char9830));
            }
            
            
          public static void main(String[]args)throws Exception{
                StringBuffer sb
          =new StringBuffer();
                File file
          =new File("e:\\temp\\統(tǒng)計表.htm");
                InputStream is
          =new FileInputStream(file);
                BufferedReader br
          =new BufferedReader(new InputStreamReader(is));
                String str
          =null;
                
          while((str=br.readLine())!=null){
                    sb.append(str);
                }
                String tmp
          =decode(sb.toString());
                tmp
          =StringUtils.replace(tmp, "?""");
                System.out.println(tmp);
                
            }
            
            
          }
          posted on 2008-06-19 11:33 有貓相伴的日子 閱讀(4037) 評論(3)  編輯  收藏 所屬分類: jdk

          評論:
          # re: unicode 編碼轉換成漢字 2008-10-31 22:13 | wuzhikun
          hao   回復  更多評論
            
          # re: unicode 編碼轉換成漢字 2008-10-31 22:14 | wuzhikun
          yao   回復  更多評論
            
          # re: unicode 編碼轉換成漢字[未登錄] 2012-08-23 23:25 | 菜鳥
          不錯,正用得上,萬分感謝!  回復  更多評論
            
          本站不再更新,歡迎光臨 java開發技術網
          主站蜘蛛池模板: 麻城市| 吉安县| 贡觉县| 梁平县| 宣城市| 永平县| 大姚县| 彭阳县| 松滋市| 波密县| 鹿泉市| 蓬莱市| 眉山市| 钦州市| 洛川县| 耒阳市| 长顺县| 芦山县| 凤阳县| 永定县| 台东县| 防城港市| 东阿县| 乐清市| 聂荣县| 安阳市| 秦皇岛市| 尖扎县| 会同县| 苗栗市| 治县。| 时尚| 南丰县| 三台县| 深水埗区| 元氏县| 尚义县| 南通市| 乐昌市| 延边| 来宾市|