<?xml version="1.0" encoding="utf-8" standalone="yes"?> UTF-8应该是可以用来表示中文的吧？我不知道这是不是Jdom的一个BUG（Jdom 1.0，beta了10次的产物哦！）。我google了一下，大家解决这个问题的办法无非是把Jdom的输出字符集改为GBK或者GB2312，但是这样就会有一些副作用，如果在没有特定字符集（GBK或者GB2312）的操作系统上不是依然不能正确解析吗？一个比较好的解决办法是先把中文转换成Unicode编码再直接输出，程序解析XML后的时候再把Unicode编码转回中文就没有问题了。</p>
于是我查看了JDK的文档，截至Java 5好像都没有做类似转换的类可以直接使用，但是我发现一个类 java.util.Properties，它的源代码里有两个私有（private）方法 loadConvert (char[] in, int off, int len, char[] convtBuf) 和 saveConvert(String theString, boolean escapeSpace) 其实就是做特殊字符和Unicode编码字符间转换的，我把它们提取出来，单独包装到一个类里就可以使用了。</p>
下面是我包装的类 CharacterSetToolkit /* package mobi.chenwei.lang; /** for(int x=0; x<len; x++) { while (off < end) {
 * CharacterSetToolkit.java
 *
 * Created on October 17, 2006, 2:06 PM
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 */
 * Utility class for character operations.
 * @author Chen Wei
 * @email chenwei.mobi@gmail.com
 */
public class CharacterSetToolkit {

    /** Upper-case hexadecimal digits, indexed by nibble value (0-15). */
    private static final char[] HEX_DIGITS = {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    /** Creates a new instance of CharacterSetToolkit. */
    public CharacterSetToolkit() {
    }

    /**
     * Returns the upper-case hexadecimal digit for the low four bits of {@code nibble}.
     */
    private static char toHex(int nibble) {
        return HEX_DIGITS[nibble & 0xF];
    }

    /**
     * Returns the numeric value of an ASCII hexadecimal digit.
     *
     * @param c the character to interpret; only 0-9, a-f and A-F are accepted.
     * @return the value of the digit, in the range 0-15.
     * @throws IllegalArgumentException if {@code c} is not an ASCII hexadecimal digit.
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return 10 + c - 'a';
        }
        if (c >= 'A' && c <= 'F') {
            return 10 + c - 'A';
        }
        throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
    }

    /**
     * Encodes a string using backslash escapes.
     *
     * <p>Characters below 0x20 or above 0x7E are written as a backslash, the letter
     * {@code u} and four upper-case hexadecimal digits (one escape per UTF-16 code
     * unit). Tab, line feed, carriage return and form feed become backslash followed by
     * {@code t}, {@code n}, {@code r} and {@code f}. A backslash is doubled, and the
     * characters {@code =}, {@code :}, {@code #} and {@code !} are prefixed with a
     * backslash. All other printable ASCII characters are copied unchanged.
     *
     * @param theString the string to encode; must not be {@code null}.
     * @param escapeSpace if {@code true}, every space is prefixed with a backslash;
     *        if {@code false}, only a space at index 0 is.
     * @return the encoded string.
     */
    public static String toUnicode(String theString, boolean escapeSpace) {
        int len = theString.length();
        int bufLen = len * 2;
        if (bufLen < 0) {
            // len * 2 overflowed int.
            bufLen = Integer.MAX_VALUE;
        }
        StringBuilder outBuffer = new StringBuilder(bufLen);
        for (int x = 0; x < len; x++) {
            char aChar = theString.charAt(x);
            // Handle common case first, selecting largest block that
            // avoids the specials below ('=' is 61, DEL is 127).
            if ((aChar > 61) && (aChar < 127)) {
                if (aChar == '\\') {
                    outBuffer.append('\\').append('\\');
                } else {
                    outBuffer.append(aChar);
                }
                continue;
            }
            switch (aChar) {
                case ' ':
                    if (x == 0 || escapeSpace) {
                        outBuffer.append('\\');
                    }
                    outBuffer.append(' ');
                    break;
                case '\t':
                    outBuffer.append('\\').append('t');
                    break;
                case '\n':
                    outBuffer.append('\\').append('n');
                    break;
                case '\r':
                    outBuffer.append('\\').append('r');
                    break;
                case '\f':
                    outBuffer.append('\\').append('f');
                    break;
                case '=': // Fall through
                case ':': // Fall through
                case '#': // Fall through
                case '!':
                    outBuffer.append('\\').append(aChar);
                    break;
                default:
                    if ((aChar < 0x0020) || (aChar > 0x007e)) {
                        outBuffer.append('\\');
                        outBuffer.append('u');
                        outBuffer.append(toHex((aChar >> 12) & 0xF));
                        outBuffer.append(toHex((aChar >> 8) & 0xF));
                        outBuffer.append(toHex((aChar >> 4) & 0xF));
                        outBuffer.append(toHex(aChar & 0xF));
                    } else {
                        outBuffer.append(aChar);
                    }
            }
        }
        return outBuffer.toString();
    }

    /**
     * Decodes backslash escapes in a range of a character array.
     *
     * <p>A backslash followed by {@code u} and four hexadecimal digits (either case) is
     * replaced by the character with that code. Backslash followed by {@code t},
     * {@code r}, {@code n} or {@code f} becomes tab, carriage return, line feed or form
     * feed. A backslash followed by any other character yields that character with the
     * backslash dropped. Everything else is copied unchanged.
     *
     * @param in the characters to decode.
     * @param off index of the first character to decode.
     * @param len number of characters to decode, starting at {@code off}.
     * @param convtBuf scratch buffer for the output; a new buffer is allocated when this
     *        is {@code null} or shorter than {@code len}.
     * @return the decoded string.
     * @throws IllegalArgumentException if a Unicode escape contains a non-hexadecimal
     *         character, or if an escape sequence is cut off by the end of the range.
     */
    public String fromUnicode(char[] in, int off, int len, char[] convtBuf) {
        char[] out = convtBuf;
        if (out == null || out.length < len) {
            // The decoded text is never longer than the encoded text, so len suffices.
            out = new char[len];
        }
        int outLen = 0;
        int end = off + len;
        while (off < end) {
            char aChar = in[off++];
            if (aChar != '\\') {
                out[outLen++] = aChar;
                continue;
            }
            if (off >= end) {
                // A lone backslash at the end of the range has nothing to escape.
                throw new IllegalArgumentException(
                        "Malformed escape: dangling backslash at end of input.");
            }
            aChar = in[off++];
            if (aChar == 'u') {
                if (end - off < 4) {
                    // Do not read hex digits from beyond the requested range.
                    throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
                }
                // Read the xxxx
                int value = 0;
                for (int i = 0; i < 4; i++) {
                    value = (value << 4) + hexValue(in[off++]);
                }
                out[outLen++] = (char) value;
            } else {
                if (aChar == 't') {
                    aChar = '\t';
                } else if (aChar == 'r') {
                    aChar = '\r';
                } else if (aChar == 'n') {
                    aChar = '\n';
                } else if (aChar == 'f') {
                    aChar = '\f';
                }
                out[outLen++] = aChar;
            }
        }
        return new String(out, 0, outLen);
    }
}
]]>
// Convert hexadecimal to decimal; the result is 140.<br />
Integer.parseInt("8C",16);
]]>