The NoteBook of EricKong

            BlogJava :: 首頁 :: 聯系 :: 聚合  :: 管理
            611 Posts :: 1 Stories :: 190 Comments :: 0 Trackbacks

          /* 輸出

          Original:       黃   彪彪

          to unicode:     \u9EC4\ \t\u5F6A\u5F6A

          from unicode:   黃   彪彪

          使用命令轉換: native2ascii -encoding utf-8 a.txt a.txt

          Java的properties屬性文件會把字符先轉換成unicode的形式存儲.

          */

           


          import java.io.UnsupportedEncodingException;

           


          /**
           * Converts text to and from the backslash-escaped ASCII form in which
           * characters outside printable ASCII are written as \\uXXXX and a handful of
           * special characters are prefixed with a backslash.
           *
           * <p>All methods are static and keep no state.
           */
          public class UnicodeConverter {

              /** Upper-case hexadecimal digits, indexed by nibble value (0-15). */
              private static final char[] HEX_DIGITS = {
                  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
              };

              /**
               * Demo: prints a sample string, its escaped form, and the decoded result.
               *
               * @param args ignored
               * @throws UnsupportedEncodingException never thrown by the current body; the
               *         clause is retained so the method signature stays unchanged
               */
              public static void main(String[] args) throws UnsupportedEncodingException {
                  String s = "黃 \t彪\u5F6A";
                  System.out.println("Original:\t\t" + s);

                  s = toEncodedUnicode(s, true);
                  System.out.println("to unicode:\t\t" + s);

                  s = fromEncodedUnicode(s.toCharArray(), 0, s.length());
                  System.out.println("from unicode:\t" + s);
              }

              /**
               * Returns the upper-case hex digit for the low four bits of {@code nibble}.
               */
              private static char toHex(int nibble) {
                  return HEX_DIGITS[nibble & 0xF];
              }

              /**
               * Returns the numeric value of an ASCII hex digit, or -1 if {@code c} is not
               * one of 0-9, a-f, A-F. Only ASCII digits are accepted on purpose, so
               * non-ASCII digit characters are rejected.
               */
              private static int hexValue(char c) {
                  if (c >= '0' && c <= '9') {
                      return c - '0';
                  }
                  if (c >= 'a' && c <= 'f') {
                      return 10 + c - 'a';
                  }
                  if (c >= 'A' && c <= 'F') {
                      return 10 + c - 'A';
                  }
                  return -1;
              }

              /**
               * Encodes a string into its escaped ASCII form.
               *
               * <ul>
               *   <li>A backslash is doubled.</li>
               *   <li>Tab, newline, carriage return and form feed become backslash followed
               *       by {@code t}, {@code n}, {@code r}, {@code f}.</li>
               *   <li>{@code =}, {@code :}, {@code #} and {@code !} are prefixed with a
               *       backslash.</li>
               *   <li>A space is prefixed with a backslash when it is the first character,
               *       or always when {@code escapeSpace} is true.</li>
               *   <li>Any char below 0x20 or above 0x7E is written as \\uXXXX with four
               *       upper-case hex digits.</li>
               *   <li>Everything else is copied unchanged.</li>
               * </ul>
               *
               * @param theString   the text to encode; must not be null
               * @param escapeSpace when true every space is preceded by a backslash; when
               *                    false only a space at index 0 is
               * @return the escaped representation of {@code theString}
               */
              public static String toEncodedUnicode(String theString, boolean escapeSpace) {
                  int len = theString.length();
                  // Initial capacity guess of twice the input; clamp if the doubling overflowed.
                  int bufLen = len * 2;
                  if (bufLen < 0) {
                      bufLen = Integer.MAX_VALUE;
                  }
                  // The buffer never leaves this method, so the unsynchronized builder suffices.
                  StringBuilder outBuffer = new StringBuilder(bufLen);

                  for (int x = 0; x < len; x++) {
                      char aChar = theString.charAt(x);

                      // Common case first: the ASCII range '>' .. '~' contains only one
                      // character that needs escaping, the backslash itself.
                      if ((aChar > 61) && (aChar < 127)) {
                          if (aChar == '\\') {
                              outBuffer.append('\\');
                          }
                          outBuffer.append(aChar);
                          continue;
                      }

                      switch (aChar) {
                      case ' ':
                          if (x == 0 || escapeSpace) {
                              outBuffer.append('\\');
                          }
                          outBuffer.append(' ');
                          break;
                      case '\t':
                          outBuffer.append('\\').append('t');
                          break;
                      case '\n':
                          outBuffer.append('\\').append('n');
                          break;
                      case '\r':
                          outBuffer.append('\\').append('r');
                          break;
                      case '\f':
                          outBuffer.append('\\').append('f');
                          break;
                      case '=': // Fall through
                      case ':': // Fall through
                      case '#': // Fall through
                      case '!':
                          outBuffer.append('\\').append(aChar);
                          break;
                      default:
                          if ((aChar < 0x0020) || (aChar > 0x007e)) {
                              // A char is 16 bits: emit its four nibbles, most significant first.
                              outBuffer.append('\\').append('u');
                              outBuffer.append(toHex((aChar >> 12) & 0xF));
                              outBuffer.append(toHex((aChar >> 8) & 0xF));
                              outBuffer.append(toHex((aChar >> 4) & 0xF));
                              outBuffer.append(toHex(aChar & 0xF));
                          } else {
                              outBuffer.append(aChar);
                          }
                      }
                  }
                  return outBuffer.toString();
              }

              /**
               * Decodes the escaped form back into plain text.
               *
               * <p>Within {@code in[off .. off+len)}: \\uXXXX (hex digits in either case)
               * becomes the corresponding char; backslash followed by {@code t}, {@code r},
               * {@code n} or {@code f} becomes tab, carriage return, newline or form feed;
               * backslash followed by any other character yields that character; all
               * other characters are copied unchanged.
               *
               * @param in  the characters holding the escaped text; must not be null
               * @param off index of the first character to decode
               * @param len number of characters to decode
               * @return the decoded string
               * @throws IndexOutOfBoundsException if {@code off} or {@code len} is negative
               *         or the range extends past the end of {@code in}
               * @throws IllegalArgumentException if the range ends with a lone backslash,
               *         or a \\uXXXX escape is truncated or contains a non-hex digit
               */
              public static String fromEncodedUnicode(char[] in, int off, int len) {
                  if (off < 0 || len < 0 || off > in.length - len) {
                      throw new IndexOutOfBoundsException(
                              "off=" + off + ", len=" + len + ", in.length=" + in.length);
                  }

                  // Every output char consumes at least one input char, so len is enough.
                  char[] out = new char[len];
                  int outLen = 0;
                  int end = off + len;

                  while (off < end) {
                      char aChar = in[off++];
                      if (aChar != '\\') {
                          out[outLen++] = aChar;
                          continue;
                      }

                      // An escape needs at least one more character inside the range.
                      if (off >= end) {
                          throw new IllegalArgumentException(
                                  "Malformed escape: dangling backslash at end of input.");
                      }
                      aChar = in[off++];

                      if (aChar == 'u') {
                          // All four hex digits must lie inside the requested range;
                          // never read past it even if the array itself is longer.
                          if (end - off < 4) {
                              throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
                          }
                          int value = 0;
                          for (int i = 0; i < 4; i++) {
                              int digit = hexValue(in[off++]);
                              if (digit < 0) {
                                  throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
                              }
                              value = (value << 4) + digit;
                          }
                          out[outLen++] = (char) value;
                      } else {
                          switch (aChar) {
                          case 't':
                              aChar = '\t';
                              break;
                          case 'r':
                              aChar = '\r';
                              break;
                          case 'n':
                              aChar = '\n';
                              break;
                          case 'f':
                              aChar = '\f';
                              break;
                          default:
                              // Any other escaped character stands for itself.
                              break;
                          }
                          out[outLen++] = aChar;
                      }
                  }
                  return new String(out, 0, outLen);
              }

          }

           

          posted on 2011-05-11 10:15 Eric_jiang 閱讀(14474) 評論(1)  編輯  收藏 所屬分類: Java

          Feedback

          # re: Java:轉換漢字為unicode形式的字符串和轉換unicode形式字符串轉換成漢字 2012-03-06 10:17 維尼老熊
          很有用,多謝  回復  更多評論
            

          主站蜘蛛池模板: 广南县| 河曲县| 武城县| 盱眙县| 四子王旗| 治多县| 奉化市| 垫江县| 汕尾市| 遂宁市| 商洛市| 永登县| 洱源县| 双桥区| 永胜县| 宁津县| 固安县| 基隆市| 武山县| 四平市| 邻水| 龙山县| 资溪县| 秀山| 琼海市| 临澧县| 托克托县| 仁布县| 清苑县| 清新县| 伊宁市| 利辛县| 合江县| 麻阳| 安图县| 广南县| 河池市| 革吉县| 叙永县| 成武县| 宝山区|