Blogger Scott

          一個 UTF-8 轉換程序

          據說這是一個通用的、可在手機上使用的 UTF-8 轉換程序，先記下來。

           1private final String readUnicodeFileUTF8(String filename) {
           2        StringBuffer sb = new StringBuffer(256);
           3        try {
           4            int[] surrogatePair = new int[2];
           5            InputStream is = this.getClass().getResourceAsStream(filename);
           6
           7            int val = 0;
           8            int unicharCount = 0;
           9            while ((val = readNextCharFromStreamUTF8(is))!=-1{
          10                unicharCount++;
          11                if (val <= 0xFFFF{
          12                    // if first value is the Byte Order Mark (BOM), do not add
          13                    if (! (unicharCount == 1 && val == 0xFEFF)) {
          14                        sb.append((char)val);
          15                    }

          16                }
           else {
          17                    supplementCodePointToSurrogatePair(val, surrogatePair);
          18                    sb.append((char)surrogatePair[0]);
          19                    sb.append((char)surrogatePair[1]);
          20                }

          21            }

          22            is.close();
          23        }
           catch (Exception e) {};
          24
          25        return new String(sb);
          26    }

          27   
          28    private final static int readNextCharFromStreamUTF8(InputStream is) {
          29        int c = -1;
          30        if (is==nullreturn c;
          31        boolean complete = false;
          32       
          33        try {
          34            int byteVal;
          35            int expecting=0;
          36            int composedVal=0;
          37           
          38            while (!complete && (byteVal = is.read()) != -1{
          39                if (expecting > 0 && (byteVal & 0xC0== 0x80{  /* 10xxxxxx */
          40                    expecting--;
          41                    composedVal = composedVal | ((byteVal & 0x3F<< (expecting*6));
          42                    if (expecting == 0{
          43                        c = composedVal;
          44                        complete = true;
          45                        //System.out.println("appending: U+" + Integer.toHexString(composedVal) );
          46                    }

          47                }
           else {
          48                    composedVal = 0;
          49                    expecting = 0;
          50                    if ((byteVal & 0x80== 0{    /* 0xxxxxxx */
          51                        // one byte character, no extending byte expected
          52                        c = byteVal;
          53                        complete = true;
          54                        //System.out.println("appending: U+" + Integer.toHexString(byteVal) );
          55                    }
           else if ((byteVal & 0xE0== 0xC0{  /* 110xxxxx */
          56                        expecting = 1;  // expecting 1 extending byte
          57                        composedVal = ((byteVal & 0x1F<< 6);
          58                    }
           else if ((byteVal & 0xF0== 0xE0{  /* 1110xxxx */
          59                        expecting = 2;  // expecting 2 extending bytes
          60                        composedVal = ((byteVal & 0x0F<< 12);
          61                    }
           else if ((byteVal & 0xF8== 0xF0{  /* 11110xxx */
          62                        expecting = 3;  // expecting 3 extending bytes
          63                        composedVal = ((byteVal & 0x07<< 18);
          64                    }
           else {
          65                        // non conformant utf-8, ignore or catch error
          66                    }

          67                }

          68            }

          69           
          70        }
           catch (Exception e) {
          71            System.out.println(e.toString());
          72        }

          73       
          74        return c;
          75    }

          76
          77    private final static void supplementCodePointToSurrogatePair(int codePoint, int[] surrogatePair) {
          78        int high4 = ((codePoint >> 16& 0x1F- 1;
          79        int mid6 = ((codePoint >> 10& 0x3F);
          80        int low10 = codePoint & 0x3FF;
          81
          82        surrogatePair[0= (0xD800 | (high4 << 6| (mid6));
          83        surrogatePair[1= (0xDC00 | (low10));
          84    }

          posted on 2009-06-07 16:37 江天部落格 閱讀(298) 評論(0)  編輯  收藏 所屬分類: Android 、Java

          主站蜘蛛池模板: 阿克苏市| 沙雅县| 合阳县| 眉山市| 漾濞| 泸溪县| 承德县| 古浪县| 剑阁县| 铜山县| 玉田县| 崇州市| 鹤岗市| 泾川县| 吕梁市| 岳西县| 左贡县| 离岛区| 滁州市| 济宁市| 江达县| 衡南县| 梓潼县| 淅川县| 赤水市| 商河县| 淮滨县| 杭州市| 昆山市| 通渭县| 凤凰县| 西盟| 黄冈市| 化德县| 平谷区| 新和县| 萝北县| 阜南县| 镇安县| 酉阳| 汶上县|