Blogger Scott

          一個utf8轉換程序

          據說是一個通用的手機上使用的UTF8轉換程序,先記下來。

           1private final String readUnicodeFileUTF8(String filename) {
           2        StringBuffer sb = new StringBuffer(256);
           3        try {
           4            int[] surrogatePair = new int[2];
           5            InputStream is = this.getClass().getResourceAsStream(filename);
           6
           7            int val = 0;
           8            int unicharCount = 0;
           9            while ((val = readNextCharFromStreamUTF8(is))!=-1{
          10                unicharCount++;
          11                if (val <= 0xFFFF{
          12                    // if first value is the Byte Order Mark (BOM), do not add
          13                    if (! (unicharCount == 1 && val == 0xFEFF)) {
          14                        sb.append((char)val);
          15                    }

          16                }
           else {
          17                    supplementCodePointToSurrogatePair(val, surrogatePair);
          18                    sb.append((char)surrogatePair[0]);
          19                    sb.append((char)surrogatePair[1]);
          20                }

          21            }

          22            is.close();
          23        }
           catch (Exception e) {};
          24
          25        return new String(sb);
          26    }

          27   
          28    private final static int readNextCharFromStreamUTF8(InputStream is) {
          29        int c = -1;
          30        if (is==nullreturn c;
          31        boolean complete = false;
          32       
          33        try {
          34            int byteVal;
          35            int expecting=0;
          36            int composedVal=0;
          37           
          38            while (!complete && (byteVal = is.read()) != -1{
          39                if (expecting > 0 && (byteVal & 0xC0== 0x80{  /* 10xxxxxx */
          40                    expecting--;
          41                    composedVal = composedVal | ((byteVal & 0x3F<< (expecting*6));
          42                    if (expecting == 0{
          43                        c = composedVal;
          44                        complete = true;
          45                        //System.out.println("appending: U+" + Integer.toHexString(composedVal) );
          46                    }

          47                }
           else {
          48                    composedVal = 0;
          49                    expecting = 0;
          50                    if ((byteVal & 0x80== 0{    /* 0xxxxxxx */
          51                        // one byte character, no extending byte expected
          52                        c = byteVal;
          53                        complete = true;
          54                        //System.out.println("appending: U+" + Integer.toHexString(byteVal) );
          55                    }
           else if ((byteVal & 0xE0== 0xC0{  /* 110xxxxx */
          56                        expecting = 1;  // expecting 1 extending byte
          57                        composedVal = ((byteVal & 0x1F<< 6);
          58                    }
           else if ((byteVal & 0xF0== 0xE0{  /* 1110xxxx */
          59                        expecting = 2;  // expecting 2 extending bytes
          60                        composedVal = ((byteVal & 0x0F<< 12);
          61                    }
           else if ((byteVal & 0xF8== 0xF0{  /* 11110xxx */
          62                        expecting = 3;  // expecting 3 extending bytes
          63                        composedVal = ((byteVal & 0x07<< 18);
          64                    }
           else {
          65                        // non conformant utf-8, ignore or catch error
          66                    }

          67                }

          68            }

          69           
          70        }
           catch (Exception e) {
          71            System.out.println(e.toString());
          72        }

          73       
          74        return c;
          75    }

          76
          77    private final static void supplementCodePointToSurrogatePair(int codePoint, int[] surrogatePair) {
          78        int high4 = ((codePoint >> 16& 0x1F- 1;
          79        int mid6 = ((codePoint >> 10& 0x3F);
          80        int low10 = codePoint & 0x3FF;
          81
          82        surrogatePair[0= (0xD800 | (high4 << 6| (mid6));
          83        surrogatePair[1= (0xDC00 | (low10));
          84    }

          posted on 2009-06-07 16:37 江天部落格 閱讀(307) 評論(0)  編輯  收藏 所屬分類: AndroidJava

          主站蜘蛛池模板: 房产| 麻栗坡县| 奉化市| 浦县| 大渡口区| 龙泉市| 永福县| 成安县| 泰顺县| 甘孜| 泽州县| 社旗县| 泗水县| 富源县| 柞水县| 都匀市| 徐闻县| 洛宁县| 大石桥市| 射阳县| 前郭尔| 彰化市| 纳雍县| 宜黄县| 峨山| 长宁县| 秦皇岛市| 青海省| 赤水市| 太仆寺旗| 江达县| 江山市| 贵州省| 沧源| 巴彦淖尔市| 汾阳市| 临朐县| 凤冈县| 德钦县| 台湾省| 虞城县|