一個 UTF-8 轉換程序
據說這是一個通用的、可在手機上使用的 UTF-8 轉換程序,先記下來。
private final String readUnicodeFileUTF8(String filename) {
2
StringBuffer sb = new StringBuffer(256);
3
try {
4
int[] surrogatePair = new int[2];
5
InputStream is = this.getClass().getResourceAsStream(filename);
6
7
int val = 0;
8
int unicharCount = 0;
9
while ((val = readNextCharFromStreamUTF8(is))!=-1) {
10
unicharCount++;
11
if (val <= 0xFFFF) {
12
// if first value is the Byte Order Mark (BOM), do not add
13
if (! (unicharCount == 1 && val == 0xFEFF)) {
14
sb.append((char)val);
15
}
16
} else {
17
supplementCodePointToSurrogatePair(val, surrogatePair);
18
sb.append((char)surrogatePair[0]);
19
sb.append((char)surrogatePair[1]);
20
}
21
}
22
is.close();
23
} catch (Exception e) {};
24
25
return new String(sb);
26
}
27
28
private final static int readNextCharFromStreamUTF8(InputStream is) {
29
int c = -1;
30
if (is==null) return c;
31
boolean complete = false;
32
33
try {
34
int byteVal;
35
int expecting=0;
36
int composedVal=0;
37
38
while (!complete && (byteVal = is.read()) != -1) {
39
if (expecting > 0 && (byteVal & 0xC0) == 0x80) { /* 10xxxxxx */
40
expecting--;
41
composedVal = composedVal | ((byteVal & 0x3F) << (expecting*6));
42
if (expecting == 0) {
43
c = composedVal;
44
complete = true;
45
//System.out.println("appending: U+" + Integer.toHexString(composedVal) );
46
}
47
} else {
48
composedVal = 0;
49
expecting = 0;
50
if ((byteVal & 0x80) == 0) { /* 0xxxxxxx */
51
// one byte character, no extending byte expected
52
c = byteVal;
53
complete = true;
54
//System.out.println("appending: U+" + Integer.toHexString(byteVal) );
55
} else if ((byteVal & 0xE0) == 0xC0) { /* 110xxxxx */
56
expecting = 1; // expecting 1 extending byte
57
composedVal = ((byteVal & 0x1F) << 6);
58
} else if ((byteVal & 0xF0) == 0xE0) { /* 1110xxxx */
59
expecting = 2; // expecting 2 extending bytes
60
composedVal = ((byteVal & 0x0F) << 12);
61
} else if ((byteVal & 0xF8) == 0xF0) { /* 11110xxx */
62
expecting = 3; // expecting 3 extending bytes
63
composedVal = ((byteVal & 0x07) << 18);
64
} else {
65
// non conformant utf-8, ignore or catch error
66
}
67
}
68
}
69
70
} catch (Exception e) {
71
System.out.println(e.toString());
72
}
73
74
return c;
75
}
76
77
private final static void supplementCodePointToSurrogatePair(int codePoint, int[] surrogatePair) {
78
int high4 = ((codePoint >> 16) & 0x1F) - 1;
79
int mid6 = ((codePoint >> 10) & 0x3F);
80
int low10 = codePoint & 0x3FF;
81
82
surrogatePair[0] = (0xD800 | (high4 << 6) | (mid6));
83
surrogatePair[1] = (0xDC00 | (low10));
84
}

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

posted on 2009-06-07 16:37 江天部落格 閱讀(298) 評論(0) 編輯 收藏 所屬分類: Android 、Java