汉字与 unicode 编码相互转化

一、 概述：

如果项目采用了 GBK 的编码，那么汉字转化就不是问题了。但是如果采用了 utf-8 的编码，汉字的处理就相对比较麻烦一些。

二、 功能实现：

代码如下：

// 转为unicode
2

public static void writeUnicode( final DataOutputStream out,
3

final String value)

{
4

try

{
5

final String unicode = gbEncoding(value);
6

final byte [] data = unicode.getBytes();
7

final int dataLength = data.length;
8

System.out.println( " Data Length is: " + dataLength);
10

System.out.println( " Data is: " + value);
11

out.writeInt(dataLength); // 先写出字符串的长度
12

out.write(data, 0 , dataLength); // 然后写出转化后的字符串
13

} catch (IOException e)

{
14

}
16

}
17

public static String gbEncoding( final String gbString)

{
19

char [] utfBytes = gbString.toCharArray();
20

String unicodeBytes = "" ;
21

for ( int byteIndex = 0 ; byteIndex < utfBytes.length; byteIndex ++ )

{
22

String hexB = Integer.toHexString(utfBytes[byteIndex]);
23

if (hexB.length() <= 2 )

{
24

hexB = " 00 " + hexB;
25

}
26

unicodeBytes = unicodeBytes + " \\u " + hexB;
27

}
28

// System.out.println("unicodeBytes is: " + unicodeBytes);
29

return unicodeBytes;
30

}
31

/** */ /**
33

* This method will decode the String to a recognized String in ui.
34

* 功能:将unicod码转为需要的格式(utf-8)
35

* @author javajohn
36

* @param dataStr
37

* @return
38

*/
39

public static StringBuffer decodeUnicode( final String dataStr)

{
40

final StringBuffer buffer = new StringBuffer();
41

String tempStr = "" ;
42

String operStr = dataStr;
43

if (operStr != null && operStr.indexOf( " \\u " ) == - 1 ) return buffer.append(operStr); //
44

if (operStr != null && ! operStr.equals( "" ) && ! operStr.startsWith( " \\u " ))

{ //
45

tempStr = operStr.substring( 0 ,operStr.indexOf( " \\u " )); //
46

operStr = operStr.substring(operStr.indexOf("\\u"),operStr.length());//operStr字符一定是以unicode编码字符打头的字符串
47

}
48

buffer.append(tempStr);
49

while (operStr != null && ! operStr.equals( "" ) && operStr.startsWith( " \\u " ))

{ // 循环处理,处理对象一定是以unicode编码字符打头的字符串
50

tempStr = operStr.substring( 0 , 6 );
51

operStr = operStr.substring( 6 ,operStr.length());
52

String charStr = "" ;
53

charStr = tempStr.substring( 2 , tempStr.length());
54

char letter = ( char ) Integer.parseInt(charStr, 16 ); // 16进制parse整形字符串。
55

buffer.append( new Character(letter).toString());
56

if (operStr.indexOf( " \\u " ) == - 1 )

{ //
57

buffer.append(operStr);
58

} else

{ // 处理operStr使其打头字符为unicode字符
59

tempStr = operStr.substring( 0 ,operStr.indexOf( " \\u " ));
60

operStr = operStr.substring(operStr.indexOf( " \\u " ),operStr.length());
61

buffer.append(tempStr);
62

}
63

}
64

return buffer;
65

}

posted on 2006-07-17 21:44 kavenmo 阅读(1156) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

无思

公告