UTF-8 与 GBK互转乱码的原因
/**
 * Demonstrates why text becomes garbled when bytes produced by the GBK
 * encoder are decoded as UTF-8.
 *
 * <p>The program encodes the same string with GBK and with UTF-8, decodes the
 * GBK bytes as UTF-8, and then round-trips the resulting (already garbled)
 * string through UTF-8 once more, printing every intermediate result.
 */
public class CodecTest {

    /**
     * Runs the demonstration and prints each intermediate result to standard output.
     *
     * @param args unused
     * @throws UnsupportedEncodingException if the runtime does not support the
     *         "GBK" or "utf-8" charset names
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        String s = "我是中国人";

        // Bytes of the string after GBK encoding:
        // gbkEncode[-50, -46, -54, -57, -42, -48, -71, -6, -56, -53]
        byte[] gbkEncode = s.getBytes("GBK");
        System.out.println("gbkEncode" + Arrays.toString(gbkEncode));

        // Bytes of the string after UTF-8 encoding:
        // utfEncode[-26, -120, -111, -26, -104, -81, -28, -72, -83, -27, -101, -67, -28, -70, -70]
        byte[] utfEncode = s.getBytes("utf-8");
        System.out.println("utfEncode" + Arrays.toString(utfEncode));

        // Decode the GBK bytes as UTF-8. Byte sequences that are not valid UTF-8
        // become U+FFFD, giving the string: �����й���
        String gbkDecodedByUTF = new String(gbkEncode, "utf-8");
        System.out.println(gbkDecodedByUTF);

        // Encode that garbled string with UTF-8 again. Every U+FFFD becomes
        // the three bytes -17, -65, -67:
        // [-17, -65, -67, -17, -65, -67, -17, -65, -67, -17, -65, -67, -17, -65, -67,
        //  -48, -71, -17, -65, -67, -17, -65, -67, -17, -65, -67]
        byte[] gbkDecodedByUTF_EncodeByUTF = gbkDecodedByUTF.getBytes("utf-8");
        System.out.println(Arrays.toString(gbkDecodedByUTF_EncodeByUTF));

        // Decoding the re-encoded bytes yields the same garbled string �����й���.
        // The damage was done when the GBK bytes were decoded as UTF-8 and
        // produced wrong characters. Encoding and decoding that wrong string
        // with UTF-8 is consistent (the string does not change), so the
        // original bytes can no longer be recovered.
        // Decode the bytes from the previous step, not the raw GBK bytes again,
        // so that this step really exercises the UTF-8 round trip.
        String gbkDecodedByUTF_EncodeByUTF_DecodeByUTF = new String(gbkDecodedByUTF_EncodeByUTF, "utf-8");
        System.out.println(gbkDecodedByUTF_EncodeByUTF_DecodeByUTF);
    }
}
Print:
gbkEncode[-50, -46, -54, -57, -42, -48, -71, -6, -56, -53]
utfEncode[-26, -120, -111, -26, -104, -81, -28, -72, -83, -27, -101, -67, -28, -70, -70]
�����й���
[-17, -65, -67, -17, -65, -67, -17, -65, -67, -17, -65, -67, -17, -65, -67, -48, -71, -17, -65, -67, -17, -65, -67, -17, -65, -67]
�����й���
utf-8编码格式:
ref:
字符编码笔记:ASCII,Unicode 和 UTF-8

浙公网安备 33010602011771号