UTF-16系列文件的文件头信息,及相关去头处理
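在带文件头(BOM)的 UTF-16 系列文件中,观察到的前三个字节如下(注意:按 Unicode 标准,UTF-16 的 BOM 只有两个字节,即大端序的 0xFE 0xFF 或小端序的 0xFF 0xFE;下面列出的第三个字节属于 BOM 之后的内容,会随文件内容而变化):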
UTF-16 0xfe 0xff 0xfe
UTF-16LE 0xff 0xfe 0x2d
UTF-16BE 0xfe 0xff 0x4e
对于带有UTF-16系列文件头的字符串去除文件头的处理方法如下:
public static String removeUTFSignature(String strKey, String encoding) throws IOException { String resultKey = strKey; int index = 0; int[] utfHeader = new int[3]; switch (encoding) { case "UTF-16LE": utfHeader[0] = 0xff; utfHeader[1] = 0xfe; utfHeader[2] = 0x2d; break; case "UTF-16BE": utfHeader[0] = 0xfe; utfHeader[1] = 0xff; utfHeader[2] = 0x4e; break; case "UTF-16": utfHeader[0] = 0xfe; utfHeader[1] = 0xff; utfHeader[2] = 0xfe; break; default: // UTF-8 utfHeader[0] = 0xef; utfHeader[1] = 0xbb; utfHeader[2] = 0xbf; break; } DataInputStream dis = new DataInputStream(new ByteArrayInputStream(strKey.getBytes())); while (true) { int bb1 = dis.read(); int bb2 = dis.read(); int bb3 = dis.read(); if (bb1 == utfHeader[0] && bb2 == utfHeader[1] && bb3 == utfHeader[2]) { index += 3; } else { break; } } resultKey = new String(strKey.getBytes(), index, strKey.getBytes().length - index); return resultKey; }
浙公网安备 33010602011771号