package com.util; import java.io.FileInputStream; import java.io.InputStream; import java.io.UnsupportedEncodingException; /** * 处理UTF-8编码的不连续的字节流 * @author Administrator * */ public class HandlerUTF8 { /** 缓存不够一个字符的byte*/ private byte[] cacheByte = new byte[6]; /** byte数*/ private int cacheCount = 0; /** * 解码UTF-8字节 * @param buf * @return * @throws UnsupportedEncodingException */ public String getString(byte[] buf) throws UnsupportedEncodingException{ byte[] source = null; //上次不够一个字符的byte拼在前面 if (cacheCount > 0){ source = new byte[cacheCount + buf.length]; System.arraycopy(cacheByte, 0, source, 0, cacheCount); System.arraycopy(buf, 0, source, cacheCount, buf.length); }else{ source = buf; } cacheCount = HandlerUTF8(source); if (cacheCount > 0) System.arraycopy(source, source.length - cacheCount, cacheByte, 0, cacheCount); return new String(source,0,source.length -cacheCount,"utf-8"); } /** * UTF-8字符最长6个字节,截取最后6个字节分析 * U-00000000 - U-0000007F: 0xxxxxxx * U-00000080 - U-000007FF: 110xxxxx 10xxxxxx * U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx * U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx * U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * @param buf * @return */ private int HandlerUTF8(byte[] buf){ byte[] source = null; if (buf.length > 6){ source = new byte[6]; System.arraycopy(buf, buf.length - 6, source, 0, 6); }else{ source = buf; } for (int i = 0; i < source.length; i++){ int temp = source[i] & 0xFF; if (temp >> 5 == 0x06){ if (source.length - i < 2) return source.length - i; }else if (temp >> 4 == 0x0E){ if (source.length - i < 3) return source.length - i; }else if (temp >> 3 == 0x1E){ if (source.length - i < 4) return source.length - i; }else if (temp >> 2 == 0x3E){ if (source.length - i < 5) return source.length - i; }else if (temp >> 1 == 0x7E){ if (source.length - i < 6) return source.length - i; } } return 0; } public static void main(String[] args) throws Exception{ // String sourceString = "测试UTF-8字符串"; // byte[] buf = sourceString.getBytes("utf-8"); // for (int i = 0; i < buf.length; i++) // System.out.print(buf[i] + ","); //{-26,-75,-117,-24,-81,-107,85,84,70,45,56,-27,-83,-105,-25,-84,-90,-28,-72,-78}; byte[] buf1 = {-26,-75,-117,-24,-81,-107,85,84,70,45,56,-27,-83,-105,-25,-84,-90,-28}; byte[] buf2 = {-72,-78}; HandlerUTF8 handlerUTF8 = new HandlerUTF8(); String str = handlerUTF8.getString(buf1); System.out.print(str); String str2 = handlerUTF8.getString(buf2); System.out.print(str2); } }