package com.util;

import java.io.FileInputStream;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;

/**
 * Decodes UTF-8 text that arrives as a series of separate byte chunks.
 *
 * <p>A multi-byte character may be split across two or more chunks. Each call to
 * {@link #getString(byte[])} decodes every complete character available so far and holds
 * back the bytes of a trailing, not-yet-complete character until a later call supplies
 * the remaining bytes.
 *
 * <p>An instance keeps state between calls and performs no synchronization, so use one
 * instance per byte stream.
 */
public class HandlerUTF8 {
    /** Longest sequence recognised by the lead-byte table (the original 1-to-6 byte UTF-8 scheme). */
    private static final int MAX_SEQUENCE_LENGTH = 6;

    /** Bytes of a trailing incomplete character held back by the previous call. */
    private final byte[] cacheByte = new byte[MAX_SEQUENCE_LENGTH];
    /** Number of valid bytes currently stored in {@link #cacheByte}. */
    private int cacheCount = 0;

    /**
     * Decodes the next chunk of a UTF-8 byte stream.
     *
     * <p>Bytes held back by the previous call are logically prepended to {@code buf}. All
     * complete characters are decoded and returned; the bytes of a trailing incomplete
     * character (if any) are stored for the next call. Malformed input is not held back and
     * is decoded with the charset's replacement character.
     *
     * @param buf the next chunk of bytes; must not be {@code null}, may be empty
     * @return the text decoded from the complete characters available so far (possibly empty)
     * @throws NullPointerException if {@code buf} is {@code null}
     * @throws UnsupportedEncodingException never thrown by this implementation; the clause is
     *         kept so that existing callers which catch it continue to compile
     */
    public String getString(byte[] buf) throws UnsupportedEncodingException {
        if (buf == null) {
            throw new NullPointerException("buf must not be null");
        }

        // Prepend the bytes left over from the previous chunk, if there are any.
        final byte[] source;
        if (cacheCount > 0) {
            source = new byte[cacheCount + buf.length];
            System.arraycopy(cacheByte, 0, source, 0, cacheCount);
            System.arraycopy(buf, 0, source, cacheCount, buf.length);
        } else {
            source = buf;
        }

        cacheCount = countTrailingIncompleteBytes(source);
        final int decodableLength = source.length - cacheCount;
        if (cacheCount > 0) {
            System.arraycopy(source, decodableLength, cacheByte, 0, cacheCount);
        }

        return new String(source, 0, decodableLength, StandardCharsets.UTF_8);
    }

    /**
     * Counts the bytes at the end of {@code buf} that form the start of a character whose
     * remaining bytes have not arrived yet.
     *
     * <p>The scan walks backwards over trailing continuation bytes ({@code 10xxxxxx}) until
     * it reaches the byte that starts the last sequence, looking at no more than the final
     * {@value #MAX_SEQUENCE_LENGTH} bytes. Only a lead byte followed exclusively by
     * continuation bytes can be a truncated character, so a lead byte followed by some other
     * byte is never held back.
     *
     * <pre>
     *   U-00000000 - U-0000007F:  0xxxxxxx
     *   U-00000080 - U-000007FF:  110xxxxx 10xxxxxx
     *   U-00000800 - U-0000FFFF:  1110xxxx 10xxxxxx 10xxxxxx
     *   U-00010000 - U-001FFFFF:  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     *   U-00200000 - U-03FFFFFF:  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     *   U-04000000 - U-7FFFFFFF:  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     * </pre>
     *
     * @param buf the bytes to inspect
     * @return the number of trailing bytes to hold back, between 0 and
     *         {@code MAX_SEQUENCE_LENGTH - 1}
     */
    private static int countTrailingIncompleteBytes(byte[] buf) {
        final int end = buf.length;
        final int lowestIndex = Math.max(0, end - MAX_SEQUENCE_LENGTH);

        for (int i = end - 1; i >= lowestIndex; i--) {
            final int value = buf[i] & 0xFF;
            if ((value & 0xC0) == 0x80) {
                // Continuation byte: keep looking for the byte that starts this sequence.
                continue;
            }
            final int available = end - i;
            return expectedSequenceLength(value) > available ? available : 0;
        }

        // Nothing but continuation bytes in the window: malformed, so nothing to wait for.
        return 0;
    }

    /**
     * Returns the total sequence length announced by the first byte of a sequence.
     *
     * @param leadByte the first byte of a sequence as an unsigned value (0-255)
     * @return the announced length (1-6); 1 for bytes that match no lead-byte pattern
     */
    private static int expectedSequenceLength(int leadByte) {
        if ((leadByte & 0x80) == 0x00) {
            return 1; // 0xxxxxxx
        }
        if ((leadByte & 0xE0) == 0xC0) {
            return 2; // 110xxxxx
        }
        if ((leadByte & 0xF0) == 0xE0) {
            return 3; // 1110xxxx
        }
        if ((leadByte & 0xF8) == 0xF0) {
            return 4; // 11110xxx
        }
        if ((leadByte & 0xFC) == 0xF8) {
            return 5; // 111110xx
        }
        if ((leadByte & 0xFE) == 0xFC) {
            return 6; // 1111110x
        }
        return 1; // 0xFE and 0xFF match no pattern in the table
    }

    /**
     * Small demonstration: decodes a sample whose last character is split across two chunks
     * and prints the decoded text of each chunk.
     *
     * @param args unused
     * @throws Exception if decoding fails
     */
    public static void main(String[] args) throws Exception {
        // The complete UTF-8 sample is
        // {-26,-75,-117,-24,-81,-107,85,84,70,45,56,-27,-83,-105,-25,-84,-90,-28,-72,-78};
        // it is cut after the first byte (-28) of its final three-byte character.
        byte[] buf1 = {-26, -75, -117, -24, -81, -107, 85, 84, 70, 45, 56, -27, -83, -105, -25, -84, -90, -28};
        byte[] buf2 = {-72, -78};

        HandlerUTF8 handlerUTF8 = new HandlerUTF8();

        String str = handlerUTF8.getString(buf1);
        System.out.print(str);

        String str2 = handlerUTF8.getString(buf2);
        System.out.print(str2);
    }
}