1 public static void main(String[] args) throws IOException {
2 String str="ab你好cd谢谢";
3 int len =str.getBytes("gbk").length;
4 for(int i=0;i<len;i++) {
5 System.out.println("GBK截取"+(i+1)+"个字节结果是:"+cutStringByBytes(str,i+1));
6 }
7
8 len =str.getBytes("UTF-8").length;
9 for(int i=0;i<len;i++) {
10 System.out.println("UTF截取"+(i+1)+"个字节结果是:"+cutStringByUTFBytes(str,i+1));
11 }
12 }
13
14 /**
15 * 思想:UTF-8编码,中文是三个字节表示,英文是一个字节
16 * 并且中文字中有第一个字节码是小于零的
17 * 97,98,-28,-67,-96,-27,-91,-67,99,100,-24,-80,-94,-24,-80,-94
18 * 根据截取位置向前计数,查看计数除3取余,为零:不舍,为1:舍1位,为21:舍2位
19 * @param str
20 * @param len
21 * @return
22 * @throws IOException
23 */
24 private static String cutStringByUTFBytes(String str, int len) throws IOException {
25 byte[] buf=str.getBytes("UTF-8");
26 int count=0;
27 for(int i=len-1;i>=0;i--) {
28 if(buf[i]<0)
29 count++;
30 else {
31 break;
32 }
33 }
34 if(count%3==0)
35 return new String(buf,0,len,"UTF-8");
36 else if(count%3==1)
37 return new String(buf,0,len-1,"UTF-8");
38 else
39 return new String(buf,0,len-2,"UTF-8");
40
41 }
42
43 /**
44 * 思想:gbk编码,中文是两个字节表示,英文是一个字节
45 * 并且中文字中有第一个字节码是小于零的
46 * 97,98,-60,-29,-70,-61,99,100,-48,-69,-48,-69
47 * 根据截取位置向前计数,查看计数的奇偶数,奇数舍弃,偶数保留
48 * @param str
49 * @param len
50 * @return
51 * @throws IOException
52 */
53 private static String cutStringByBytes(String str, int len) throws IOException {
54 byte[] buf=str.getBytes("gbk");
55 int count=0;
56 for(int i=len-1;i>=0;i--) {
57 if(buf[i]<0)
58 count++;
59 else {
60 break;
61 }
62 }
63 if(count%2==0)
64 return new String(buf,0,len,"gbk");
65 else
66 return new String(buf,0,len-1,"gbk");
67 }