以指定编码对字符串进行截取
假设有字符串“朱元璋”,截取限定的长度为3字节
截取编码为utf-8 结果为“朱”
截取编码为gb2312时 结果为“朱”
字符串为“hello朱元璋”,截取限定的长度为13字节
截取编码为utf-8 结果为“hello朱元”
这里的关键在于:一个汉字在gbk(gb2312)编码下占2个字节,而在utf-8编码下占3个字节。完整的示例代码:
1: 2: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
3: <html> 4: <head>5: <title> new document </title>
6: <meta name="generator" content="editplus" />
7: <meta name="author" content="" />
8: <meta name="keywords" content="" />
9: <meta name="description" content="" />
10: <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
11: </head> 12: 13: <body>14: <script type="text/javascript">
/**
 * Reports whether a complete UTF-16 surrogate pair (high unit followed by a
 * low unit) starts at the given index.
 *
 * @param {string} str - Text to inspect.
 * @param {number} index - Index of the candidate high surrogate.
 * @returns {boolean} True when str[index] and str[index + 1] form a pair.
 */
function isSurrogatePairAt(str, index) {
  var high = str.charCodeAt(index);
  if (high < 0xD800 || high > 0xDBFF) {
    return false;
  }
  // charCodeAt past the end yields NaN, which fails both comparisons below.
  var low = str.charCodeAt(index + 1);
  return low >= 0xDC00 && low <= 0xDFFF;
}

/**
 * Returns the number of bytes taken by the character that starts at `index`.
 *
 * UTF-8 mode follows the real UTF-8 widths (1-4 bytes). The non-UTF-8 mode
 * keeps the double-byte model this file has always used: every UTF-16 unit
 * above 0xFF counts as two bytes, everything else as one.
 *
 * @param {string} str - Text to inspect.
 * @param {number} index - Index of the first UTF-16 unit of the character.
 * @param {boolean} isUTF8 - True for UTF-8 widths, false for double-byte widths.
 * @returns {number} Byte width of that character.
 */
function getCharByteLen(str, index, isUTF8) {
  // A surrogate pair is 4 bytes in UTF-8, and 2 + 2 bytes in the double-byte
  // model (each unit is above 0xFF).
  if (isSurrogatePairAt(str, index)) {
    return 4;
  }

  var code = str.charCodeAt(index);
  if (!isUTF8) {
    return code > 0xFF ? 2 : 1;
  }
  if (code < 0x80) {
    return 1;
  }
  if (code < 0x800) {
    return 2;
  }
  // Remaining BMP characters take 3 bytes. A lone surrogate is counted as
  // 3 bytes too (the size of U+FFFD) instead of raising URIError.
  return 3;
}

/**
 * Computes the encoded byte length of a string.
 *
 * @param {*} str - Value to measure; anything that is not a string yields 0.
 * @param {boolean} isUTF8 - True to measure as UTF-8, false to measure with
 *     the double-byte (GBK/GB2312-style) model.
 * @returns {number} Length in bytes.
 */
function getRealLen(str, isUTF8) {
  if (typeof str !== 'string') {
    return 0;
  }

  var total = 0;
  var i = 0;
  while (i < str.length) {
    total += getCharByteLen(str, i, isUTF8);
    i += isSurrogatePairAt(str, i) ? 2 : 1;
  }
  return total;
}

/**
 * Returns the longest prefix of `str` whose encoded size does not exceed
 * `len` bytes, never cutting through a character or a surrogate pair.
 *
 * @param {*} str - Text to truncate; null/undefined are treated as ''.
 * @param {boolean} isUTF8 - True to measure as UTF-8, false to measure with
 *     the double-byte (GBK/GB2312-style) model.
 * @param {number|string} len - Byte limit. If it is not numeric the text is
 *     returned unchanged.
 * @param {string} [suffix] - Appended only when the text was actually
 *     truncated; its own size is not counted against `len`.
 * @returns {string} The (possibly truncated) text.
 */
function uniLeft(str, isUTF8, len, suffix) {
  var text = str == null ? '' : String(str);
  var limit = len * 1;
  var tail = suffix == null ? '' : String(suffix);

  if (isNaN(limit)) {
    return text;
  }

  // Single forward pass: accumulate byte widths and stop right before the
  // character that would push the total over the limit.
  var used = 0;
  var end = 0;
  while (end < text.length) {
    var bytes = getCharByteLen(text, end, isUTF8);
    if (used + bytes > limit) {
      return text.substr(0, end) + tail;
    }
    used += bytes;
    end += isSurrogatePairAt(text, end) ? 2 : 1;
  }
  return text;
}

// Sample inputs used by the demo output that follows.
var str_1 = '朱元璋';
var str_2 = 'hello朱元璋';
// Demo 1: cut the all-Chinese sample to 3 bytes using the double-byte
// (gb2312) measurement, then emit a line break.
document.write(uniLeft(str_1, false, 3) + "<br/>");

// Demo 2: cut the mixed ASCII/Chinese sample to 13 bytes using the UTF-8
// measurement.
document.write(uniLeft(str_2, true, 13));
57: </script> 58: </body> 59: </html>
这里从len/2开始试着截取字符串,截取后调用getRealLen方法获取真实长度,判断是否超出限定的长度
浙公网安备 33010602011771号