名字只能由中文、英文字母和数字组成,编码为 UTF-8。
/*
 * Classify a single byte for name validation.
 *
 * Returns false when c is an ASCII digit, an ASCII upper-case letter or an
 * ASCII lower-case letter; returns true for every other value.
 */
static bool _illegal_char(char c)
{
    const bool is_digit = (c >= '0' && c <= '9'); /* codes 48..57  */
    const bool is_upper = (c >= 'A' && c <= 'Z'); /* codes 65..90  */
    const bool is_lower = (c >= 'a' && c <= 'z'); /* codes 97..122 */

    return !(is_digit || is_upper || is_lower);
}
/*
 * Scan a UTF-8 encoded name and report whether it contains a character that
 * is not allowed.
 *
 * str  bytes to examine.
 * len  maximum number of bytes to examine; scanning also stops at the first
 *      NUL byte.
 *
 * Returns true when an illegal character or a malformed UTF-8 sequence is
 * found, false otherwise.
 *
 * Rules applied per character:
 *   - one-byte (ASCII) characters must pass _illegal_char(), i.e. be a digit
 *     or a letter;
 *   - lead bytes 0x80..0xC3 are rejected: this covers stray continuation
 *     bytes, the overlong leads 0xC0/0xC1 and the U+0080..U+00FF range
 *     (encoded with leads 0xC2/0xC3);
 *   - every other two-, three- and four-byte character is accepted, provided
 *     the whole sequence lies inside the buffer and each trailing byte has
 *     the 10xxxxxx continuation form;
 *   - lead bytes 0xF5..0xFF never occur in UTF-8 and are rejected.
 */
static bool has_illegal_char(unsigned char* str, int len)
{
    int i = 0;

    /* Test the bound before touching str[i] so str[len] is never read. */
    while (i < len && str[i] != '\0')
    {
        const unsigned char lead = str[i];
        int width;

        if (lead < 0x80)
        {
            if (_illegal_char((char)lead))
            {
                return true;
            }
            width = 1;
        }
        else if (lead < 0xC4)
        {
            return true;
        }
        else if (lead < 0xE0)
        {
            width = 2;
        }
        else if (lead < 0xF0)
        {
            width = 3;
        }
        else if (lead < 0xF5)
        {
            width = 4;
        }
        else
        {
            return true;
        }

        /* A sequence cut off by the end of the buffer is malformed. */
        if (width > len - i)
        {
            return true;
        }

        /* Trailing bytes must be continuation bytes; this also keeps the
         * scan from stepping over a NUL terminator. */
        for (int j = 1; j < width; j++)
        {
            if ((str[i + j] & 0xC0) != 0x80)
            {
                return true;
            }
        }

        i += width;
    }

    return false;
}
以下版本把中文限定在 0x4e00-0x9fa5 范围内,其余字符一律排除。做法比较粗暴,可按需修改。
/*
 * Scan a UTF-8 encoded name and report whether it contains anything other
 * than ASCII letters, ASCII digits and characters in U+4E00..U+9FA5.
 *
 * str  bytes to examine.
 * len  maximum number of bytes to examine; scanning also stops at the first
 *      NUL byte.
 *
 * Returns true when an illegal character or a malformed UTF-8 sequence is
 * found, false otherwise.
 *
 * U+4E00..U+9FA5 is encoded as E4 B8 80 .. E9 BE A5, always three bytes, so
 * every two-byte and four-byte sequence is rejected outright and each
 * three-byte sequence is decoded and range-checked on its code point.
 */
static bool has_illegal_char(unsigned char* str, int len)
{
    int i = 0;

    /* Test the bound before touching str[i] so str[len] is never read. */
    while (i < len && str[i] != '\0')
    {
        const unsigned char lead = str[i];

        if (lead < 0x80)
        {
            /* One-byte character: must be a digit or a letter. */
            if (_illegal_char((char)lead))
            {
                return true;
            }
            i += 1;
            continue;
        }

        /* Only three-byte leads (0xE0..0xEF) can start an allowed character. */
        if (lead < 0xE0 || lead >= 0xF0)
        {
            return true;
        }

        /* A sequence cut off by the end of the buffer is malformed. */
        if (len - i < 3)
        {
            return true;
        }

        /* Both trailing bytes must have the 10xxxxxx continuation form. */
        if ((str[i + 1] & 0xC0) != 0x80 || (str[i + 2] & 0xC0) != 0x80)
        {
            return true;
        }

        /* Rebuild the code point: 4 bits from the lead, 6 from each trail. */
        const unsigned int code_point =
            ((unsigned int)(lead & 0x0F) << 12) |
            ((unsigned int)(str[i + 1] & 0x3F) << 6) |
            (unsigned int)(str[i + 2] & 0x3F);

        if (code_point < 0x4E00 || code_point > 0x9FA5)
        {
            return true;
        }

        i += 3;
    }

    return false;
}
这篇文章:
http://www.ruanyifeng.com/blog/2007/10/ascii_unicode_and_utf-8.html
讲解utf8编码比较清楚.
浙公网安备 33010602011771号