HttpUtility.HtmlEncode() 与 HttpUtility.UrlEncode()

1. 我们先来看一看 HttpUtility.HtmlEncode。由于解码是编码的逆过程，所以这里只看编码是怎么实现的。要看实现当然得用反编译工具，这里用的是 ILSpy。

/// <summary>
/// HTML-encodes a string by replacing every character reported by
/// <c>HttpUtility.IndexOfHtmlEncodingChars</c> with an entity or a decimal
/// numeric character reference.
/// </summary>
/// <param name="s">The text to encode; may be <c>null</c>.</param>
/// <returns>
/// <c>null</c> when <paramref name="s"/> is <c>null</c>; the same string instance when
/// no character needs encoding; otherwise a new, encoded string.
/// </returns>
public static string HtmlEncode(string s)
{
    if (s == null)
    {
        return null;
    }

    int hit = HttpUtility.IndexOfHtmlEncodingChars(s, 0);
    if (hit == -1)
    {
        // Nothing to encode: hand back the input untouched, no allocation.
        return s;
    }

    int total = s.Length;
    StringBuilder output = new StringBuilder(total + 5);
    int copied = 0; // index of the first character not yet written to output

    while (true)
    {
        // Copy the run of characters between the previous hit and this one verbatim.
        if (hit > copied)
        {
            output.Append(s, copied, hit - copied);
        }

        char ch = s[hit];
        switch (ch)
        {
            case '"':
                output.Append("&quot;");
                break;
            case '&':
                output.Append("&amp;");
                break;
            case '<':
                output.Append("&lt;");
                break;
            case '>':
                output.Append("&gt;");
                break;
            default:
                // Any reported character above '>' becomes a decimal reference, e.g. &#169;
                // Characters at or below '>' other than the four above emit nothing.
                if (ch > '>')
                {
                    output.Append("&#");
                    output.Append(((int)ch).ToString(NumberFormatInfo.InvariantInfo));
                    output.Append(';');
                }
                break;
        }

        copied = hit + 1;
        if (copied >= total)
        {
            // The encoded character was the last one: there is no tail to copy.
            break;
        }

        hit = HttpUtility.IndexOfHtmlEncodingChars(s, copied);
        if (hit == -1)
        {
            // No further hits: flush the remaining tail and finish.
            output.Append(s, copied, total - copied);
            break;
        }
    }

    return output.ToString();
}

/// <summary>
/// Finds the position of the first character, at or after <paramref name="startPos"/>,
/// that has to be HTML-encoded: <c>"</c>, <c>&amp;</c>, <c>&lt;</c>, <c>&gt;</c>, or any
/// character in the range U+00A0 to U+00FF.
/// </summary>
/// <param name="s">The string to scan.</param>
/// <param name="startPos">Index at which scanning starts; it is not validated here.</param>
/// <returns>The index of the first such character, or -1 when there is none.</returns>
private unsafe static int IndexOfHtmlEncodingChars(string s, int startPos)
{
    int remaining = s.Length - startPos;
    fixed (char* start = s)
    {
        for (char* cursor = start + startPos; remaining > 0; cursor++, remaining--)
        {
            char ch = *cursor;
            bool needsEncoding;
            if (ch <= '>')
            {
                // Low range: only the four markup-significant characters qualify.
                needsEncoding = ch == '"' || ch == '&' || ch == '<' || ch == '>';
            }
            else
            {
                // High range: U+00A0 up to, but not including, U+0100.
                needsEncoding = ch >= '\u00a0' && ch < '\u0100';
            }

            if (needsEncoding)
            {
                // remaining counts the characters left, so this is the current index.
                return s.Length - remaining;
            }
        }
    }
    return -1;
}

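代码贴出来了。下面那个函数 IndexOfHtmlEncodingChars 乍一看不太好懂，其实它做的事情是：从 startPos 开始扫描，返回第一个需要编码的字符的位置（即 "、&、<、>，或者 U+00A0 到 U+00FF 之间的字符），找不到就返回 -1。上面的 HtmlEncode 则是这样工作的：先把两个特殊字符之间不需要编码的那段字符串原样 Append 进 StringBuilder，然后把该位置上的特殊字符替换成对应的实体（U+00A0 到 U+00FF 之间的字符替换成 “&#十进制码点;” 的形式），如此循环直到字符串结束。这些特殊符号是什么呢？就是一些 Html 的特殊字符，详情见http://www.cnblogs.com/web-d/archive/2010/04/16/1713298.html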

下表为常见的Html特殊字符表

HTML 原代码	显示结果	描述
&lt;	<	小于号或显示标记
&gt;	>	大于号或显示标记
&amp;	&	可用于显示其它特殊字符
&quot;	"	引号
&reg;	®	已注册
&copy;	©	版权
&trade;	™	商标
&ensp;		半个空白位
&emsp;		一个空白位
&nbsp;		不断行的空白

说白了，HtmlEncode 就是将文本中的特殊字符替换成对应的 Html 实体（即上表中的“原代码”）的过程，很简单，所以我们在使用的时候也不需要指定编码方式之类的。

2.再来看一看， HttpUtility.UrlEncode

继续贴代码，如下（注意：这里贴出的是解码方向的内部实现 UrlDecodeStringFromStringInternal，编码是它的逆过程）：

/// <summary>
/// Decodes a URL-encoded string: <c>+</c> becomes a space, <c>%uXXXX</c> becomes the
/// UTF-16 code unit with that hexadecimal value, and <c>%XX</c> becomes a single byte.
/// Malformed escape sequences are passed through character by character.
/// </summary>
/// <param name="s">The URL-encoded text. It must not be <c>null</c>; no null check is made here.</param>
/// <param name="e">The encoding handed to the <c>UrlDecoder</c> that accumulates the output.</param>
/// <returns>The string produced by the decoder's <c>GetString()</c>.</returns>
private static string UrlDecodeStringFromStringInternal(string s, Encoding e)
{
    int count = s.Length;
    HttpUtility.UrlDecoder decoder = new HttpUtility.UrlDecoder(count, e);

    for (int pos = 0; pos < count; pos++)
    {
        char ch = s[pos];

        if (ch == '+')
        {
            ch = ' ';
        }
        else if (ch == '%' && pos < count - 2)
        {
            if (s[pos + 1] == 'u' && pos < count - 5)
            {
                // "%uXXXX": four hex digits forming one 16-bit character.
                int h1 = HttpUtility.HexToInt(s[pos + 2]);
                int h2 = HttpUtility.HexToInt(s[pos + 3]);
                int h3 = HttpUtility.HexToInt(s[pos + 4]);
                int h4 = HttpUtility.HexToInt(s[pos + 5]);
                if (h1 >= 0 && h2 >= 0 && h3 >= 0 && h4 >= 0)
                {
                    decoder.AddChar((char)(h1 << 12 | h2 << 8 | h3 << 4 | h4));
                    pos += 5; // skip the rest of the escape; the loop header adds the final +1
                    continue;
                }
            }
            else
            {
                // "%XX": two hex digits forming one byte.
                int hi = HttpUtility.HexToInt(s[pos + 1]);
                int lo = HttpUtility.HexToInt(s[pos + 2]);
                if (hi >= 0 && lo >= 0)
                {
                    decoder.AddByte((byte)(hi << 4 | lo));
                    pos += 2; // skip the two hex digits; the loop header adds the final +1
                    continue;
                }
            }
            // Invalid hex digits: fall through and emit the '%' itself as a literal.
        }

        // Literal character: values below 0x80 are added as a byte, anything else as a char.
        if ((ch & 0xFF80) == 0)
        {
            decoder.AddByte((byte)ch);
        }
        else
        {
            decoder.AddChar(ch);
        }
    }

    return decoder.GetString();
}

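这段代码乍一看也不好懂，各种按位运算。需要注意的是，上面贴出的其实是解码（UrlDecode）的内部实现：遇到 '+' 还原成空格，遇到 “%uXXXX” 还原成一个 Unicode 字符，遇到 “%xy” 还原成一个字节（xy 为十六进制），其余字符原样保留。编码（UrlEncode）是它的逆过程：大概意思是把字符串中不安全的字符（判断方式见下方贴出的 IsSafe() 函数：字母、数字以及 ! ' ( ) * - . _ 之外的字符都算不安全）按指定编码转成字节后，用 “%xy” 形式的字符串表示，其中 xy 为该字节的十六进制表示形式；空格则编码为 '+'。当然这里要注意的是，UrlEncode() 默认的编码方式是 UTF-8。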

/// <summary>
/// Tells whether a character is on the "safe" list checked here: ASCII letters,
/// ASCII digits, and the punctuation characters <c>! ' ( ) * - . _</c>.
/// </summary>
/// <param name="ch">The character to test.</param>
/// <returns><c>true</c> when <paramref name="ch"/> is on the safe list; otherwise <c>false</c>.</returns>
internal static bool IsSafe(char ch)
{
    bool isAsciiLetterOrDigit =
        (ch >= 'a' && ch <= 'z') ||
        (ch >= 'A' && ch <= 'Z') ||
        (ch >= '0' && ch <= '9');
    if (isAsciiLetterOrDigit)
    {
        return true;
    }

    switch (ch)
    {
        case '!':
        case '\'':
        case '(':
        case ')':
        case '*':
        case '-':
        case '.':
        case '_':
            return true;
        default:
            // Everything else, including '+' and ',', is not on the safe list.
            return false;
    }
}

不错的Url编码问题资料： http://www.360doc.com/content/11/1223/16/2150778_174499703.shtml

posted @ 2019-04-30 16:53 NCat 阅读(1529) 评论(0) 收藏举报

刷新页面返回顶部

NCat

HttpUtility.HtmlEncode() 与 HttpUtility.UrlEncode()

公告