如何自动判断url中汉字的编码格式

参考了 http://topic.csdn.net/u/20091105/15/0d54b7b2-38fe-4cdf-ae1b-5a1f07c26ea0.html 这个帖子 18 楼的代码。

 

在其基础上做了一些修改，以满足我实际的项目需求。

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Web;

namespace testProgram
{
    
class MyEncoding
    {
        /// <summary>
        /// Name of the legacy encoding assumed when the escaped bytes of a URL
        /// are not well-formed UTF-8.
        /// </summary>
        // NOTE(review): on .NET Core / .NET 5+ this code page may need
        // CodePagesEncodingProvider to be registered first - confirm for the target runtime.
        private const string FallbackEncodingName = "GB2312";

        /// <summary>
        /// Demo entry point: decodes one GB2312-escaped URL and one UTF-8-escaped URL,
        /// prints both results, then waits for a line of input before exiting.
        /// </summary>
        static void Main()
        {
            MyEncoding myEncoding = new MyEncoding();

            // GB2312-escaped query string
            string gb2312 = "http://www.baidu.com/s?wd=%B9%A4%B3%A7%B9%A9%B5%E7";

            // UTF-8-escaped query string
            string utf8 = "http://www.google.com.hk/search?hl=zh-CN&newwindow=1&safe=strict&q=%25abc%E4%B8%AD%E5%9B%BD%2C%3B&btnG=Google+%E6%90%9C%E7%B4%A2&aq=f&aqi=&aql=&oq=&gs_rfai=";

            string ss = myEncoding.UrlDecode(gb2312);
            string ss1 = myEncoding.UrlDecode(utf8);

            Console.WriteLine(ss);
            Console.WriteLine(ss1);

            Console.ReadLine();
        }

        /// <summary>
        /// URL-decodes <paramref name="url"/>, choosing the text encoding automatically:
        /// UTF-8 when the percent-escaped bytes form well-formed UTF-8 containing at least
        /// one multi-byte character, otherwise GB2312.
        /// </summary>
        /// <param name="url">The percent-encoded URL. May be null or empty.</param>
        /// <returns>
        /// The decoded URL; <paramref name="url"/> itself when it is null or empty.
        /// </returns>
        private string UrlDecode(string url)
        {
            // Nothing to detect or decode; also avoids dereferencing a null URL.
            if (string.IsNullOrEmpty(url))
            {
                return url;
            }

            byte[] escapedBytes = GetUrlCodingToBytes(url);
            Encoding encoding = IsUTF8(escapedBytes)
                ? Encoding.UTF8
                : Encoding.GetEncoding(FallbackEncodingName);

            return HttpUtility.UrlDecode(url, encoding);
        }

        /// <summary>
        /// Collects the byte values of every valid <c>%XX</c> escape in
        /// <paramref name="url"/>, in order of appearance. Literal characters and
        /// malformed escapes (for example <c>%zz</c> or a trailing <c>%4</c>) are skipped.
        /// </summary>
        /// <param name="url">The percent-encoded URL. May be null or empty.</param>
        /// <returns>The escaped bytes; an empty array when there are none.</returns>
        private byte[] GetUrlCodingToBytes(string url)
        {
            if (string.IsNullOrEmpty(url))
            {
                return new byte[0];
            }

            List<byte> escapedBytes = new List<byte>();
            int position = 0;
            while (position < url.Length)
            {
                bool isEscape = url[position] == '%'
                    && position + 2 < url.Length
                    && Uri.IsHexDigit(url[position + 1])
                    && Uri.IsHexDigit(url[position + 2]);

                if (isEscape)
                {
                    int high = Uri.FromHex(url[position + 1]);
                    int low = Uri.FromHex(url[position + 2]);
                    escapedBytes.Add((byte)((high << 4) | low));
                    position += 3;
                }
                else
                {
                    // Advance a single character so that "%%41" still yields 0x41.
                    position++;
                }
            }

            return escapedBytes.ToArray();
        }

        /// <summary>
        /// Determines whether <paramref name="buf"/> is well-formed UTF-8 that contains
        /// at least one multi-byte sequence. The buffer is only read, never modified.
        /// </summary>
        /// <param name="buf">The bytes to inspect. May be null or empty.</param>
        /// <returns>
        /// <c>true</c> when every byte belongs to a well-formed UTF-8 sequence and at least
        /// one sequence is longer than one byte; <c>false</c> for null, empty, pure-ASCII
        /// or malformed input.
        /// </returns>
        private bool IsUTF8(byte[] buf)
        {
            if (buf == null || buf.Length == 0)
            {
                return false;
            }

            bool sawMultiByte = false;
            int position = 0;
            while (position < buf.Length)
            {
                byte lead = buf[position];
                if (lead < 0x80)
                {
                    // Plain ASCII byte: valid, but says nothing about the encoding.
                    position++;
                    continue;
                }

                int continuationCount;
                byte minSecond = 0x80;
                byte maxSecond = 0xBF;

                if (lead >= 0xC2 && lead <= 0xDF)
                {
                    // 0xC0 and 0xC1 are excluded: they can only start overlong forms,
                    // and GB2312 text frequently contains them as lead bytes.
                    continuationCount = 1;
                }
                else if (lead >= 0xE0 && lead <= 0xEF)
                {
                    continuationCount = 2;
                    if (lead == 0xE0)
                    {
                        minSecond = 0xA0; // reject overlong three-byte forms
                    }
                    else if (lead == 0xED)
                    {
                        maxSecond = 0x9F; // reject UTF-16 surrogate code points
                    }
                }
                else if (lead >= 0xF0 && lead <= 0xF4)
                {
                    continuationCount = 3;
                    if (lead == 0xF0)
                    {
                        minSecond = 0x90; // reject overlong four-byte forms
                    }
                    else if (lead == 0xF4)
                    {
                        maxSecond = 0x8F; // reject code points above U+10FFFF
                    }
                }
                else
                {
                    // Stray continuation byte (0x80-0xBF) or invalid lead (0xC0, 0xC1, 0xF5-0xFF).
                    return false;
                }

                // The sequence must not run past the end of the buffer.
                if (position + continuationCount >= buf.Length)
                {
                    return false;
                }

                byte second = buf[position + 1];
                if (second < minSecond || second > maxSecond)
                {
                    return false;
                }

                for (int offset = 2; offset <= continuationCount; offset++)
                {
                    if ((buf[position + offset] & 0xC0) != 0x80)
                    {
                        return false;
                    }
                }

                sawMultiByte = true;
                position += continuationCount + 1;
            }

            // Pure ASCII is deliberately not reported as UTF-8.
            return sawMultiByte;
        }
    }
}

 

 

posted @ 2010-09-16 11:10  神龙升空  阅读(2113)  评论(1)    收藏  举报