/// <summary>
/// Downloads a fixed web page, detects the character encoding the page
/// declares in its markup, and decodes the downloaded bytes with it.
/// </summary>
/// <param name="sender">Event source; not used by this handler.</param>
/// <param name="e">Event data; not used by this handler.</param>
private void button1_Click(object sender, EventArgs e)
{
    // URL of the page whose content is fetched.
    String urlDownLoad = "http://www.cnblogs.com";

    // The URL uses the HTTP scheme, so the created request is an HttpWebRequest.
    // NOTE(review): GetResponse() is synchronous; if this handler runs on the
    // UI thread it blocks until the download finishes - confirm that is acceptable.
    HttpWebRequest request = (HttpWebRequest) System.Net.WebRequest.Create(urlDownLoad);

    byte[] buffer;
    // Dispose the response even when reading the body throws, and release the
    // connection as soon as the raw bytes have been buffered.
    using (HttpWebResponse response = (HttpWebResponse) request.GetResponse())
    {
        buffer = GetBytes(response);
    }

    // First pass: decode as ASCII only to search the markup for a charset declaration.
    string strResult = Encoding.ASCII.GetString(buffer);

    #region Detect the page encoding
    // Matches either a <meta ... charset=...> tag or an xml ... encoding=... declaration
    // and captures the encoding name in the "charset" group.
    const string regCharset =
        "(<meta[^>]*charset=(?<charset>[^>'\"]*)[\\s\\S]*?>)|(xml[^>]+encoding=(\"|')*(?<charset>[^>'\"]*)[\\s\\S]*?>)";
    var r = new Regex(regCharset, RegexOptions.IgnoreCase);
    var m1 = r.Match(strResult);
    string encodingName = m1.Success ? m1.Groups["charset"].Value : "";

    if (string.IsNullOrEmpty(encodingName))
    {
        // In the meta branch the "charset" group directly follows "charset=" and
        // excludes quotes, so a quoted value (<meta charset="utf-8">) leaves it empty.
        // Fall back to the whole matched tag (group 1) and strip the literal
        // tag fragments around the value.
        string str = m1.Groups[1].Value;
        const string pattern = "<meta charset=\"|\">|\" />";
        encodingName = Regex.Replace(str, pattern, "");
    }
    #endregion

    // Second pass: decode the same bytes with the encoding the page declares.
    // The decoded text is not consumed any further inside this handler.
    string strHtml = GetEncodingByName(encodingName).GetString(buffer);
}
/// <summary>
/// Resolves a character-encoding name (such as one found in a page's charset
/// declaration) to an <see cref="Encoding"/> instance.
/// </summary>
/// <param name="encodingName">Encoding name to look up; may be null or empty.</param>
/// <returns>
/// <see cref="Encoding.Default"/> when <paramref name="encodingName"/> is null or empty;
/// the named encoding when the lookup succeeds; otherwise <see cref="Encoding.UTF8"/>.
/// </returns>
private static Encoding GetEncodingByName(
    string encodingName)
{
    if (string.IsNullOrEmpty(encodingName))
    {
        return Encoding.Default;
    }

    try
    {
        // Names extracted from markup can carry surrounding whitespace,
        // which the lookup would otherwise reject.
        return Encoding.GetEncoding(encodingName.Trim());
    }
    catch (ArgumentException)
    {
        // Unknown or invalid encoding name: best-effort fallback.
        return Encoding.UTF8;
    }
    catch (NotSupportedException)
    {
        // Encoding is recognised but not available on this platform: same fallback.
        return Encoding.UTF8;
    }
}
/// <summary>
/// Reads the whole body of a web response into memory.
/// </summary>
/// <param name="response">Response whose stream is read until it is exhausted.</param>
/// <returns>
/// Every byte read from the response stream, or an empty array when the
/// response exposes no stream.
/// </returns>
private static byte[] GetBytes(WebResponse response)
{
    using (var collected = new MemoryStream())
    using (var body = response.GetResponseStream())
    {
        if (body == null)
        {
            // Nothing to read: the in-memory stream is still empty.
            return collected.ToArray();
        }

        var chunk = new byte[0x100];
        int count;
        // Read returns 0 (or less) once the stream is exhausted.
        while ((count = body.Read(chunk, 0, chunk.Length)) > 0)
        {
            collected.Write(chunk, 0, count);
        }

        return collected.ToArray();
    }
}