一个支持gzip,支持简单编码识别的下载类

使用方法(成功时返回页面文本,下载失败时返回 null):
string html = XXX.DDD.GetHtml("http://www.xxx.com/xxx");

using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;
using System.IO.Compression;
using System.Threading;

namespace XXX
{
/// <summary>
/// Downloads a web page and returns it as text. Gzip-compressed responses are
/// decompressed, and the text encoding is taken from the page's own
/// <c>&lt;meta ... charset=...&gt;</c> declaration when one is found.
/// </summary>
/// <example><c>string html = DDD.GetHtml("http://www.xxx.com/xxx");</c></example>
public class DDD
{
    /// <summary>Size in bytes of the scratch buffer used for network reads and decompression.</summary>
    private const int BufferSize = 10000;

    /// <summary>
    /// Number of leading bytes searched for a charset declaration. The HTML standard
    /// requires the declaration to sit within the first 1024 bytes of the document.
    /// </summary>
    private const int CharsetSniffLength = 1024;

    /// <summary>
    /// Matches a charset declaration inside a meta tag, with or without quotes around
    /// the value; group 1 captures the charset name. Built once instead of per call.
    /// </summary>
    private static readonly Regex MetaCharsetRegex = new Regex(
        "<meta[^<]*charset\\s*=\\s*[\"']?\\s*([A-Za-z0-9_.:\\-]+)",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Downloads <paramref name="url"/> and returns the response body decoded as text,
    /// writing progress messages to the console while the download runs.
    /// </summary>
    /// <param name="url">Address of the resource to download.</param>
    /// <returns>
    /// The decoded body, or <c>null</c> when the download or decoding fails
    /// (the exception is written to the console rather than propagated).
    /// </returns>
    public static string GetHtml(string url)
    {
        Console.WriteLine("正在下载:" + url);

        try
        {
            using (WebClient client = new WebClient())
            {
                // Request headers imitating a browser.
                client.Headers.Add("Accept: */*");
                client.Headers.Add("Accept-Language: zh-cn");
                client.Headers.Add("UA-CPU: x86");
                // Only advertise gzip: it is the only content encoding decoded below,
                // so offering "deflate" as well would invite responses we cannot read.
                client.Headers.Add("Accept-Encoding: gzip");
                client.Headers.Add("User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1;)");

                byte[] body;
                using (Stream stream = client.OpenRead(url))
                {
                    long contentLength = ParseContentLength(client.ResponseHeaders);
                    body = ReadBody(stream, contentLength);
                }

                bool useGzip = IsGzipEncoded(client.ResponseHeaders);
                Console.WriteLine("下载完毕");
                return DecodeHtml(body, useGzip);
            }
        }
        catch (Exception ex)
        {
            // Best-effort API: report the failure and signal it to the caller with null.
            Console.WriteLine(ex);
            return null;
        }
    }

    /// <summary>
    /// Reads the Content-Length response header.
    /// </summary>
    /// <returns>The declared length, or 0 when the header is missing or not a plain number.</returns>
    private static long ParseContentLength(WebHeaderCollection headers)
    {
        string raw = headers == null ? null : headers["Content-Length"];
        long length;
        if (raw != null
            && long.TryParse(
                raw.Trim(),
                System.Globalization.NumberStyles.None,
                System.Globalization.CultureInfo.InvariantCulture,
                out length))
        {
            return length;
        }

        return 0;
    }

    /// <summary>
    /// Tells whether the response headers declare a gzip-compressed body.
    /// </summary>
    private static bool IsGzipEncoded(WebHeaderCollection headers)
    {
        string encoding = headers == null ? null : headers["Content-Encoding"];
        if (encoding == null)
        {
            return false;
        }

        // Header tokens are not linguistic text, so compare ordinally; a culture-aware
        // comparison can mis-handle 'I'/'i' under some cultures.
        encoding = encoding.Trim();
        return encoding.Equals("gzip", StringComparison.OrdinalIgnoreCase)
            || encoding.Equals("x-gzip", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Reads <paramref name="stream"/> to its end, reporting progress on the console
    /// when a console cursor is available.
    /// </summary>
    /// <param name="stream">Response stream to drain.</param>
    /// <param name="contentLength">Declared body length, or 0 when unknown (no percentage is shown).</param>
    /// <returns>All bytes read from the stream.</returns>
    private static byte[] ReadBody(Stream stream, long contentLength)
    {
        int left;
        int top;
        bool showProgress = TryGetCursor(out left, out top);

        using (MemoryStream body = new MemoryStream())
        {
            byte[] buffer = new byte[BufferSize];
            long finished = 0;
            int read;

            // Stop as soon as the stream reports end-of-data. Looping until the declared
            // Content-Length is reached would spin forever on a truncated response.
            while ((read = stream.Read(buffer, 0, buffer.Length)) > 0)
            {
                body.Write(buffer, 0, read);
                finished += read;

                if (showProgress)
                {
                    showProgress = TryWriteProgress(left, top, finished, contentLength);
                }
            }

            return body.ToArray();
        }
    }

    /// <summary>
    /// Captures the current console cursor position so progress can be redrawn in place.
    /// </summary>
    /// <returns><c>false</c> when the cursor position cannot be read.</returns>
    private static bool TryGetCursor(out int left, out int top)
    {
        try
        {
            left = Console.CursorLeft;
            top = Console.CursorTop;
            return true;
        }
        catch (IOException)
        {
            // The cursor properties can fail when no real console is attached
            // (for example redirected output); the download must still proceed.
            left = 0;
            top = 0;
            return false;
        }
    }

    /// <summary>
    /// Redraws the progress text at the saved cursor position.
    /// </summary>
    /// <returns><c>false</c> when the cursor could not be moved, so the caller can stop reporting.</returns>
    private static bool TryWriteProgress(int left, int top, long finished, long total)
    {
        try
        {
            Console.CursorTop = top;
            Console.CursorLeft = left;
        }
        catch (IOException)
        {
            return false;
        }
        catch (ArgumentOutOfRangeException)
        {
            // The saved position is no longer inside the console buffer.
            return false;
        }

        Console.Write("已下载:" + finished + "\t");
        if (total > 0)
        {
            double percent = (double)finished * 100 / total;
            Console.Write(percent.ToString("f2") + "%");
        }

        return true;
    }

    /// <summary>
    /// Turns the raw response body into text: decompresses it when needed, then decodes
    /// it with the encoding chosen by <see cref="DetectEncoding"/>.
    /// </summary>
    private static string DecodeHtml(byte[] body, bool useGzip)
    {
        byte[] htmlBytes = useGzip ? Gunzip(body) : body;
        return DetectEncoding(htmlBytes).GetString(htmlBytes);
    }

    /// <summary>
    /// Chooses the text encoding for a page from its meta charset declaration.
    /// </summary>
    /// <returns>
    /// The declared encoding when one is found and supported on this platform;
    /// otherwise <see cref="Encoding.Default"/>.
    /// </returns>
    private static Encoding DetectEncoding(byte[] htmlBytes)
    {
        Encoding fallback = Encoding.Default;
        int sniffLength = Math.Min(CharsetSniffLength, htmlBytes.Length);
        string head = fallback.GetString(htmlBytes, 0, sniffLength);

        Match match = MetaCharsetRegex.Match(head);
        if (!match.Success)
        {
            return fallback;
        }

        try
        {
            Encoding declared = Encoding.GetEncoding(match.Groups[1].Value);

            // The declaration was just read as single-byte text, so the document cannot
            // actually be UTF-16/UTF-32; treat such a (wrong) label as UTF-8.
            if (declared is UnicodeEncoding || declared is UTF32Encoding)
            {
                return Encoding.UTF8;
            }

            return declared;
        }
        catch (ArgumentException)
        {
            // Unknown or unsupported charset name.
            return fallback;
        }
    }

    /// <summary>
    /// Decompresses a gzip-compressed byte array.
    /// </summary>
    private static byte[] Gunzip(byte[] compressed)
    {
        using (MemoryStream input = new MemoryStream(compressed))
        using (GZipStream gzip = new GZipStream(input, CompressionMode.Decompress))
        using (MemoryStream output = new MemoryStream())
        {
            byte[] buffer = new byte[BufferSize];
            int read;
            while ((read = gzip.Read(buffer, 0, buffer.Length)) > 0)
            {
                output.Write(buffer, 0, read);
            }

            return output.ToArray();
        }
    }
}
/// <summary>
/// Simple mutable holder for a pair of integer coordinates.
/// </summary>
internal class Point
{
    /// <summary>Gets or sets the X coordinate.</summary>
    public int X { get; set; }

    /// <summary>Gets or sets the Y coordinate.</summary>
    public int Y { get; set; }
}
}
posted @ 2008-06-12 15:56  冰封的心  阅读(267)  评论(0)    收藏  举报