using HtmlAgilityPack;
using Nito.AsyncEx;
using System;
using System.Diagnostics;
using System.IO;
using System.IO.Compression;
using System.Net;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace test1
{
/// <summary>
/// Console demo: downloads a page over HTTP and prints the <c>href</c> of the
/// anchor named <c>tj_login</c> when the page contains one.
/// </summary>
class Program
{
    /// <summary>Pause applied before every request attempt (first try and retries).</summary>
    private static readonly TimeSpan RequestDelay = TimeSpan.FromSeconds(1);

    /// <summary>
    /// Cookie container attached to every request. Because the same instance is
    /// used for sending and receiving, cookies set by one response are sent with
    /// the following requests.
    /// </summary>
    public static CookieContainer CookiesContainer = new CookieContainer();

    /// <summary>
    /// Entry point: fetches the page, looks up the login anchor and prints its link.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var uri = new Uri("https://www.baidu.com/");

        string pageHtml;
        try
        {
            pageHtml = AsyncContext.Run(() => GetHttpDomByUrl(uri));
        }
        catch (Exception ex)
        {
            // Top-level handler: the download gave up after its retries, so
            // report the failure instead of crashing with an unhandled exception.
            Console.WriteLine($"{uri}请求页面失败:{ex.Message}");
            Console.ReadKey();
            return;
        }

        var document = new HtmlDocument();
        document.LoadHtml(pageHtml);

        // Query once; both the anchor and its href attribute may be missing.
        var loginLink = document.DocumentNode.SelectSingleNode("//*/a[@name=\"tj_login\"]");
        var href = loginLink?.Attributes["href"]?.Value;
        if (href != null)
        {
            Console.WriteLine("获取到的数据为:" + href);
        }

        Console.WriteLine("测试成功");
        Console.ReadKey();
    }

    /// <summary>
    /// Downloads the document at <paramref name="uri"/> and returns its body decoded as UTF-8.
    /// Waits one second before each attempt and retries network/IO failures a bounded
    /// number of times.
    /// </summary>
    /// <param name="uri">Address of the page to download.</param>
    /// <param name="proxy">Optional proxy address; <c>null</c> or empty means no explicit proxy.</param>
    /// <param name="maxRetries">How many times a failed request is retried before the failure is rethrown.</param>
    /// <returns>The response body as text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="uri"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxRetries"/> is negative.</exception>
    /// <exception cref="WebException">The request still fails after all retries.</exception>
    /// <exception cref="IOException">Reading the response still fails after all retries.</exception>
    public static async Task<string> GetHttpDomByUrl(Uri uri, string proxy = null, int maxRetries = 3)
    {
        if (uri == null)
        {
            throw new ArgumentNullException(nameof(uri));
        }
        if (maxRetries < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(maxRetries));
        }

        for (var attempt = 0; ; attempt++)
        {
            // Non-blocking pause; also spaces out retries.
            await Task.Delay(RequestDelay);

            try
            {
                // The synchronous GetResponse is kept (on a pool thread) on purpose:
                // HttpWebRequest.Timeout is documented to apply only to synchronous requests.
                return await Task.Run(() => DownloadPage(uri, proxy));
            }
            catch (Exception ex) when ((ex is WebException || ex is IOException) && attempt < maxRetries)
            {
                // Only transient network/IO failures are retried; once the retry
                // budget is used up the filter no longer matches and the error propagates.
                Console.WriteLine($"{uri}请求页面失败正在重新请求,当前线程{Thread.CurrentThread.ManagedThreadId}:{ex.Message}");
            }
        }
    }

    /// <summary>
    /// Performs a single browser-like GET request and reads the whole response as UTF-8 text.
    /// </summary>
    /// <param name="uri">Address of the page to download.</param>
    /// <param name="proxy">Optional proxy address; ignored when <c>null</c> or empty.</param>
    /// <returns>The response body as text.</returns>
    private static string DownloadPage(Uri uri, string proxy)
    {
        var request = (HttpWebRequest)WebRequest.Create(uri);
        request.Method = "GET";
        request.Accept = "*/*";
        // Present the request as coming from Google Chrome.
        request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36";
        request.Timeout = 5000; // 5 seconds
        request.KeepAlive = true;
        request.AllowAutoRedirect = true; // follow redirects
        // Advertises gzip/deflate support and transparently decompresses the response.
        request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
        // Same container for outgoing and incoming cookies so session state is kept.
        request.CookieContainer = CookiesContainer;
        request.ServicePoint.Expect100Continue = false;
        request.ServicePoint.UseNagleAlgorithm = false;
        request.ServicePoint.ConnectionLimit = int.MaxValue;

        if (!string.IsNullOrEmpty(proxy))
        {
            request.Proxy = new WebProxy(proxy);
        }

        using (var response = (HttpWebResponse)request.GetResponse())
        using (var stream = response.GetResponseStream())
        using (var reader = new StreamReader(stream, Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }
}
}