使用 WebBrowser 获取Ajax动态加载网页信息
直接上代码(代码较粗糙,可根据需要优化):
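WebBrowser 是 ActiveX 控件,只能在单线程单元(STA)线程中创建;如果直接在默认的 MTA 线程中实例化,会抛出 ThreadStateException(提示当前线程不在单线程单元中,无法实例化 ActiveX 控件)。我的解决方法是:在程序入口 Main 方法上标注“STAThread”特性,指定线程模型为单线程单元: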
[STAThread]
static void Main(string[] args)
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;
namespace CrawlerTest
{
public class HttpHelper
{
    /// <summary>
    /// Minimum time, in seconds, the page is given to run its scripts before the
    /// DOM is read, even if the browser already reports the document as complete.
    /// </summary>
    private const double MinRenderSeconds = 3;

    /// <summary>
    /// Upper bound, in seconds, on how long a page load is awaited before giving up.
    /// </summary>
    private const double MaxWaitSeconds = 60;

    /// <summary>
    /// Pause, in milliseconds, between message-pump iterations while waiting.
    /// </summary>
    private const int PollIntervalMilliseconds = 10;

    /// <summary>
    /// Loads <paramref name="url"/> in an off-screen <see cref="WebBrowser"/> control,
    /// lets its scripts run, and returns the outer HTML of the resulting body element.
    /// </summary>
    /// <remarks>
    /// The WebBrowser control must be created on a single-threaded-apartment thread,
    /// so the calling thread (typically <c>Main</c>) has to be marked with
    /// <c>[STAThread]</c>. The method blocks the caller while pumping window messages.
    /// </remarks>
    /// <param name="url">Address of the page to load.</param>
    /// <returns>
    /// The URL-decoded body HTML, or <c>null</c> when the URL is empty, the page does
    /// not finish loading within the timeout, the document has no body, or an
    /// exception occurs (the failure is written to the console).
    /// </returns>
    public static string DownloadAjaxHtml(string url)
    {
        if (string.IsNullOrEmpty(url))
        {
            Console.WriteLine("DownloadAjaxHtml-Error:url is null or empty");
            return null;
        }

        string htmlstr = null;
        try
        {
            // The control wraps native browser resources; dispose it deterministically.
            using (WebBrowser wb = new WebBrowser())
            {
                wb.AllowNavigation = true;
                wb.ScriptErrorsSuppressed = true;
                wb.Navigate(url);

                // Stopwatch is monotonic, so system clock adjustments cannot distort the wait.
                var elapsed = System.Diagnostics.Stopwatch.StartNew();

                // Always wait the minimum render time; after that keep waiting only while
                // the document is still loading and the timeout has not been reached.
                while (elapsed.Elapsed.TotalSeconds <= MinRenderSeconds
                       || (wb.ReadyState != WebBrowserReadyState.Complete
                           && elapsed.Elapsed.TotalSeconds < MaxWaitSeconds))
                {
                    // Pump window messages so the control can progress navigation.
                    Application.DoEvents();
                    // Yield briefly instead of spinning a CPU core at 100%.
                    System.Threading.Thread.Sleep(PollIntervalMilliseconds);
                }

                if (wb.ReadyState == WebBrowserReadyState.Complete)
                {
                    // Document or Body can be null (e.g. non-HTML content).
                    string bodyHtml = wb.Document?.Body?.OuterHtml;
                    if (bodyHtml != null)
                    {
                        // NOTE(review): URL-decoding rendered HTML also rewrites '+' as a
                        // space and expands any %XX sequence in the markup. Kept because the
                        // original did it deliberately - confirm the target page needs it.
                        htmlstr = System.Web.HttpUtility.UrlDecode(bodyHtml);
                    }
                }
                else
                {
                    Console.WriteLine($"DownloadAjaxHtml-Error:timed out after {MaxWaitSeconds}s loading {url}");
                }
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine($"DownloadAjaxHtml-Error:{ex.ToString()}");
        }
        return htmlstr;
    }

    /// <summary>
    /// Downloads the news page and extracts one <c>NewsHotTitle</c> per
    /// <c>li</c> element found under <c>ul.htList</c>.
    /// </summary>
    /// <param name="encoding">
    /// Currently unused; retained so existing callers keep compiling.
    /// </param>
    /// <returns>
    /// The extracted titles, or an empty list when the page could not be downloaded
    /// or contains no matching list items. Never <c>null</c>.
    /// </returns>
    public static List<NewsHotTitle> GetHotTitle(Encoding encoding)
    {
        var list = new List<NewsHotTitle>();
        var url = "http://www.news.cn/2021homepro/rsznb/";

        string strHtml = HttpHelper.DownloadAjaxHtml(url);
        if (string.IsNullOrEmpty(strHtml))
        {
            // Nothing to parse; stop here instead of passing null to the parser.
            Console.WriteLine($"获取数据失败");
            return list;
        }

        // Fully qualified on purpose: System.Windows.Forms also declares an unrelated
        // HtmlDocument type, which the unqualified name would bind to or clash with.
        // NOTE(review): LoadHtml/DocumentNode/SelectNodes match the HtmlAgilityPack API;
        // confirm that package is the one referenced by the project.
        var doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(strHtml);

        var hotlist = doc.DocumentNode.SelectNodes("//ul[@class='htList']//li");
        if (hotlist == null || hotlist.Count == 0)
        {
            // No matching nodes; return before enumerating a null collection.
            Console.WriteLine($"获取数据失败");
            return list;
        }

        foreach (var item in hotlist)
        {
            NewsHotTitle model = new NewsHotTitle();
            // NOTE(review): RemoveHtml is not defined in the visible part of this class;
            // presumably it strips markup from the fragment - verify it exists.
            model.Title = HttpHelper.RemoveHtml(item.InnerHtml);
            model.PublishTime = DateTime.Now;
            Console.WriteLine($"{model.ToJson()}");
            // The original built each model but never stored it, so the result was always empty.
            list.Add(model);
        }
        return list;
    }
}
}

浙公网安备 33010602011771号