HttpClient 爬取网络数据

废话不多说,上教程。🤩


创建 HttpHelper类

/// <summary>
/// Static helpers for sending HTTP GET and JSON POST requests through one shared
/// <see cref="HttpClient"/> instance.
/// </summary>
public class HttpHelper{
  /// <summary>
  /// Client instance reused by every request made through this class.
  /// Creating a new <see cref="HttpClient"/> per request can exhaust sockets under load,
  /// so both helper methods go through this single instance.
  /// </summary>
  public static HttpClient Client { get; } = new HttpClient();

  /// <summary>
  /// Sends a GET request and returns the response body as text.
  /// </summary>
  /// <param name="url">Request address.</param>
  /// <param name="name">Optional text appended verbatim to <paramref name="url"/> before the request is sent.</param>
  /// <returns>
  /// The response body when the status code indicates success; otherwise an empty string.
  /// </returns>
  public static async Task<string> GetHTMLByURLAsync(string url, string name="")
  {
    string apiUrl = url + name;
    // Request and response are disposed as soon as the body has been read.
    using (HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Get, apiUrl))
    using (HttpResponseMessage response = await Client.SendAsync(request).ConfigureAwait(false))
    {
      if (!response.IsSuccessStatusCode)
      {
        return "";
      }
      return await response.Content.ReadAsStringAsync().ConfigureAwait(false);
    }
  }

  /// <summary>
  /// Sends a POST request with an <c>application/json</c> body and returns the response body as text.
  /// </summary>
  /// <param name="url">Request address.</param>
  /// <param name="param">Value serialized into the JSON body as the <c>param</c> property of a wrapper object.</param>
  /// <returns>The response body.</returns>
  /// <exception cref="HttpRequestException">The response status code does not indicate success.</exception>
  public static async Task<string> HttpPost(string url,string param) {
    string json = JsonConvert.SerializeObject(new { param });
    using (HttpContent content = new StringContent(json))
    {
      // Replace the default text/plain content type so the header is exactly "application/json".
      content.Headers.ContentType = new System.Net.Http.Headers.MediaTypeHeaderValue("application/json");
      using (HttpResponseMessage response = await Client.PostAsync(url, content).ConfigureAwait(false))
      {
        // Throws on a non-success status, so no separate status check is needed afterwards.
        response.EnsureSuccessStatusCode();
        return await response.Content.ReadAsStringAsync().ConfigureAwait(false);
      }
    }
  }
}

API使用

// Parser instance shared by all requests handled by this controller.
private static readonly HtmlParser htmlParser = new HtmlParser();
// Placeholder address of the page to crawl — replace it with the real target URL.
private const string url = "https://example.com/";

/// <summary>
/// Downloads the page at <c>url</c>, parses it, and collects the <c>href</c> value of every
/// <c>a</c> element found inside elements matching <c>div.className</c>.
/// </summary>
/// <returns>A list of the collected <c>href</c> values.</returns>
[HttpGet()]
public async Task<object> get(){
  // Await the download instead of blocking the request thread on .Result.
  var htmlDoc = await HttpHelper.GetHTMLByURLAsync(url);
  // Parse the HTML text into a document.
  var dom = htmlParser.ParseDocument(htmlDoc);
  List<string> urllist = new List<string>();
  // QuerySelectorAll accepts CSS selector syntax.
  foreach (var item in dom.QuerySelectorAll("div.className"))
  {
    foreach (var a in item.QuerySelectorAll("a"))
    {
      // GetAttribute returns null when the anchor has no href; skip those entries.
      var pageUrl = a.GetAttribute("href");
      if (pageUrl != null)
      {
        urllist.Add(pageUrl);
      }
    }
  }
  return urllist;
}

到这里就🎉🎉🎉大功告成了!🎉🎉🎉 你学废了吗😀
posted @ 2024-04-25 11:01  浅·笑  阅读(22)  评论(0)    收藏  举报