获取网易云歌单列表信息

前言:

  本人一直使用网易云音乐播放器,对网易云音乐十分的热衷,里面的歌单功能非常便捷,能快速找到符合自己喜好的歌曲信息。此文章如有侵权,请留言即刻删除文章。 

请求数据说明:以web请求的方式获取网易云音乐歌单列表,包括歌单名称、链接、播放量以及歌单创建者的名称。

一. 了解request请求

什么是request请求:当我们访问一个网站时,在浏览器中输入网站地址并发出的访问就是一种request请求。请求的格式应遵照被请求方的格式要求。

例如用谷歌浏览器访问网易云音乐网址时:

按F12打开开发者工具,如图选中Network选项卡

 

请求头信息:包含请求地址、请求方式(GET/POST)、可接受的响应内容类型Accept,以及最重要的User-Agent(标识发起访问的浏览器及其版本)。

 

二. 提供信息支持

打开Visual Studio=>文件=>新建项目=>新建控制台应用程序

 

选择控制台应用程序=>

 

选中项目右击=>添加=>类取名为RequestOptions:

 

 RequestOptions:设置请求头信息参数

 /// <summary>
 /// Settings for a single HTTP request: target, method, headers, cookies,
 /// optional body and connection tuning values.
 /// </summary>
 public class RequestOptions
     {
         /// <summary>
         /// HTTP method, e.g. GET or POST.
         /// </summary>
         public string Method { get; set; }

         /// <summary>
         /// Address the request is sent to.
         /// </summary>
         public Uri Uri { get; set; }

         /// <summary>
         /// Value for the Referer request header (the page the request claims to come from).
         /// </summary>
         public string Referer { get; set; }

         /// <summary>
         /// Request timeout in milliseconds.
         /// </summary>
         public int TimeOut = 5000;

         /// <summary>
         /// Whether to use a persistent (keep-alive) connection.
         /// </summary>
         public bool KeepAlive = true;

         /// <summary>
         /// Whether redirects are followed automatically; disabled by default.
         /// </summary>
         public bool AllowAutoRedirect = false;

         /// <summary>
         /// Maximum number of connections.
         /// The misspelt name is kept as-is because existing callers reference it.
         /// </summary>
         public int ConntectionLimit = int.MaxValue;

         /// <summary>
         /// Number of request attempts.
         /// NOTE(review): presumably a retry count; confirm that a caller actually reads it.
         /// </summary>
         public int RequestNum = 3;

         /// <summary>
         /// Value for the Accept request header, i.e. the media types the client
         /// is willing to receive. Defaults to any type.
         /// </summary>
         public string Accept = "*/*";

         /// <summary>
         /// Value for the Content-Type request header.
         /// </summary>
         public string ContentType = "application/x-www-form-urlencoded";

         /// <summary>
         /// Backing collection for additional request headers.
         /// </summary>
         public WebHeaderCollection header = new WebHeaderCollection();

         /// <summary>
         /// Additional request headers; reads and writes <see cref="header"/>.
         /// </summary>
         public WebHeaderCollection webHeader
         {
             get { return header; }
             set { header = value; }
         }

         /// <summary>
         /// Cookie string to send with the request.
         /// </summary>
         public string RequestCookies { get; set; }

         /// <summary>
         /// Request body data (e.g. form parameters of an XHR call).
         /// </summary>
         public string XHRParams { get; set; }
     }

新建RequestHelper类:编写请求方法,并对返回的数据信息进行处理

/// <summary>
/// Issues blocking HTTP requests described by a <see cref="RequestOptions"/>
/// instance and returns the response body as text.
/// </summary>
public class RequestHelper
    {
        /// <summary>
        /// Sends the request described by <paramref name="options"/> and returns
        /// the response body decoded as UTF-8, transparently inflating gzip or
        /// deflate encoded responses.
        /// </summary>
        /// <param name="options">Target, method, headers, cookies and optional body of the request.</param>
        /// <returns>The response body as a string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="options"/> has no Uri.</exception>
        public static string RequestAction(RequestOptions options)
        {
            if (options == null)
            {
                throw new ArgumentNullException("options");
            }
            if (options.Uri == null)
            {
                throw new ArgumentException("options.Uri must be set.", "options");
            }

            string payload = options.XHRParams;
            bool hasBody = !string.IsNullOrEmpty(payload);
            IWebProxy proxy = null; // GetWebProxy();

            var request = (HttpWebRequest)WebRequest.Create(options.Uri);
            request.Accept = options.Accept;
            request.ServicePoint.Expect100Continue = false;
            request.ServicePoint.UseNagleAlgorithm = false; // disable Nagle to cut latency
            request.AllowWriteStreamBuffering = hasBody;    // only buffer writes when a body is sent
            request.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip,deflate"); // advertise compressed responses

            request.ContentType = options.ContentType;
            request.AllowAutoRedirect = options.AllowAutoRedirect;
            // Present the request as coming from a desktop Chrome browser.
            request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36";
            request.Timeout = options.TimeOut;
            request.KeepAlive = options.KeepAlive;
            if (!string.IsNullOrEmpty(options.Referer))
            {
                request.Referer = options.Referer;
            }

            // HTTP method tokens are case-sensitive on the wire, so send the
            // canonical upper-case form; fall back to a sensible default when
            // the caller did not specify one.
            request.Method = string.IsNullOrEmpty(options.Method)
                ? (hasBody ? "POST" : "GET")
                : options.Method.ToUpperInvariant();

            if (proxy != null)
            {
                request.Proxy = proxy;
            }
            if (!string.IsNullOrEmpty(options.RequestCookies))
            {
                request.Headers[HttpRequestHeader.Cookie] = options.RequestCookies;
            }
            request.ServicePoint.ConnectionLimit = options.ConntectionLimit;
            if (options.webHeader != null && options.webHeader.Count > 0)
            {
                request.Headers.Add(options.webHeader);
            }

            if (hasBody)
            {
                byte[] buffer = Encoding.UTF8.GetBytes(payload);
                request.ContentLength = buffer.Length;
                // Dispose the request stream so the body is flushed and the
                // stream is released before the response is requested.
                using (Stream body = request.GetRequestStream())
                {
                    body.Write(buffer, 0, buffer.Length);
                }
            }

            string result;
            using (var response = (HttpWebResponse)request.GetResponse())
            {
                result = ReadBody(response);
            }
            request.Abort();

            return result;
        }

        /// <summary>
        /// Reads the whole response body as UTF-8 text, wrapping the stream in a
        /// decompressor when the Content-Encoding header names gzip or deflate.
        /// </summary>
        /// <param name="response">Response whose body is read.</param>
        /// <returns>The decoded body text.</returns>
        private static string ReadBody(HttpWebResponse response)
        {
            string contentEncoding = response.ContentEncoding ?? string.Empty;
            Stream body = response.GetResponseStream();

            if (contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                body = new GZipStream(body, CompressionMode.Decompress);
            }
            else if (contentEncoding.IndexOf("deflate", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                body = new DeflateStream(body, CompressionMode.Decompress);
            }

            // Disposing the reader also disposes the (possibly wrapped) stream.
            using (var reader = new StreamReader(body, Encoding.UTF8))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Builds the proxy to route requests through, or returns null when no
        /// proxy host/port is configured or the configured address is invalid.
        /// </summary>
        /// <returns>A configured proxy, or null to use the default behaviour.</returns>
        private static IWebProxy GetWebProxy()
        {
            // Proxy address and port.
            // NOTE(review): the value is passed to new Uri(...), so the host most
            // likely needs a scheme (e.g. "http://192.168.1.1") — confirm.
            string proxyHost = "";
            string proxyPort = "";

            // Credentials for an authenticating proxy.
            //string proxyUser = "xxx";
            //string proxyPass = "xxx";

            if (string.IsNullOrEmpty(proxyHost) || string.IsNullOrEmpty(proxyPort))
            {
                return null; // nothing configured
            }

            try
            {
                var webProxy = new System.Net.WebProxy();
                webProxy.Address = new Uri(string.Format("{0}:{1}", proxyHost, proxyPort));
                // Only needed when the proxy requires authentication.
                //webProxy.Credentials = new System.Net.NetworkCredential(proxyUser, proxyPass);
                return webProxy;
            }
            catch (UriFormatException ex)
            {
                // Placeholders added: the original format string had none, so the
                // timestamp and message were never printed.
                Console.WriteLine("获取代理信息异常 {0} {1}", DateTime.Now.ToString(), ex.Message);
                return null;
            }
        }
    }

现在通用的请求方法已经编写完毕,下面在Main方法中调用上面编写的方法:

项目添加引用:HtmlAgilityPack支持

选择项目点击右键=>选择NuGet包管理:

搜索HtmlAgilityPack

选择安装即可,

此项目主要使用的Xpath选择匹配:参考文档:http://www.w3school.com.cn/xpath/xpath_syntax.asp

xpath可视化工具:HtmlAgilityPack Tester 

链接:https://pan.baidu.com/s/1_in8Y9qFYzKQtnc-eLrb2w
提取码:co03

Main方法:

static void Main(string[] args)
        {
            // Entry point: downloads the hot-playlist page and prints, for every
            // playlist entry, its name, link, play count, creator and creator link.

            // Request URL; cat=%E5%85%A8%E9%83%A8 is the URL-encoded category "全部".
            var uri = new Uri(@"https://music.163.com/discover/playlist/?cat=%E5%85%A8%E9%83%A8&order=hot");
            // Fetch the page markup.
            string pageHtml = RequestHelper.RequestAction(new RequestOptions() { Uri = uri, Method = "GET" });

            HtmlDocument htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(pageHtml);

            // Locate the playlist <ul>. SelectSingleNode returns null when the
            // page layout differs, so guard before asking for its <li> children.
            HtmlNode listNode = htmlDoc.DocumentNode.SelectSingleNode(@"/html[1]/body[1]/div[3]/div[1]/ul[1]");
            HtmlNodeCollection playList = listNode == null ? null : listNode.SelectNodes("li");

            if (playList == null)
            {
                Console.WriteLine("未找到歌单列表,页面结构可能已变化。");
            }
            else
            {
                foreach (HtmlNode item in playList)
                {
                    // Playlist name
                    string playName = InnerTextOrEmpty(item, "p[1]/a[1]/@title[1]");
                    // Playlist link
                    string playHref = HrefOrEmpty(item, "p[1]/a[1]");
                    // Play count
                    string playCount = InnerTextOrEmpty(item, "div[1]/div[1]/span[2]");
                    // Creator
                    string createBy = InnerTextOrEmpty(item, "p[2]/a[1]/@title[1]");
                    // Creator link
                    string createHref = HrefOrEmpty(item, "p[2]/a[1]");

                    // Print to the console
                    Console.WriteLine("歌单:" + playName + "\t链接:" + playHref + "\t播放量:" + playCount + "\t创建者:" + createBy + "\t创建者链接:" + createHref);
                    Console.WriteLine("==============================");
                }
            }

            // Keep the console window open until a key is pressed.
            Console.Read();
        }

        // Returns the inner text of the node matched by xpath under parent, or "" when nothing matches.
        private static string InnerTextOrEmpty(HtmlNode parent, string xpath)
        {
            HtmlNode match = parent.SelectSingleNode(xpath);
            return match == null ? string.Empty : match.InnerText;
        }

        // Returns the href attribute of the node matched by xpath under parent, or "" when nothing matches.
        private static string HrefOrEmpty(HtmlNode parent, string xpath)
        {
            HtmlNode match = parent.SelectSingleNode(xpath);
            return match == null ? string.Empty : match.GetAttributeValue("href", "");
        }

运行结果展示:

总结:

爬虫是批量获取信息的一种工具,方便快捷,大量数据。但有的网站进行了反爬虫处理,如果请求量过大,可能会导致ip被封,还有的网站进行了请求验证,需做验证,自行体会,爬虫虽好但不要侵犯他人隐私哦!

 

posted @ 2020-04-29 22:15  就着  阅读(1770)  评论(0编辑  收藏  举报