1 #!/usr/bin/env python
2 # -*- coding:utf-8 -*-
3 # Author:woshinidaye
4
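# Fetch the hot comments of a NetEase Cloud Music song; to keep it simple, no login is used.
# 1. Find the unencrypted request parameters.
# 2. Encrypt them with the same logic NetEase uses, producing `params` and `encSecKey`.
# 3. Send the request and read the returned data.
# Encryption call as it appears in the site's JavaScript: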
10 # var
11 # bUM2x = window.asrsea(JSON.stringify(i6c), bsG7z(["流泪", "强"]), bsG7z(WW3x.md), bsG7z(["爱心", "女孩", "惊恐", "大笑"]));
12 # e6c.data = j6d.cs6m({
13 # params: bUM2x.encText,
14 # encSecKey: bUM2x.encSecKey
15 # })
16
17 import requests,re,json,base64
18 from lxml import html
19 from Crypto.Cipher import AES #pip install pycryptodome
20 etree = html.etree
21
# Endpoint that returns a song's comments; this script sends a POST to it.
url = 'https://music.163.com/weapi/comment/resource/comments/get?csrf_token='

# Arguments 2-4 of the site's window.asrsea() call (named e, f, g in its source).
# e and f are handed to RSAKeyPair in the site's JavaScript to produce encSecKey;
# no function in this file reads them.
e = "010001"
f = (
    "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725"
    "152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312"
    "ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424"
    "d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8"
    "e7"
)
# Key for the first AES pass.
g = '0CoJUm6Qyw8W8jud'
# Key for the second AES pass. The site generates 16 random characters here;
# this script pins one fixed value instead.
i = "hjbiwTejTo17235R"
def get_encSecKey():
    """Return the hard-coded ``encSecKey`` form value sent with every request.

    NOTE(review): presumably this was captured from the site for the fixed
    second-pass key ``i`` defined in this file; it must be re-captured if that
    key ever changes -- confirm.
    """
    enc_sec_key = (
        "6c11f64c829ec94df8ce7d711932c15c4c6e46daf00674f0f22dc1170ba68e80"
        "9047ee5a7e12c3e07d8c1c3f66b76e4518201b1d4679bd1659a747856f16ac17"
        "c32286fba6a82034fa2597004dcca90ca9bfce49bd1a85d09fac162d7b40b390"
        "fe8d4c4be15bcc65788d0002fdbd91fb529a71d4d42aa702170fd8e92f1ed87e"
    )
    return enc_sec_key
def to_16(data):
    """Pad *data* so that its UTF-8 encoding fills whole 16-byte blocks.

    PKCS#7-style padding: between 1 and 16 characters are always appended,
    and the code point of each appended character equals the number of
    characters appended.

    Args:
        data: The text to pad.

    Returns:
        ``data`` followed by the padding characters.
    """
    # Measure the encoded byte length rather than the character count: the
    # padded text is UTF-8 encoded before encryption, and a non-ASCII
    # character occupies more than one byte. For ASCII input both agree.
    pad = 16 - len(data.encode('utf-8')) % 16
    return data + chr(pad) * pad
def enc_params(data, key):
    """Encrypt *data* with AES in CBC mode and return the result as Base64 text.

    Args:
        data: Plain text to encrypt; it is padded with ``to_16`` first.
        key: AES key as text; it is UTF-8 encoded before use.

    Returns:
        The Base64-encoded ciphertext as a ``str``.
    """
    padded_text = to_16(data)
    cipher = AES.new(
        key=key.encode('utf-8'),
        # Same fixed IV string the site's JavaScript uses.
        IV='0102030405060708'.encode('utf-8'),
        mode=AES.MODE_CBC,
    )
    ciphertext = cipher.encrypt(padded_text.encode('utf-8'))
    return base64.b64encode(ciphertext).decode('utf-8')
def get_params(data):
    """Build the ``params`` form value by encrypting *data* twice.

    The first pass uses the module-level key ``g``; its Base64 output is then
    encrypted again with the module-level key ``i``.

    Args:
        data: The request payload, already serialized to a string.

    Returns:
        The Base64 text produced by the second encryption pass.
    """
    return enc_params(enc_params(data, g), i)
44
45
46 # "c6aaef7d7fe54edc416de03808f94c8de2590f943d4f334d8bc485e53f00b95acdfbe704330a01d81bfe666c00b5d681321ab4b04147d0ba1683877e4350b1310e3ad67465ffa1dc9ea57b9d682f1efffbe14ad734a9454faf8e28464491542226109de2fdce6751b63426bd3b18543108c5076ef2b8eab03358ea7a88ce90e9"
# Unencrypted request payload. ``rid`` and ``threadId`` carry the same resource
# identifier; change the song ID in them to query a different song.
data = dict(
    csrf_token='',
    cursor="-1",
    offset="0",
    orderType="1",
    pageNo="1",
    pageSize="20",
    rid='R_SO_4_1881521546',
    threadId='R_SO_4_1881521546',
)
57 #加密方式
58 '''
59 function a(a) { #随机产生16位字符串
60 var d, e, b = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", c = "";
61 for (d = 0; a > d; d += 1) #循环16次
62 e = Math.random() * b.length, #随机数
63 e = Math.floor(e), #取整
64 c += b.charAt(e); #取字符串中的xxxx位置
65 return c
66 }
67 function b(a, b) { #a是要加密的数据
68 var c = CryptoJS.enc.Utf8.parse(b)
69 , d = CryptoJS.enc.Utf8.parse("0102030405060708")
70 , e = CryptoJS.enc.Utf8.parse(a)
71 , f = CryptoJS.AES.encrypt(e, c, {
72 iv: d, #偏移量
73 mode: CryptoJS.mode.CBC #模式CBC
74 });
75 return f.toString()
76 }
77 function c(a, b, c) {
78 var d, e;
79 return setMaxDigits(131),
80 d = new RSAKeyPair(b,"",c),
81 e = encryptedString(d, a)
82 }
83 function d(d, e, f, g) { #d:data e:010001 f:bsG7z(WW3x.md) g:bsG7z(["爱心", "女孩", "惊恐", "大笑"])
84 var h = {}
85 , i = a(16); #i就是16位随机字符
86 return h.encText = b(d, g),
87 h.encText = b(h.encText, i), #得到params, 做了两次加密,第一次 data+g
88 h.encSecKey = c(i, e, f), #得到encSecKey
89 h
90 }
91 '''
92 # var bUM2x = window.asrsea(JSON.stringify(i6c), bsG7z(["流泪", "强"]), bsG7z(WW3x.md), bsG7z(["爱心", "女孩", "惊恐", "大笑"]));
93 '''
94 u6o.be6Y = function(Y6S, e6c) {
95 var i6c = {}
96 , e6c = NEJ.X({}, e6c)
97 , mo0x = Y6S.indexOf("?");
98 if (window.GEnc && /(^|\.com)\/api/.test(Y6S) && !(e6c.headers && e6c.headers[eu7n.AI4M] == eu7n.FD6x) && !e6c.noEnc) {
99 if (mo0x != -1) {
100 i6c = j6d.gW8O(Y6S.substring(mo0x + 1));
101 Y6S = Y6S.substring(0, mo0x)
102 }
103 if (e6c.query) {
104 i6c = NEJ.X(i6c, j6d.fT8L(e6c.query) ? j6d.gW8O(e6c.query) : e6c.query)
105 }
106 if (e6c.data) {
107 i6c = NEJ.X(i6c, j6d.fT8L(e6c.data) ? j6d.gW8O(e6c.data) : e6c.data)
108 }
109 i6c["csrf_token"] = u6o.gQ8I("__csrf");
110 Y6S = Y6S.replace("api", "weapi");
111 e6c.method = "post";
112 delete e6c.query;
113 var bUM2x = window.asrsea(JSON.stringify(i6c), bsG7z(["流泪", "强"]), bsG7z(WW3x.md), bsG7z(["爱心", "女孩", "惊恐", "大笑"]));
114 e6c.data = j6d.cs6m({
115 params: bUM2x.encText,
116 encSecKey: bUM2x.encSecKey
117 })
118 }
119 var cdnHost = "y.music.163.com";
120 var apiHost = "interface.music.163.com";
121 if (location.host === cdnHost) {
122 Y6S = Y6S.replace(cdnHost, apiHost);
123 if (Y6S.match(/^\/(we)?api/)) {
124 Y6S = "//" + apiHost + Y6S
125 }
126 e6c.cookie = true
127 }
128 cxg5l(Y6S, e6c)
129 '''
130
# POST the two encrypted form fields and print the raw response body.
# The timeout matters: without one, requests waits indefinitely on a stalled
# connection and the script never finishes.
resp = requests.post(
    url,
    data={
        'params': get_params(json.dumps(data)),
        "encSecKey": get_encSecKey(),
    },
    timeout=10,
)
print(resp.text)
136
137
138 #上面是获取某一首歌的评论,变量主要在data里面,更换歌曲ID,可以通过页面查找获取
139 '''
140 url = 'https://music.163.com/playlist?id=6920064959'
141 resp = requests.get(url=url,headers=headers)
142 resp.encoding = 'utf-8'
143 # print(resp.text)
144 # 用RE
145 # obj = re.compile(r'<li><a href="/(?P<song_id>.*?)">(?P<song_title>.*?)</a></li>',re.S)
146 # songs = obj.finditer(resp.text)
147 # for my_list in songs:
148 # aa = my_list.group('song_id').split('=')[-1]
149 # print(aa,'\t',my_list.group('song_title'))
150
151 #用Xpath
152 # etree = html.etree
153 # # print(resp.text)
154 # html = etree.HTML(resp.text)
155 # test = html.xpath('//html/body/div[3]/div[1]/div/div/div[2]/div[2]//a/@href')
156 # #这个地方试了好久,写全的话取不出来,感觉是跟页面有嵌套有关系
157 # print(test)
158
159 #用bs4
160 # from bs4 import BeautifulSoup
161 # html = BeautifulSoup(resp.text,'html.parser')
162 # test = html.find('ul',class_='f-hide').find_all('a')
163 # print(test)
164 '''