爬取网易云音乐热评
实现对网页版网易云音乐热评的爬取:输入歌曲页面网址,即可将热评保存到当前工作目录下的 hot_comment.txt 文件中。
正则表达式真好用!!
"""
2019/10/4
version: 1.0.0
by Zeronera
爬取网易云音乐热评
"""
import json
import re

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent  # user-agent pool
def getHTMLText(url):
    """Fetch the raw comment-API response for a NetEase Cloud Music song.

    The song id is taken from the ``id`` query parameter of ``url`` and
    substituted into the ``weapi`` comments endpoint, which is then POSTed
    with a fixed ``params``/``encSecKey`` form payload and a random
    User-Agent header.

    Args:
        url: Song page address containing ``id=<digits>``, e.g.
            ``https://music.163.com/#/song?id=186016``.

    Returns:
        The response body as text, or the sentinel string ``"error"`` when
        no song id can be found in ``url`` or the request fails.
    """
    # Capture only the digits of the id parameter so that trailing
    # parameters (e.g. "&userid=...") do not leak into the API path, and a
    # URL without an id is reported instead of raising IndexError.
    id_match = re.search(r'(?:^|[?&])id=(\d+)', url)
    if id_match is None:
        return "error"
    music_id = id_match.group(1)
    api_url = "https://music.163.com/weapi/v1/resource/comments/R_SO_4_%s?csrf_token=" % music_id

    # Fixed form payload expected by the endpoint (presumably a pre-encrypted
    # request body captured from the web client -- confirm before changing).
    data = {
        'params': 'nSQPTf3PYMn4ioHoOx+8GFAUoA6edfR07VISV2/2rXXE7S4h8lhHKsZoNtqNh65vHcXqwv59DwwVeclxC0TWvPebCBFD9j1NcoVxXtJ9awrkYu2DwAElmtbfmTeaz7uvcCNUK+nXr5dAOh7NKaeZKGj6kDOBeuCIH15zIcH8FdqYPBfOw53SyhjMINwWnf2/',
        'encSecKey': '0b49c98190a4819faf9ae3d3b359e3544cf252f721b3a4b00532e8ebc30b693bef1018d64307e9b2c71f94f997b5f585e0684cd8b810b660061aff692a849ed81fd4db8c931b02d4f9c17efe3089364254135fc25759a1a6a441f8a11efbb7d41cd67fcec78aa645f6c2587c58e1f9015ec30b365d95be11f260964e0233c88e',
    }
    try:
        headers = {'User-Agent': UserAgent().random}
        # The timeout keeps a stalled connection from hanging the script.
        r = requests.post(api_url, headers=headers, data=data, timeout=10)
        return r.text
    except Exception:
        # Best-effort fetch: any failure (network error, User-Agent lookup)
        # maps to the sentinel, but Ctrl-C / SystemExit still propagate.
        return "error"
def main():
    """Prompt for a song URL and save its hot comments to hot_comment.txt.

    Reads a URL from standard input, fetches the comment-API response via
    ``getHTMLText`` and decodes it as JSON. The ``content`` of every entry
    in the top-level ``hotComments`` list is written to ``hot_comment.txt``
    in the current working directory (UTF-8), each followed by a blank line.

    Prints ``error`` and writes nothing when the fetch fails, the response
    is not valid JSON, or it carries no ``hotComments`` list.
    """
    url = input("请输入网址:")
    html = getHTMLText(url)
    if html == 'error':
        print("error")
        return

    # Decode the response as JSON instead of regex-scanning it: this keeps
    # comments containing escaped quotes intact, turns JSON escapes into the
    # real characters, and skips nested reply contents.
    try:
        payload = json.loads(html)
    except json.JSONDecodeError:
        print("error")
        return

    hot_comments = payload.get("hotComments") if isinstance(payload, dict) else None
    if not isinstance(hot_comments, list):
        print("error")
        return

    contents = [
        entry["content"]
        for entry in hot_comments
        if isinstance(entry, dict) and isinstance(entry.get("content"), str)
    ]
    with open("hot_comment.txt", 'w', encoding='utf-8') as f:
        f.writelines(text + '\n\n' for text in contents)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

浙公网安备 33010602011771号