爬取网易云音乐热评

实现对网页版网易云音乐热评的爬取：输入歌曲页面的网址，程序会把热评保存到当前目录下的 hot_comment.txt 文件中。

正则表达式真好用!!

""" 
    2019/10/4
    version: 1.0.0
    by Zeronera
    爬取网易云音乐热评
"""
from bs4 import BeautifulSoup
import requests
import re
from fake_useragent import UserAgent  # user-agent池

def getHTMLText(url):
    """Fetch the raw comment-API response for a NetEase Cloud Music song.

    The song id is extracted from *url* and substituted into the comment
    endpoint URL, which is then POSTed to with hard-coded ``params`` /
    ``encSecKey`` form fields.
    NOTE(review): those two fields are presumably a pre-encrypted request
    captured from the web client -- confirm they are still accepted.

    Args:
        url: Address of the song page as entered by the user, e.g.
            ``https://music.163.com/#/song?id=123456``.

    Returns:
        The response body as text, or the sentinel string ``"error"`` when
        no song id can be found in *url* or the request fails.
    """
    # Prefer an explicit id=<digits> parameter so that trailing parameters
    # ("...?id=123&userid=456") do not leak into the song id.
    id_match = re.search(r'[?&]id=(\d+)', url)
    if id_match:
        music_id = id_match.group(1)
    elif "=" in url:
        # Legacy behaviour: take whatever follows the first "=".
        music_id = url.split("=")[1]
    else:
        music_id = ""
    if not music_id:
        # Without an id there is nothing to request; report it through the
        # same sentinel the caller already checks instead of crashing.
        return "error"

    url = "https://music.163.com/weapi/v1/resource/comments/R_SO_4_%s?csrf_token=" % music_id
    try:
        headers = {'User-Agent': UserAgent().random}
        data = {'params': 'nSQPTf3PYMn4ioHoOx+8GFAUoA6edfR07VISV2/2rXXE7S4h8lhHKsZoNtqNh65vHcXqwv59DwwVeclxC0TWvPebCBFD9j1NcoVxXtJ9awrkYu2DwAElmtbfmTeaz7uvcCNUK+nXr5dAOh7NKaeZKGj6kDOBeuCIH15zIcH8FdqYPBfOw53SyhjMINwWnf2/',
                'encSecKey': '0b49c98190a4819faf9ae3d3b359e3544cf252f721b3a4b00532e8ebc30b693bef1018d64307e9b2c71f94f997b5f585e0684cd8b810b660061aff692a849ed81fd4db8c931b02d4f9c17efe3089364254135fc25759a1a6a441f8a11efbb7d41cd67fcec78aa645f6c2587c58e1f9015ec30b365d95be11f260964e0233c88e'}
        # A timeout keeps the script from hanging forever on a stalled
        # connection; requests applies none by default.
        r = requests.post(url, headers=headers, data=data, timeout=10)
        # Treat HTTP error statuses as failures rather than returning an
        # error page as if it were comment data.
        r.raise_for_status()
        return r.text
    except Exception:
        # Deliberately broad: any failure (network error, user-agent lookup
        # failure, bad status) is reported as "error". Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        return "error"

def _parse_hot_comments(html):
    """Return the hot-comment texts found in a JSON response body.

    Args:
        html: Response text expected to be a JSON object with a
            ``hotComments`` list whose items carry a ``content`` string.

    Returns:
        A list of comment strings (possibly empty), or ``None`` when *html*
        is not valid JSON or has no ``hotComments`` list.
    """
    import json  # local import: only needed for parsing the response

    try:
        payload = json.loads(html)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    if not isinstance(payload, dict):
        return None
    hot_comments = payload.get("hotComments")
    if not isinstance(hot_comments, list):
        return None
    # Skip malformed entries instead of failing the whole export.
    return [
        entry["content"]
        for entry in hot_comments
        if isinstance(entry, dict) and isinstance(entry.get("content"), str)
    ]


def main():
    """Prompt for a song URL and save its hot comments to hot_comment.txt.

    The response is parsed as JSON rather than scanned with regular
    expressions, so comments containing quotes are not truncated and escape
    sequences (``\\n``, ``\\"``, ``\\uXXXX``) are written decoded. Prints
    ``error`` when the request fails or the response does not contain a
    ``hotComments`` list. Comments are written to ``hot_comment.txt`` in the
    current working directory, each followed by a blank line.
    """
    url = input("请输入网址:")
    html = getHTMLText(url)
    if html == 'error':
        print("error")
        return

    comment = _parse_hot_comments(html)
    if comment is None:
        # Unexpected response shape: report it the same way as a failed
        # request instead of raising IndexError.
        print("error")
        return

    with open("hot_comment.txt", 'w', encoding='utf-8') as f:
        for text in comment:
            f.write(text + '\n\n')
    
# Run the scraper only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

posted @ 2019-10-04 21:02  Zeronera  阅读(434)  评论(0)    收藏  举报