爬取网易云排行榜

import requests
import json
import MySQLdb
from lxml import etree
# HTTP headers sent with each chart request: a desktop browser User-Agent.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/84.0.4147.89 Safari/537.36 Edg/84.0.522.40'
    ),
}

# Chart display name -> chart id placed in the ``id`` query parameter.
paths = {
    "云音乐飙升榜": '19723756',
    "云音乐新歌榜": '3779629',
    "网易原创歌曲榜": '2884035',
    "云音乐热歌榜": '3778678',
}


# Scrape every chart listed in ``paths`` and insert its songs into the
# ``wangyy`` MySQL table.
#
# A single connection is shared by the whole run and is always closed, even
# when a request or an insert fails. Values are passed to the driver as query
# parameters, so quotes inside song or artist names cannot break the statement
# or inject SQL.
insert_sql = (
    'insert into wangyy(`num`,`name`,`songer`,`duration`,`rankName`) '
    'values(%s,%s,%s,%s,%s)'
)

# NOTE(review): the database credentials are hard-coded here — consider
# loading them from configuration or the environment.
# autocommit=True makes every insert persistent without an explicit commit().
mydb = MySQLdb.connect("localhost", "root", "123456", "wangyiyun", charset='utf8', autocommit=True)
try:
    mycursor = mydb.cursor()
    for key, rank_id in paths.items():
        print('==========================%s==============================' % (key))
        url = "https://music.163.com/discover/toplist?id=%s" % (rank_id)
        # A timeout keeps the script from hanging forever on a stalled
        # connection; an HTTP error status is raised instead of being parsed.
        response = requests.get(url, headers=header, timeout=10)
        response.raise_for_status()
        response.encoding = 'utf-8'
        html = etree.HTML(response.text)

        # The song list is embedded in the page as a JSON array inside a
        # <textarea id="song-list-pre-data"> element.
        song_json_nodes = html.xpath("//textarea[@id='song-list-pre-data']/text()")
        if not song_json_nodes:
            # Skip this chart but keep going with the remaining ones.
            print('no song data found for %s, skipped' % (key))
            continue
        song_list = json.loads(song_json_nodes[0])

        # ``index`` is the 1-based rank; the same value is printed and stored.
        for index, song in enumerate(song_list, start=1):
            name = song["name"]
            # Keep every artist (joined with "/"); an empty artist list
            # yields an empty string instead of reusing a stale value.
            songers = "/".join(songer["name"] for songer in song["artists"])
            duration = song["duration"]
            song_map = {"num": index, "name": name, "songer": songers, "duration": duration, "rankName": key}
            print(song_map)
            # Store the song in the database.
            mycursor.execute(insert_sql, (index, name, songers, duration, key))
finally:
    mydb.close()

posted @ 2020-10-25 00:34  不忘初心_LZY  阅读(220)  评论(0)  编辑  收藏  举报