爬取网易云歌曲、歌词和专辑图片
这几天做项目需要一个音乐数据库,就跟着《python爬取网易云飙升榜数据》(听书人123的博客,CSDN博客)学习了一下,里面有些地方略有修改,在此保存一下。
# Scraper for a NetEase Cloud Music chart page.
#
# For every song linked from the chart it downloads the lyric file, the album
# cover and the mp3, and records one row per successfully downloaded song in
# an .xls workbook.  All output paths are hard-coded below.

import json
import re
import urllib.request

import requests
import xlwt
from lxml import etree

# Seconds to wait on each HTTP call made through `requests`, so that a stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 10

# Matches every non-digit character; used to reduce an href to its numeric id.
NON_DIGITS = re.compile(r'\D')

# Characters that Windows rejects in file names, mapped to harmless
# look-alikes.
# NOTE(review): the original paste replaced '?' and ':' with themselves
# (no-ops); the full-width forms used here are presumably what the scrape
# normalised away -- confirm.
FILENAME_FIXES = str.maketrans({
    '?': '?',
    ':': ':',
    '"': '·',
    '<': '·',
    '>': '·',
    '/': '·',
    '\\': '·',
    '|': '·',
    '*': '·',
})

workbook = xlwt.Workbook(encoding='utf-8')
# cell_overwrite_ok lets a row be rewritten if an earlier attempt filled it
# but the workbook could not be saved (for example while it is open in Excel).
wooksheet = workbook.add_sheet('歌曲', cell_overwrite_ok=True)
for column, heading in enumerate((
    'singed',
    'id',
    '歌曲名',
    '歌手名',
    '歌曲所在',
    '歌曲链接',
    '歌词名',
    '歌词所在',
    '歌词链接',
    '专辑图片链接',
)):
    wooksheet.write(0, column, heading)


def save():
    """Write the current song's row to the sheet and save the workbook.

    Takes no arguments; it reads the module-level names set by the main loop:
    ``count_id`` (row index), ``ids`` and ``i`` (song id), ``music_title``,
    ``singer``, ``download_url`` and ``ly`` (lyric API URL).

    The workbook is saved after every row so that an interrupted run keeps
    the rows written so far.
    """
    row = (
        count_id,
        ids[i],
        music_title,
        singer,
        'track/song/%s.mp3' % music_title,
        download_url,
        music_title + ".lrc",
        "track/Lrc/" + music_title + ".lrc",
        ly,
        # The cover is stored as a .jpg, not a .lrc as previously recorded.
        "track/cover/" + music_title + ".jpg",
    )
    for column, value in enumerate(row):
        wooksheet.write(count_id, column, value)
    workbook.save(r'C:\Users\Administrator\Desktop\song\song.xls')
    print(music_title)


def wordsong():
    """Download the current song's lyrics and write them to an .lrc file.

    Takes no arguments; it reads the module-level names ``ly`` (lyric API
    URL), ``headers`` and ``music_title`` set by the main loop.
    """
    res = requests.get(ly, headers=headers, timeout=REQUEST_TIMEOUT)
    payload = json.loads(res.text)
    # Fall back to an empty lyric when the response carries no 'lrc' entry
    # (presumably tracks without lyrics -- TODO confirm against the API).
    lyric = (payload.get('lrc') or {}).get('lyric') or ''
    name = "C:/Users/Administrator/Desktop/song/Lrc/" + music_title + ".lrc"
    with open(name, 'w', encoding='utf-8') as fh:
        fh.write(lyric)


url = "https://music.163.com/discover/toplist?id=3001835560"
# Other chart ids: 2809513713 = Western hot songs (100), 21845217 = KTV (10),
# 19723756 = rising chart (100), 71384707 = classical, 3001835560 = ACG,
# 3778678 = hot songs (200).
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/99.0.4844.51 Safari/537.36 ',
    'Referer': 'http://music.163.com/',
    'Host': 'music.163.com',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
}
home_response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
home_loadhtml = etree.HTML(home_response.text)
# Each href in the hidden song list is reduced to its digits, i.e. the song id.
ids = [
    NON_DIGITS.sub('', href)
    for href in home_loadhtml.xpath('//ul[@class="f-hide"]//a/@href')
]

# Next free row in the sheet; advanced only after a song is fully downloaded.
count_id = 1
for i, song_id in enumerate(ids):
    # The whole per-song pipeline is guarded so that one bad page or failed
    # download skips that song instead of ending the run.
    try:
        music_url = f"https://music.163.com/song?id={song_id}"
        music_response = requests.get(music_url, headers=headers, timeout=REQUEST_TIMEOUT)
        music_loadhtml = etree.HTML(music_response.text)
        cover = music_loadhtml.xpath('//meta[@property="og:image"]/@content')
        music_info = music_loadhtml.xpath('//title/text()')
        ly = f'http://music.163.com/api/song/lyric?id={song_id}&lv=1&kv=1&tv=-1'

        # The page title is split on '-': first part is the song name,
        # second part is the singer.
        title_parts = music_info[0].split('-')
        music_name = title_parts[0].rstrip()
        # Drop any parenthesised suffix, ASCII or full-width.
        # NOTE(review): the original paste partitioned on '(' twice; the second
        # call was presumably the full-width form -- confirm.
        music_name = music_name.partition('(')[0].partition('(')[0]
        singer = title_parts[1]
        music_title = (music_name + " --- " + singer.strip()).translate(FILENAME_FIXES).strip()

        download_url = "https://music.163.com/song/media/outer/url?id=" + song_id + ".mp3"

        print('正在下载第' + str(i + 1) + '首歌,为' + music_name)
        # allow_redirects=False is required: the media URL is read from the
        # Location header of the redirect response.
        res = requests.post(url=download_url, headers=headers,
                            allow_redirects=False, timeout=REQUEST_TIMEOUT)
        download_url = res.headers['location']
        if download_url == "http://music.163.com/404":
            print("没版权啊,sir")
            continue

        print(download_url)
        wordsong()
        urllib.request.urlretrieve(
            cover[0], "C:/Users/Administrator/Desktop/song/Cover/%s.jpg" % music_title)
        urllib.request.urlretrieve(
            download_url, 'C:/Users/Administrator/Desktop/song/Song/%s.mp3' % music_title)
        # Record the row only after every file has been downloaded, so the
        # sheet never lists a song whose files are missing.
        save()
        count_id += 1
        print('下载成功')
    except OSError:
        print("文件错误,检查是否已打开?")
    except Exception as e:
        # Exception rather than BaseException, so Ctrl-C still stops the run.
        print(e)
        print("上传失败")

浙公网安备 33010602011771号