爬取网易云音乐飙升榜(100首)
目的
- 学习 requests 模块的使用
- 练习爬虫的机制
- 仅供学习使用,不用于任何非法用途
爬取页面
代码
# -*- coding:utf-8 -*-
import os
import requests
from lxml import etree
import threading
class Get_Music:
    """Scrape the NetEase Cloud Music top-list page and download one track.

    Typical use is ``Get_Music().main()``: it fetches the chart page, prints a
    numbered list of the tracks found, asks the user for a number and
    downloads that track into ``./musics`` on a worker thread.
    """

    # Directory (relative to the current working directory) for saved files.
    DOWNLOAD_DIR = './musics'
    # Seconds to wait for the server before giving up; without a timeout a
    # stalled connection would block forever.
    REQUEST_TIMEOUT = 10
    # Width of the textual progress bar, in characters.
    _BAR_WIDTH = 50
    # Characters that are path separators or are rejected in file names on
    # common file systems; each one is replaced by "_" before saving.
    _UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'
    _FILENAME_TABLE = str.maketrans(
        _UNSAFE_FILENAME_CHARS, '_' * len(_UNSAFE_FILENAME_CHARS)
    )

    def __init__(self):
        self.song_url = 'https://music.163.com/discover/toplist'
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)',
            'referer': 'https://music.163.com/'
        }
        # Parallel lists: song_list[i] is the title of the track whose
        # download link is download_url[i].
        self.song_list = []
        self.download_url = []

    def get_music_url(self):
        """Fetch the chart page and fill ``song_list`` / ``download_url``.

        Both lists are rebuilt from scratch on every call, so calling this
        method twice does not duplicate entries. List items that lack a link
        text or an ``href`` containing ``=`` are skipped.
        """
        response = requests.get(
            url=self.song_url,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        html_page = etree.HTML(response.content.decode())

        names = []
        links = []
        # Each <li> in the hidden <ul class="f-hide"> holds one <a> whose text
        # is the song title and whose href carries the song id after "=".
        for item in html_page.xpath('//div/ul[@class="f-hide"]/li'):
            titles = item.xpath("./a/text()")
            hrefs = item.xpath("./a/@href")
            if not titles or not hrefs or '=' not in hrefs[0]:
                continue
            # Extract the song id from the player link.
            song_id = hrefs[0].split('=')[1]
            names.append(titles[0])
            # Build the download link from the song id.
            links.append(
                'http://music.163.com/song/media/outer/url?id=' + str(song_id)
            )

        self.song_list = names
        self.download_url = links

    def _safe_filename(self, name):
        """Return *name* with characters unsuitable for file names replaced."""
        cleaned = str(name).translate(self._FILENAME_TABLE).strip()
        return cleaned or 'untitled'

    def _print_progress(self, filename, received, total):
        """Redraw the single-line progress display for the current download."""
        if total > 0:
            # Clamp so the bar never overflows if more bytes arrive than the
            # content-length header announced.
            fraction = min(received / total, 1.0)
            done = int(fraction * self._BAR_WIDTH)
            bar = '█' * done + ' ' * (self._BAR_WIDTH - done)
            print("\r %s: [%s] %d%% %d/%d" % (filename, bar, fraction * 100, 1, 1), end="")
        else:
            # No usable content-length: show the byte count instead of a bar.
            print("\r %s: %d bytes" % (filename, received), end="")

    def save_file(self, number):
        """Download the track with the 1-based list number *number*.

        Args:
            number: The number shown in the printed list, as an ``int`` or a
                string that ``int()`` can parse. ``0`` means "quit".

        Raises:
            SystemExit: If *number* is ``0``.

        Anything that is not a number, or is outside ``1..len(list)``, prints
        the "invalid number" prompt and returns. Network and file errors are
        reported with their own message instead of being mistaken for bad
        input.
        """
        try:
            number = int(number)
        except (TypeError, ValueError):
            print("请输入有效数字编号......")
            return

        if number == 0:
            raise SystemExit

        # Reject out-of-range values explicitly; a negative index would
        # otherwise silently pick a track counted from the end of the list.
        available = min(len(self.song_list), len(self.download_url))
        if not 1 <= number <= available:
            print("请输入有效数字编号......")
            return

        filename = self._safe_filename(self.song_list[number - 1]) + ".mp3"
        try:
            os.makedirs(self.DOWNLOAD_DIR, exist_ok=True)
            # The context manager releases the streamed connection even when
            # the download is interrupted.
            with requests.get(
                url=self.download_url[number - 1],
                headers=self.headers,
                stream=True,
                timeout=self.REQUEST_TIMEOUT,
            ) as response:
                response.raise_for_status()
                # Total size announced by the server; 0 when the header is
                # absent.
                content_size = int(response.headers.get('content-length', 0))
                data_count = 0  # bytes written so far
                with open(f'{self.DOWNLOAD_DIR}/{filename}', mode='wb') as f:
                    for data in response.iter_content(chunk_size=1024):
                        f.write(data)
                        data_count += len(data)
                        self._print_progress(filename, data_count, content_size)
            # Finish the "\r"-redrawn progress line.
            print()
        except (requests.RequestException, OSError, ValueError) as err:
            print(f"\n下载失败: {filename} ({err})")

    def main(self):
        """List the chart, prompt for a track number and start the download."""
        # Make sure the directory for the audio files exists.
        os.makedirs(self.DOWNLOAD_DIR, exist_ok=True)
        self.get_music_url()
        print("----------------------------------------")
        print('编号\t\t', '名称')
        # enumerate gives every row its own number, even for duplicate titles.
        for index, name in enumerate(self.song_list, start=1):
            print(index, '\t\t', name)
        print('----------------------------------------')
        # Run the download on a worker thread.
        number = input('请输入歌曲编号,回车即可下载(退出请按:0): ')
        threading.Thread(target=self.save_file, args=(number,)).start()
if __name__ == '__main__':
    # Script entry point: build the scraper and run its interactive flow.
    Get_Music().main()
