# Python学习记录5:爬取网易云音乐爬虫代码
#爬取163音乐
import os
from urllib.parse import parse_qs, urlsplit

import requests
from lxml import etree
# Chart page that is scraped for song links.
url = 'https://music.163.com/discover/toplist?id=3779629'
# Prefix of the direct media URL; a song id is appended to it.
domain = 'http://music.163.com/song/media/outer/url?id='
# Directory the MP3 files are written to.  Raw string: the backslashes are
# literal path separators, not escape sequences.
save_dir = r'D:\mypython\爬163音乐\新歌'
# Seconds to wait on a request before giving up instead of hanging forever.
request_timeout = 10

# Maps every character that Windows forbids in a file name to '_'.
_FORBIDDEN_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def extract_song_id(href):
    """Return the ``id`` query parameter of a ``/song?id=...`` link.

    The value is parsed as a query string rather than stripped with
    ``str.strip``, which removes a *set of characters* and not a prefix.

    Args:
        href: The ``href`` attribute of a song anchor.

    Returns:
        The id as a string, or ``''`` when the link carries no ``id`` value.
    """
    values = parse_qs(urlsplit(href).query).get('id')
    return values[0].strip() if values else ''


def safe_filename(name):
    """Return *name* stripped and with path-breaking characters replaced.

    Args:
        name: Song title as found on the page.

    Returns:
        The title with surrounding whitespace removed and each of
        ``\\ / : * ? " < > |`` replaced by ``_``.
    """
    return name.strip().translate(_FORBIDDEN_FILENAME_CHARS)


def download_chart():
    """Download every song linked from the chart page into ``save_dir``.

    Files are named ``<n>.<title>.mp3`` where ``n`` counts the usable song
    links in page order.  A song whose download fails is reported and
    skipped; a failure to fetch the chart page itself propagates.

    Raises:
        requests.RequestException: If the chart page cannot be fetched.
    """
    page = requests.get(url, timeout=request_timeout)
    page.raise_for_status()
    tree = etree.HTML(page.text)

    os.makedirs(save_dir, exist_ok=True)

    i = 0
    # Read id and title from the same anchor so they can never get out of
    # step, which two independent XPath result lists zipped together can.
    for anchor in tree.xpath('//a[contains(@href,"/song?")]'):
        song_id = extract_song_id(anchor.get('href'))
        song_name = (anchor.text or '').strip()

        # Skip links without an id, template placeholders (their id contains
        # '$') and anchors with no title to name the file after.
        if not song_id or '$' in song_id or not song_name:
            continue

        song_url = domain + song_id
        print(song_url)
        i += 1

        try:
            response = requests.get(song_url, timeout=request_timeout)
            # Do not save an HTTP error page under an .mp3 name.
            response.raise_for_status()
        except requests.RequestException as err:
            print('歌曲{}{}.mp3下载失败: {}'.format(i, song_name, err))
            continue

        target = os.path.join(
            save_dir, '{}.{}.mp3'.format(i, safe_filename(song_name)))
        with open(target, 'wb') as file:
            file.write(response.content)
        print('歌曲{}{}.mp3下载成功!'.format(i, song_name))


if __name__ == '__main__':
    download_chart()