"""Scrape Kugou ranking pages and print one record per listed song."""
import sys
import time

import requests
from bs4 import BeautifulSoup

# Request headers: present the scraper as a desktop Chrome browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
}


def _split_title(text):
    """Split a '<singer>-<song>' title into a ``(singer, song)`` tuple.

    The text is split once only, so hyphens that occur later in the title
    stay in the song name. A spaced separator (' - ') is tried first so that
    hyphens inside a singer's name survive when the page uses that format;
    otherwise the first bare '-' is used. When there is no hyphen at all the
    singer is returned as an empty string instead of raising ``IndexError``.
    """
    for separator in (' - ', '-'):
        singer, found, song = text.partition(separator)
        if found:
            return singer.strip(), song.strip()
    return '', text.strip()


def get_infos(url):
    """Fetch one ranking page and print a record for every song on it.

    Each record is a dict with the keys ``rank``, ``song``, ``singer`` and
    ``time``, printed to standard output.

    Args:
        url: Address of the ranking page to download.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # `headers` has to be passed by keyword: the second positional parameter
    # of requests.get() is `params`, which would put the dict into the query
    # string and leave the User-Agent header unset.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    bs = BeautifulSoup(resp.text, 'lxml')

    # CSS selectors are used here; an alternative way to get the rank cells
    # is bs.find_all('span', {'class': 'pc_temp_num'}).
    ranks = bs.select('#rankWrap > div.pc_temp_songlist > ul > li > span.pc_temp_num')
    titles = bs.select('#rankWrap > div.pc_temp_songlist > ul > li > a')
    times = bs.select('#rankWrap > div.pc_temp_songlist > ul > li > span.pc_temp_tips_r > span')

    # The loop variable is named `duration` so it does not shadow the `time`
    # module. zip() stops at the shortest of the three lists.
    for rank, title, duration in zip(ranks, titles, times):
        singer, song = _split_title(title.text)
        data = {
            'rank': rank.text.strip(),
            'song': song,
            'singer': singer,
            'time': duration.text.strip(),
        }
        print(data)


def main():
    """Scrape ranking pages 1 through 23, pausing two seconds between pages."""
    urls = ['http://www.kugou.com/yy/rank/home/{}-8888.html'.format(i) for i in range(1, 24)]
    for url in urls:
        try:
            get_infos(url)
        except requests.RequestException as err:
            # Report the failed page and carry on with the remaining ones.
            print('failed to fetch {}: {}'.format(url, err), file=sys.stderr)
        time.sleep(2)  # two-second delay between requests


if __name__ == '__main__':
    main()
# 浙公网安备 33010602011771号  (web-page footer captured with the paste; not part of the script)