爬虫案例_网易云歌单
技术点:
csv 转换数据格式
网易云没换爬虫策略的话代码应该是可以直接跑的
爬虫入门试了一下还是很简单的,上手一天差不多就可以直接爬了。
案例代码:
from selenium import webdriver
import time
import json
import csv
class WangyiMusicSpider():
    """Scrape the NetEase Cloud Music playlist page with Selenium and
    export the collected items first to JSON, then to CSV."""

    def __init__(self, url):
        # NOTE(review): passing the driver path positionally is deprecated in
        # Selenium 4 (a Service object is expected there); kept as-is because
        # this script targets the Selenium 3 find_element_by_* API throughout.
        self.browser = webdriver.Chrome('chromedriver')
        self.browser.get(url)
        time.sleep(2)  # crude fixed wait for the page (and its iframe) to load

    def json_to_scv(self):  # (sic) name kept for backward compatibility
        """Read the dumped wangyimisic.json file and rewrite it as wangyimusic.csv.

        The first row holds the column headers (the dict keys of the first
        item); each following row holds one playlist entry's values.
        """
        with open("wangyimisic.json", "r", encoding="utf-8") as r:
            results = json.load(r)
        # newline='' stops the csv module from emitting blank rows on Windows;
        # the context manager guarantees the file is closed even on error.
        with open('wangyimusic.csv', 'w', encoding='utf-8', newline='') as f:
            csv_writer = csv.writer(f)
            if results:  # guard: an empty scrape would crash on results[0]
                csv_writer.writerow(results[0].keys())
                for result in results:
                    csv_writer.writerow(result.values())

    def main(self):
        """Extract all playlist cards from the page, dump them to JSON,
        then convert that JSON to CSV via json_to_scv()."""
        # The playlist grid lives inside the g_iframe iframe; switch into it
        # first or the XPath queries below find nothing.
        iframe_element = self.browser.find_element_by_id('g_iframe')
        self.browser.switch_to.frame(iframe_element)
        data_list = self.browser.find_elements_by_xpath('.//ul[@id="m-pl-container"]/li')
        results = []
        for li in data_list:
            results.append({
                'photo': li.find_element_by_xpath('.//img').get_attribute('src'),
                'music_link': li.find_element_by_xpath('.//div/a').get_attribute('href'),
                'hot': li.find_element_by_xpath('.//span[@class="nb"]').text,
                'title': li.find_element_by_xpath('./p/a').text,
                'name': li.find_element_by_xpath('.//p[last()]/a').text,
            })
        # ensure_ascii=False keeps Chinese titles human-readable in the file
        with open('wangyimisic.json', 'w', encoding='utf-8') as a:
            a.write(json.dumps(results, ensure_ascii=False))
        self.json_to_scv()

    def quit(self):
        """Close the browser and terminate the WebDriver session."""
        self.browser.quit()
if __name__ == '__main__':
    url = 'https://music.163.com/#/discover/playlist'
    obj = WangyiMusicSpider(url)
    try:
        obj.main()
    finally:
        # Always release the browser, even if scraping raised an exception;
        # otherwise a failed run leaks a chromedriver process.
        obj.quit()

浙公网安备 33010602011771号