网易云音乐小爬爬
一:爬取用户喜欢的音乐歌单
import random
import time
import traceback

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Base URL of a playlist page; the playlist id is appended to it.
part_url = "https://music.163.com/#/playlist?id="


def _collect_songs(driver, timeout):
    """Return one dict per song found in the currently selected frame.

    Blocks for up to ``timeout`` seconds until an element with class
    ``f-ff2`` is present, then reads five parallel element lists and
    combines them row by row.
    """
    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'f-ff2'))
    )

    # find_elements(By.XPATH, ...) replaces the find_elements_by_xpath
    # helpers, which no longer exist in current Selenium releases.
    title_ls = driver.find_elements(
        By.XPATH, '//a[contains(@href,"/song")]/b')
    time_ls = driver.find_elements(
        By.XPATH, '//span[contains(@class,"u-dur")]')
    singer_ls = driver.find_elements(
        By.XPATH, '//div[contains(@class,"text")]/span')
    special_ls = driver.find_elements(
        By.XPATH, '//div[contains(@class,"text")]/a')
    song_is_ls = driver.find_elements(
        By.XPATH, '//a[contains(@href,"/song")]')

    songs = []
    # Index the parallel lists (instead of zip) so that a length mismatch
    # raises IndexError rather than silently dropping rows.
    for index, title in enumerate(title_ls):
        print(index)
        songs.append({
            "_id": index + 1,
            "title": title.get_attribute("title"),
            "total_time": time_ls[index].text,
            "singer": singer_ls[index].get_attribute("title"),
            "special": special_ls[index].get_attribute("title"),
            "link": song_is_ls[index].get_attribute("href"),
        })
    return songs


def get_data(id, timeout=5):
    """Scrape the songs of one playlist and store them via ``save``.

    Args:
        id: Playlist id appended to ``part_url``.
        timeout: Seconds to wait for the song list to appear (default 5).

    Errors raised while scraping or saving are printed with their traceback
    and swallowed; errors raised while loading the page or switching to the
    ``g_iframe`` frame propagate to the caller. In every case the browser
    session is shut down before returning.
    """
    new_url = "%s%s" % (part_url, id)
    driver = webdriver.Chrome()
    try:
        driver.get(new_url)
        driver.switch_to.frame('g_iframe')
        try:
            save(id, _collect_songs(driver, timeout))
        except Exception:
            traceback.print_exc()
    finally:
        # quit() ends the whole WebDriver session; close() would only close
        # the current window and leave the driver process behind.
        driver.quit()


def save(id, ls):
    """Insert the scraped song dicts into MongoDB.

    Args:
        id: Playlist id; documents go to collection ``user<id>`` of the
            ``music`` database.
        ls: List of song dicts. An empty list is skipped, because
            ``insert_many`` rejects empty input.

    Each document carries an explicit ``_id`` (1..n), so saving the same
    playlist twice into an uncleared collection fails on duplicate keys.
    """
    from pymongo import MongoClient

    if not ls:
        return

    # NOTE(review): credentials are hard-coded in the connection string;
    # consider moving them to configuration.
    client = MongoClient('mongodb://root:xxxxxx@localhost:27017/')
    try:
        db = client['music']
        table_user = db["user%s" % id]
        table_user.insert_many(ls)
    finally:
        # Release the connection pool even if the insert fails.
        client.close()


if __name__ == '__main__':
    for playlist_id in ['77424238']:
        get_data(playlist_id)
        # Short random pause between playlists.
        time.sleep(random.randint(1, 2))

浙公网安备 33010602011771号