网易云音乐小爬爬

一、爬取用户喜欢的音乐歌单

import traceback
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import random

# Base URL of a playlist page; get_data appends the playlist id to it.
part_url = "https://music.163.com/#/playlist?id="

def get_data(id):
    """Scrape the track list of one playlist page and pass it to ``save``.

    Opens ``part_url + id`` in a new Chrome session, switches into the
    ``g_iframe`` frame, collects the per-track elements via XPath and builds
    one dict per track (``_id``, ``title``, ``total_time``, ``singer``,
    ``special``, ``link``).

    Args:
        id: Playlist id appended to ``part_url``. The name shadows the
            builtin but is kept so existing callers keep working.

    Returns:
        None. Any exception raised while loading, scraping or saving is
        printed with ``traceback.print_exc()`` and not re-raised.
    """
    new_url = "%s%s" % (part_url, id)
    driver = webdriver.Chrome()
    try:
        # Navigation happens inside the try so the browser is always released
        # by the finally clause, even when the page or frame fails to load.
        driver.get(new_url)
        driver.switch_to.frame('g_iframe')
        wait = WebDriverWait(driver, 5)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'f-ff2')))

        # find_elements(By.XPATH, ...) works on Selenium 3 and 4; the
        # find_elements_by_xpath shortcut was removed in Selenium 4.
        title_ls = driver.find_elements(By.XPATH, '//a[contains(@href,"/song")]/b')
        time_ls = driver.find_elements(By.XPATH, '//span[contains(@class,"u-dur")]')
        singer_ls = driver.find_elements(By.XPATH, '//div[contains(@class,"text")]/span')
        special_ls = driver.find_elements(By.XPATH, '//div[contains(@class,"text")]/a')
        song_is_ls = driver.find_elements(By.XPATH, '//a[contains(@href,"/song")]')

        # zip pairs the columns positionally and stops at the shortest one,
        # so a length mismatch between columns no longer raises IndexError.
        rows = zip(title_ls, time_ls, singer_ls, special_ls, song_is_ls)
        ls = [
            {
                "_id": index,
                "title": title.get_attribute("title"),
                "total_time": duration.text,
                "singer": singer.get_attribute("title"),
                "special": special.get_attribute("title"),
                "link": link.get_attribute("href"),
            }
            for index, (title, duration, singer, special, link)
            in enumerate(rows, start=1)
        ]

        if ls:
            save(id, ls)
        else:
            # insert_many rejects an empty document list, so skip the write.
            print("no tracks found for playlist %s" % id)
    except Exception:
        traceback.print_exc()
    finally:
        # quit() ends the whole WebDriver session (browser and driver
        # process); close() would only close the current window.
        driver.quit()


def save(id, ls):
    """Insert the scraped documents into the ``music`` MongoDB database.

    Documents are written to the collection named ``"user%s" % id``.

    Args:
        id: Value used to build the collection name.
        ls: List of documents (dicts) to insert. An empty list is a no-op.

    Returns:
        None.

    Raises:
        Whatever ``pymongo`` raises on connection or insert failure.
        NOTE(review): if the documents carry fixed ``_id`` values, inserting
        the same data twice presumably fails with a duplicate-key error -
        confirm against the caller.
    """
    if not ls:
        # insert_many raises on an empty document list; nothing to store.
        return

    from pymongo import MongoClient

    # NOTE(review): credentials are hard-coded in the connection URI;
    # consider reading them from the environment or a config file.
    client = MongoClient('mongodb://root:xxxxxx@localhost:27017/')
    try:
        db = client['music']
        table_user = db["user%s" % id]
        table_user.insert_many(ls)
    finally:
        # Release the connection pool even when the insert fails.
        client.close()


if __name__ == '__main__':
    # Playlist ids to crawl; get_data opens a fresh browser for each one.
    # The loop variable is not called `id`, so the builtin is not shadowed
    # at module scope.
    for playlist_id in ['77424238']:
        get_data(playlist_id)
        # Random 1-2 second pause after each playlist
        # (presumably to throttle requests to the site).
        time.sleep(random.randint(1, 2))
（以上为面条版代码，即未经重构的初版。）

 

posted @ 2019-01-24 16:34  风中琉璃  阅读(188)  评论(0)    收藏  举报