Python信息采集 - 喜马拉雅专辑歌曲

Python信息采集 - 喜马拉雅专辑歌曲

setting.py

喜马拉雅URL

# Album track-list endpoint template. The two %s placeholders are filled,
# in order, with the albumId and pageNum query values.
XMLY_URL = (
    "https://www.ximalaya.com/revision/play/album"
    "?albumId=%s&pageNum=%s&sort=-1&pageSize=30"
)

# Request headers carrying a desktop Chrome User-Agent string.
HEADER = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/73.0.3683.86 Safari/537.36"
    ),
}

数据库配置

import pymongo
# Client for the MongoDB server at 127.0.0.1 on port 27017.
conn = pymongo.MongoClient(host="127.0.0.1", port=27017)

# Handle to the "db3" database on that server.
MONGODB = conn["db3"]

目录配置

# Directory names under which downloaded audio files and cover images
# are stored, respectively.
MUSIC_PATH, COVER_PATH = "Music", "Cover"

data.py

import time

from setting import XMLY_URL, HEADER, MONGODB, MUSIC_PATH, COVER_PATH
import requests, os
from uuid import uuid4

# Download every track on page 1 of album 17514344: save the audio file and
# cover image to disk under a shared random name, then record the file names
# and track title in the MongoDB "content" collection.
my_url = XMLY_URL % ("17514344", "1")

# Fail fast on an HTTP error instead of trying to parse an error page as JSON.
# The timeout values in this script are arbitrary safety limits so that a
# stalled connection cannot hang the run forever.
res = requests.get(my_url, headers=HEADER, timeout=10)
res.raise_for_status()
data = res.json()
content_list = []

# open(..., "wb") does not create parent directories, so make sure the
# output directories exist before the first write.
os.makedirs(MUSIC_PATH, exist_ok=True)
os.makedirs(COVER_PATH, exist_ok=True)

# Tolerate a response in which "data" or "tracksAudioPlay" is missing or null.
track_list = (data.get("data") or {}).get("tracksAudioPlay") or []

for music_info in track_list:
    music = {
        "music": "",
        "cover": "",
        "title": "",
    }

    audio_src = music_info.get("src")
    if not audio_src:
        # Without an audio URL there is nothing to download for this entry.
        print("skip (no audio src):", music_info.get("trackName"))
        continue

    cover_src = music_info.get("trackCoverPath")
    cover = None

    # Fetch both resources before writing anything, so that a failed request
    # does not leave a half-saved track on disk.
    try:
        audio = requests.get(audio_src, timeout=30)
        audio.raise_for_status()

        if cover_src:
            # Prepend a scheme only when the path does not already carry one.
            if cover_src.startswith("http"):
                cover_url = cover_src
            else:
                cover_url = "http:" + cover_src
            cover = requests.get(cover_url, timeout=30)
            print(cover, "cover")
            cover.raise_for_status()
    except requests.RequestException as exc:
        # One bad track should not abort the whole run.
        print("skip (download failed):", music_info.get("trackName"), exc)
        continue

    # The same random base name ties the audio file to its cover image.
    filename = uuid4()

    audio_path = os.path.join(MUSIC_PATH, f"{filename}.mp3")
    with open(audio_path, "wb") as f:
        f.write(audio.content)
    music["music"] = f"{filename}.mp3"

    if cover is not None:
        cover_path = os.path.join(COVER_PATH, f"{filename}.jpg")
        with open(cover_path, "wb") as f:
            f.write(cover.content)
        music["cover"] = f"{filename}.jpg"

    music["title"] = music_info.get("trackName")

    content_list.append(music)

    # Short pause between tracks to avoid hammering the server.
    time.sleep(0.2)

# insert_many() rejects an empty document list, so only call it when at
# least one track was collected.
if content_list:
    MONGODB.content.insert_many(content_list)

posted @ 2019-05-21 16:28  孔辉  阅读(368)  评论(0)    收藏  举报