B站视频爬取
代码
import requests
import os
import re
import json
import subprocess
# Download the video and audio streams of one Bilibili video page and mux
# them into ./data/play.mp4 with ffmpeg.
#
# Flow: fetch the page HTML -> extract the JSON assigned to
# window.__playinfo__ -> take the first video and first audio stream URL ->
# download both to ./data -> mux with ffmpeg -> delete the intermediates.

# Page to scrape; the same URL is also sent as the referer header.
url = 'https://www.bilibili.com/video/BV1uv411578j'
# Headers sent with every request (the page fetch and both media downloads).
headers = {
    'referer': 'https://www.bilibili.com/video/BV1uv411578j',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
}

# Seconds to wait for a connection / for each read before giving up, so a
# stalled server cannot hang the script forever.
request_timeout = 30

data_dir = './data'
video_path = './data/video.mp4'
audio_path = './data/audio.mp3'
output_path = './data/play.mp4'


def _download(src_url, dest_path):
    """Stream ``src_url`` to ``dest_path`` in chunks.

    Uses the module-level ``headers`` for the request. Raises
    ``requests.HTTPError`` on a non-2xx response so that an error page is
    never written to disk as if it were media.
    """
    with requests.get(url=src_url, headers=headers, stream=True,
                      timeout=request_timeout) as resp:
        resp.raise_for_status()
        with open(dest_path, 'wb') as f:
            # Chunked writes keep memory flat regardless of the file size.
            for chunk in resp.iter_content(chunk_size=64 * 1024):
                f.write(chunk)


# Fetch the video page; fail loudly on an HTTP error instead of trying to
# parse an error page.
page = requests.get(url=url, headers=headers, timeout=request_timeout)
page.raise_for_status()
response = page.text

# The page embeds the stream description as JSON inside a <script> tag.
rule = re.compile(r"<script>window.__playinfo__=(.*?)</script>", re.S)
matches = rule.findall(response)
if not matches:
    raise RuntimeError("window.__playinfo__ was not found in the page at " + url)
info = json.loads(matches[0])

# First entry of each stream list, as listed in the payload.
video_url = info["data"]["dash"]["video"][0]["baseUrl"]
audio_url = info["data"]["dash"]["audio"][0]["baseUrl"]

# The output directory is not guaranteed to exist on a fresh checkout.
os.makedirs(data_dir, exist_ok=True)
_download(video_url, video_path)
_download(audio_url, audio_path)

# "-c copy" muxes the two inputs without re-encoding.
cmd = [r"Y:\ffmpeg-4.4-essentials_build\bin\ffmpeg.exe", "-i", video_path, "-i", audio_path, "-c",
       "copy", output_path]
# Argument list without a shell: every argument reaches ffmpeg verbatim on
# any platform. check=True raises CalledProcessError when ffmpeg fails, so
# the intermediates below are kept and no success message is printed.
subprocess.run(cmd, check=True)

# Remove the intermediates; a file that is already gone is not an error.
for leftover in (video_path, audio_path):
    try:
        os.remove(leftover)
    except FileNotFoundError:
        pass
print("音视频下载成功")
