【Python】某站音视频爬取(转载)

import pprint
import requests
import re
import json

# Fetch one video page, pull the embedded play-info JSON out of the HTML,
# and save the first audio stream and the first video stream to local files.

REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely when no timeout is given
CHUNK_SIZE = 1024 * 1024  # bytes read per iteration while streaming a download


def _safe_filename(name):
    """Return *name* with characters Windows forbids in file names replaced by '_'."""
    return re.sub(r'[\\/:*?"<>|]', '_', name).strip()


def _first_group(pattern, text, what):
    """Return group 1 of the first match of *pattern* in *text*.

    Raises:
        ValueError: if *pattern* does not match; *what* names the missing piece.
    """
    match = re.search(pattern, text)
    if match is None:
        raise ValueError('could not find ' + what + ' in the page source')
    return match.group(1)


def _download(http, media_url, request_headers, path):
    """Stream *media_url* to *path* in chunks instead of buffering it in memory.

    The HTTP status is checked before the file is opened, so a failed request
    does not leave an empty or bogus output file behind.
    """
    with http.get(media_url, headers=request_headers, stream=True,
                  timeout=REQUEST_TIMEOUT) as response:
        response.raise_for_status()
        with open(path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                f.write(chunk)


session = requests.session()
url = '你需要爬取的音视频网址'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36 Edg/106.0.1370.37',
    "Referer": "https://www.bilibili.com",
}
resp = session.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
resp.raise_for_status()  # stop here on an HTTP error instead of parsing an error page
print(resp.text)

title = _first_group(r'<title>(.*?)_哔哩哔哩_bilibili', resp.text, 'the video title')
play_info = _first_group(r'<script>window.__playinfo__=(.*?)</script>', resp.text,
                         'the window.__playinfo__ script')

json_data = json.loads(play_info)
pprint.pprint(json_data)  # pretty-print the structure so it is easy to inspect
# Index [0] is the highest quality stream according to the original author.
audio_url = json_data['data']['dash']['audio'][0]['backupUrl'][0]  # audio stream address
video_url = json_data['data']['dash']['video'][0]['backupUrl'][0]  # video stream address

# The title comes from the web page, so strip characters that are illegal in
# Windows file names before building the output paths.
safe_title = _safe_filename(title)
_download(session, audio_url, headers, r'E:\常用文件夹\某站爬虫\【音频】' + safe_title + '.mp3')
_download(session, video_url, headers, r'E:\常用文件夹\某站爬虫\【视频】' + safe_title + '.mp4')

 

posted @ 2025-10-31 22:38  山鬼谣`  阅读(3)  评论(0)    收藏  举报