异步爬取B站热门视频
目录
这里就不讲思路了,有点累了,直接上代码吧!代码写得有点乱,勿喷哈哈哈!
#@author: 袁小黑
#@date: 2025/03/15
import requests
import re
from tqdm import tqdm
import time
import json
import os
import brotli
import asyncio
import aiofiles
import aiohttp
def test_url(url,headers,params,title,bvid):
    """Fetch the play-URL payload for one video and record its stream links.

    Sends a GET request to ``url`` and reads the ``base_url`` of the first
    DASH video entry and the first DASH audio entry from the JSON reply.
    When both links are usable, one tab-separated ``url<TAB>bvid<TAB>title``
    line is appended to ``./video/audio.txt`` and to ``./video/video.txt``,
    and the module-level ``count`` is incremented. Otherwise a failure
    message is printed and nothing is written.

    Args:
        url: Endpoint to query.
        headers: HTTP headers passed to ``requests.get``.
        params: Query parameters passed to ``requests.get``.
        title: Title stored in the record files and shown in messages.
        bvid: Video id stored in the record files.

    Returns:
        The ``requests`` response object, on success and on failure alike.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            exceeds the timeout.
    """
    global count
    # A timeout keeps one stalled request from blocking the whole crawl.
    response_json1 = requests.get(url, headers=headers, params=params, timeout=30)
    # Decode the body once and reuse it for both stream lookups.
    payload = response_json1.json()
    dash = (payload.get('data') or {}).get('dash') or {}
    video_streams = dash.get('video') or []
    audio_streams = dash.get('audio') or []
    base_url = video_streams[0].get('base_url') if video_streams else None
    audio_url = audio_streams[0].get('base_url') if audio_streams else None
    # A video link ending in '000' is rejected, exactly as the original check did
    # (NOTE(review): the meaning of that suffix is not stated in this file).
    # A reply without usable video/audio entries is reported the same way
    # instead of raising KeyError/TypeError.
    if not base_url or not audio_url or base_url.endswith('000'):
        print(f"《{title}》url链接下载失败")
        return response_json1
    record_tail = f'\t{bvid}\t{title}\n'
    with open('./video/audio.txt', 'a', encoding='utf-8') as f:
        f.write(audio_url + record_tail)
    with open('./video/video.txt', 'a', encoding='utf-8') as f:
        f.write(base_url + record_tail)
    # Count the link only after both records were written successfully.
    count += 1
    return response_json1
def get_hot_video_info(page_num):
    """Request one page of the Bilibili popular-video list.

    Args:
        page_num: Page number to request; it is sent as the ``pn`` query
            parameter after conversion with ``str``.

    Returns:
        The decoded JSON body of the reply. The caller in this file reads
        the videos from ``result['data']['list']``.

    Raises:
        requests.exceptions.RequestException: If the request fails, exceeds
            the timeout, or the server answers with an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Mobile Safari/537.36 Edg/134.0.0.0',
        # NOTE(review): hard-coded browser cookie; it appears to carry account
        # session tokens (SESSDATA, bili_jct) — consider loading it from configuration.
        'cookie': "buvid3=80381787-8DC6-341D-6383-2DAD0B65BE7F35778infoc; b_nut=1719229735; _uuid=EE52E93F-9A25-E1045-F668-2102C10B8F37D1037348infoc; buvid4=97AB953B-04D3-0D7C-3A5E-9B6F173E991238025-024062411-noq5OiLDsz9QRIZhiCc3Kg%3D%3D; rpdid=|(u)l|)Jlmk|0J'u~umkkm~J); buvid_fp_plain=undefined; DedeUserID=238639629; DedeUserID__ckMd5=46524913552e1129; header_theme_version=CLOSE; enable_web_push=DISABLE; hit-dyn-v2=1; fingerprint=f89a2bab30cb9a814fa72b8a72ab2f40; buvid_fp=f89a2bab30cb9a814fa72b8a72ab2f40; LIVE_BUVID=AUTO7317305698596358; PVID=1; CURRENT_QUALITY=80; is-2022-channel=1; enable_feed_channel=ENABLE; SESSDATA=c3be9ff5%2C1757344550%2C2b78e%2A31CjCIzD92JeD-fMkXHFP4MYVNFr_9RorajqpFZPNgxx3uSpGi2DfvB9GqPseHCoGt-CoSVjhvYy04MzFtc0FLNzFmRVNHZEhjb2xadDJEbEl1bmppcHcxMHdhY1FLTE1PRXBpMkt0V25ZMjNJdmlCWWJONDhrd3Fubk1mVEJBdTcxQ1gxdVF5UE1nIIEC; bili_jct=35578dbfa11598ddf34bd8b03657a5af; bsource=search_bing; sid=4vrj11co; home_feed_column=4; browser_resolution=1283-690; bp_t_offset_238639629=1043827256524275712; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NDIxODQ1NDAsImlhdCI6MTc0MTkyNTI4MCwicGx0IjotMX0.iXQwPHpk6MH30RWu-aQzqth7WlH_GqQDYZg_PQ8ugiU; bili_ticket_expires=1742184480; CURRENT_FNVAL=4048; b_lsid=27A53D3A_195932B70A0",
        'referer': 'https://www.bilibili.com/v/popular/all/?spm_id_from=333.1007.0.0',
    }
    url = 'https://api.bilibili.com/x/web-interface/popular'
    params = {
        'ps': "20",  # items per page
        'pn': str(page_num),  # page number
        'web_location': "333.934",  # described by the original author as a random number
        # The w_rid / wts (timestamp) parameters are not sent; the original
        # author marked them as no longer needed.
    }
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url, headers=headers, params=params, timeout=30)
    # Fail with a clear HTTP error instead of a JSON decoding error on an error page.
    response.raise_for_status()
    return response.json()
def download_video_url(avid,bvid,cid,title):
    """Assemble the play-URL request for one video and hand it to ``test_url``.

    Args:
        avid: Sent as the ``avid`` query parameter (converted with ``str``).
        bvid: Sent as the ``bvid`` query parameter and used to build the referer.
        cid: Sent as the ``cid`` query parameter (converted with ``str``).
        title: Forwarded unchanged to ``test_url``.

    Returns:
        None. The result of ``test_url`` is not used.
    """
    user_agent = 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Mobile Safari/537.36 Edg/134.0.0.0'
    cookie = "buvid3=80381787-8DC6-341D-6383-2DAD0B65BE7F35778infoc; b_nut=1719229735; _uuid=EE52E93F-9A25-E1045-F668-2102C10B8F37D1037348infoc; buvid4=97AB953B-04D3-0D7C-3A5E-9B6F173E991238025-024062411-noq5OiLDsz9QRIZhiCc3Kg%3D%3D; rpdid=|(u)l|)Jlmk|0J'u~umkkm~J); buvid_fp_plain=undefined; DedeUserID=238639629; DedeUserID__ckMd5=46524913552e1129; header_theme_version=CLOSE; enable_web_push=DISABLE; hit-dyn-v2=1; fingerprint=f89a2bab30cb9a814fa72b8a72ab2f40; buvid_fp=f89a2bab30cb9a814fa72b8a72ab2f40; LIVE_BUVID=AUTO7317305698596358; PVID=1; CURRENT_QUALITY=80; is-2022-channel=1; enable_feed_channel=ENABLE; SESSDATA=c3be9ff5%2C1757344550%2C2b78e%2A31CjCIzD92JeD-fMkXHFP4MYVNFr_9RorajqpFZPNgxx3uSpGi2DfvB9GqPseHCoGt-CoSVjhvYy04MzFtc0FLNzFmRVNHZEhjb2xadDJEbEl1bmppcHcxMHdhY1FLTE1PRXBpMkt0V25ZMjNJdmlCWWJONDhrd3Fubk1mVEJBdTcxQ1gxdVF5UE1nIIEC; bili_jct=35578dbfa11598ddf34bd8b03657a5af; bsource=search_bing; sid=4vrj11co; home_feed_column=4; browser_resolution=1283-690; bp_t_offset_238639629=1043827256524275712; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NDIxODQ1NDAsImlhdCI6MTc0MTkyNTI4MCwicGx0IjotMX0.iXQwPHpk6MH30RWu-aQzqth7WlH_GqQDYZg_PQ8ugiU; bili_ticket_expires=1742184480; CURRENT_FNVAL=4048; b_lsid=27A53D3A_195932B70A0"
    request_headers = {
        'User-Agent': user_agent,
        'cookie': cookie,
        # Anti-hotlinking: the referer is the video's own page.
        'referer': f'https://www.bilibili.com/video/{bvid}/',
    }
    # Query string for the play-URL endpoint; every value is sent as text.
    # The optional session / dm_img_* / w_rid / wts parameters are not sent.
    query = dict(
        avid=str(avid),
        bvid=str(bvid),
        cid=str(cid),
        qn='80',
        fnver='0',
        fnval='4048',
        fourk='1',
        gaia_source='',
        from_client='BROWSER',
        is_main_page='true',
        need_fragment='false',
        isGaiaAvoided='false',
        voice_balance='1',
        web_location='1315873',
    )
    test_url(
        url='https://api.bilibili.com/x/player/wbi/playurl',
        headers=request_headers,
        params=query,
        title=title,
        bvid=bvid,
    )
def _read_stream_records(path):
    """Return the ``(url, bvid, title)`` tuples stored in a tab-separated record file."""
    try:
        with open(path, mode='r', encoding='utf-8') as f:
            lines = f.readlines()
    except FileNotFoundError:
        # No link was ever recorded, so there is nothing to download.
        return []
    records = []
    for line in lines:
        # Strip only the line ending: a plain strip() would also remove a
        # trailing tab and break the three-field layout when the title is empty.
        fields = line.rstrip('\r\n').split('\t')
        # Skip blank or malformed lines and records with an empty field.
        if len(fields) == 3 and all(fields):
            records.append(tuple(fields))
    return records


async def main():
    """Download every audio and video stream listed in the record files.

    Reads ``./video/audio.txt`` and ``./video/video.txt``, where each line is
    ``url<TAB>bvid<TAB>title``. It schedules one ``download_audio`` task per
    audio record and one ``download_video`` task per video record, then
    waits for all of them. A missing record file is treated as empty, and
    malformed lines are skipped.
    """
    audio_records = _read_stream_records('./video/audio.txt')
    video_records = _read_stream_records('./video/video.txt')
    tasks = [
        asyncio.create_task(download_audio(url, bvid, title))
        for url, bvid, title in audio_records
    ]
    tasks += [
        asyncio.create_task(download_video(url, bvid, title))
        for url, bvid, title in video_records
    ]
    # All tasks are already scheduled, so a single gather waits for both kinds.
    if tasks:
        await asyncio.gather(*tasks)
async def download_video(url,bvid,title):
    """Download one video stream to ``./video/<title>/<title>.mp4``.

    The response body is written to disk chunk by chunk, so the whole file
    is never held in memory. When the server answers with an HTTP error
    status, a failure message is printed and no file is written.

    Args:
        url: Address of the video stream.
        bvid: Video id, used to build the referer header.
        title: Name of both the target directory and the file.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Mobile Safari/537.36 Edg/134.0.0.0',
        # NOTE(review): hard-coded browser cookie; it appears to carry account
        # session tokens (SESSDATA, bili_jct) — consider loading it from configuration.
        'cookie': "buvid3=80381787-8DC6-341D-6383-2DAD0B65BE7F35778infoc; b_nut=1719229735; _uuid=EE52E93F-9A25-E1045-F668-2102C10B8F37D1037348infoc; buvid4=97AB953B-04D3-0D7C-3A5E-9B6F173E991238025-024062411-noq5OiLDsz9QRIZhiCc3Kg%3D%3D; rpdid=|(u)l|)Jlmk|0J'u~umkkm~J); buvid_fp_plain=undefined; DedeUserID=238639629; DedeUserID__ckMd5=46524913552e1129; header_theme_version=CLOSE; enable_web_push=DISABLE; hit-dyn-v2=1; fingerprint=f89a2bab30cb9a814fa72b8a72ab2f40; buvid_fp=f89a2bab30cb9a814fa72b8a72ab2f40; LIVE_BUVID=AUTO7317305698596358; PVID=1; CURRENT_QUALITY=80; is-2022-channel=1; enable_feed_channel=ENABLE; SESSDATA=c3be9ff5%2C1757344550%2C2b78e%2A31CjCIzD92JeD-fMkXHFP4MYVNFr_9RorajqpFZPNgxx3uSpGi2DfvB9GqPseHCoGt-CoSVjhvYy04MzFtc0FLNzFmRVNHZEhjb2xadDJEbEl1bmppcHcxMHdhY1FLTE1PRXBpMkt0V25ZMjNJdmlCWWJONDhrd3Fubk1mVEJBdTcxQ1gxdVF5UE1nIIEC; bili_jct=35578dbfa11598ddf34bd8b03657a5af; bsource=search_bing; sid=4vrj11co; home_feed_column=4; browser_resolution=1283-690; bp_t_offset_238639629=1043827256524275712; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NDIxODQ1NDAsImlhdCI6MTc0MTkyNTI4MCwicGx0IjotMX0.iXQwPHpk6MH30RWu-aQzqth7WlH_GqQDYZg_PQ8ugiU; bili_ticket_expires=1742184480; CURRENT_FNVAL=4048; b_lsid=27A53D3A_195932B70A0",
        # Anti-hotlinking: the referer is the video's own page.
        'referer': f'https://www.bilibili.com/video/{bvid}/',
    }
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as response:
            # Do not save an HTTP error page as if it were media data.
            if response.status >= 400:
                print(f"《{title}》视频下载失败,HTTP状态码:{response.status}")
                return
            os.makedirs(f'./video/{title}', exist_ok=True)
            async with aiofiles.open(f'./video/{title}/{title}.mp4', mode='wb') as f:
                # Stream in 1 MiB chunks instead of buffering the whole body.
                async for chunk in response.content.iter_chunked(1024 * 1024):
                    await f.write(chunk)
async def download_audio(url,bvid,title):
    """Download one audio stream to ``./video/<title>/<title>.mp3``.

    The response body is written to disk chunk by chunk, so the whole file
    is never held in memory. When the server answers with an HTTP error
    status, a failure message is printed and no file is written.

    Args:
        url: Address of the audio stream.
        bvid: Video id, used to build the referer header.
        title: Name of both the target directory and the file.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Mobile Safari/537.36 Edg/134.0.0.0',
        # NOTE(review): hard-coded browser cookie; it appears to carry account
        # session tokens (SESSDATA, bili_jct) — consider loading it from configuration.
        'cookie': "buvid3=80381787-8DC6-341D-6383-2DAD0B65BE7F35778infoc; b_nut=1719229735; _uuid=EE52E93F-9A25-E1045-F668-2102C10B8F37D1037348infoc; buvid4=97AB953B-04D3-0D7C-3A5E-9B6F173E991238025-024062411-noq5OiLDsz9QRIZhiCc3Kg%3D%3D; rpdid=|(u)l|)Jlmk|0J'u~umkkm~J); buvid_fp_plain=undefined; DedeUserID=238639629; DedeUserID__ckMd5=46524913552e1129; header_theme_version=CLOSE; enable_web_push=DISABLE; hit-dyn-v2=1; fingerprint=f89a2bab30cb9a814fa72b8a72ab2f40; buvid_fp=f89a2bab30cb9a814fa72b8a72ab2f40; LIVE_BUVID=AUTO7317305698596358; PVID=1; CURRENT_QUALITY=80; is-2022-channel=1; enable_feed_channel=ENABLE; SESSDATA=c3be9ff5%2C1757344550%2C2b78e%2A31CjCIzD92JeD-fMkXHFP4MYVNFr_9RorajqpFZPNgxx3uSpGi2DfvB9GqPseHCoGt-CoSVjhvYy04MzFtc0FLNzFmRVNHZEhjb2xadDJEbEl1bmppcHcxMHdhY1FLTE1PRXBpMkt0V25ZMjNJdmlCWWJONDhrd3Fubk1mVEJBdTcxQ1gxdVF5UE1nIIEC; bili_jct=35578dbfa11598ddf34bd8b03657a5af; bsource=search_bing; sid=4vrj11co; home_feed_column=4; browser_resolution=1283-690; bp_t_offset_238639629=1043827256524275712; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NDIxODQ1NDAsImlhdCI6MTc0MTkyNTI4MCwicGx0IjotMX0.iXQwPHpk6MH30RWu-aQzqth7WlH_GqQDYZg_PQ8ugiU; bili_ticket_expires=1742184480; CURRENT_FNVAL=4048; b_lsid=27A53D3A_195932B70A0",
        # Anti-hotlinking: the referer is the video's own page.
        'referer': f'https://www.bilibili.com/video/{bvid}/',
    }
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as response:
            # Do not save an HTTP error page as if it were media data.
            if response.status >= 400:
                print(f"《{title}》音频下载失败,HTTP状态码:{response.status}")
                return
            os.makedirs(f'./video/{title}', exist_ok=True)
            # NOTE(review): the payload is stored with an .mp3 extension whatever
            # its actual container/codec is — confirm that this is intended.
            async with aiofiles.open(f'./video/{title}/{title}.mp3', mode='wb') as f:
                # Stream in 1 MiB chunks instead of buffering the whole body.
                async for chunk in response.content.iter_chunked(1024 * 1024):
                    await f.write(chunk)
if __name__ == '__main__':
    # Script entry point: collect the stream links of the requested number of
    # popular-list pages, then download every recorded stream concurrently.

    # Working directory for the link records and the downloaded media.
    os.makedirs('./video', exist_ok=True)
    # Start from empty record files. test_url() only appends to them, so links
    # left over from an earlier run would otherwise be downloaded again.
    # Creating them also guarantees that both files exist when main() opens them.
    for record_file in ('./video/audio.txt', './video/video.txt'):
        open(record_file, 'w', encoding='utf-8').close()
    page_num = input("请输入爬取B站热门视频的页数,一页大概20个视频:")
    # Module-level counter; test_url() increments it for every link it stores.
    # (A `global` statement is a no-op at module level, so none is needed here.)
    count = 0
    for page in range(1, int(page_num) + 1):
        # Decoded reply for this page; the videos are under ['data']['list'].
        video_info_dict = get_hot_video_info(page)
        for video in video_info_dict['data']['list']:
            avid = video['aid']
            bvid = video['bvid']
            cid = video['cid']
            # The title becomes a directory/file name: remove whitespace and
            # every character that is not a word character.
            title = re.sub(r'[^\w\u4e00-\u9fff]|\s', '', video['title'])
            print(f"正在下载第{count+1}个视频,标题为《{title}》")
            download_video_url(avid, bvid, cid, title)
    print(f"一共成功下载了{count}个视频的链接")
    # The selector policy class is only defined on Windows; calling it
    # unconditionally raises AttributeError on other platforms.
    # NOTE(review): presumably selected to avoid Proactor-loop problems with
    # aiohttp on Windows — confirm.
    if hasattr(asyncio, 'WindowsSelectorEventLoopPolicy'):
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    asyncio.run(main())

浙公网安备 33010602011771号