异步爬取B站热门视频

目录

    这里就不讲思路了,有点累了,直接上代码吧!代码写得有点乱,勿喷哈哈哈!

    #@author: 袁小黑
    #@date: 2025/03/15
    
    import requests
    import re
    from tqdm import tqdm
    import time
    import json
    import os 
    import brotli
    import asyncio
    import aiofiles
    import aiohttp
    def test_url(url,headers,params,title,bvid):
        """Query the play-URL API once and record the stream links of one video.

        A single GET request is sent to ``url``.  The first DASH video and audio
        ``base_url`` entries are read from the JSON body.  When they are usable,
        one ``url<TAB>bvid<TAB>title`` line is appended to ``./video/audio.txt``
        and to ``./video/video.txt`` and the module-level ``count`` is
        incremented.  Otherwise a failure message is printed and nothing is
        written.

        Args:
            url: Endpoint of the play-URL API.
            headers: Request headers passed to ``requests.get``.
            params: Query parameters passed to ``requests.get``.
            title: Title of the video; written as the third field of each record.
            bvid: Video id; written as the second field of each record.

        Returns:
            The ``requests`` response object of the API call.

        Note:
            ``count`` must already exist as a module-level name and the
            ``./video`` directory must already exist; both are set up by the
            script entry point.
        """
        global count
        # A timeout keeps one stalled connection from blocking the whole crawl.
        response = requests.get(url, headers=headers, params=params, timeout=30)
        try:
            # Parse the body once and pull out both links before writing anything.
            dash = response.json()['data']['dash']
            base_url = dash['video'][0]['base_url']
            audio_url = dash['audio'][0]['base_url']
        except (KeyError, IndexError, TypeError, ValueError):
            # The body is not JSON or lacks the expected DASH entries: report it
            # as a failed link instead of aborting the whole run.
            print(f"《{title}》url链接下载失败")
            return response

        # The original code treats a video link ending in '000' as unusable.
        # NOTE(review): the reason for this marker is not visible here - confirm.
        if base_url.endswith('000'):
            print(f"《{title}》url链接下载失败")
            return response

        count += 1
        with open('./video/audio.txt', 'a', encoding='utf-8') as f:
            f.write('\t'.join((audio_url, bvid, title)) + '\n')
        with open('./video/video.txt', 'a', encoding='utf-8') as f:
            f.write('\t'.join((base_url, bvid, title)) + '\n')
        return response
    def get_hot_video_info(page_num):
        """Fetch one page of the "popular" video list and return the parsed JSON.

        Args:
            page_num: Page number to request; sent as the ``pn`` query parameter.

        Returns:
            The decoded JSON body of the response.  The caller reads the video
            entries from ``['data']['list']``.

        Raises:
            requests.exceptions.RequestException: If the request fails or does
                not answer within the timeout.
        """
        # NOTE(review): the cookie below embeds session values (SESSDATA,
        # bili_jct); consider loading it from configuration instead of source.
        headers={
            'User-Agent':'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Mobile Safari/537.36 Edg/134.0.0.0',
            'cookie':"buvid3=80381787-8DC6-341D-6383-2DAD0B65BE7F35778infoc; b_nut=1719229735; _uuid=EE52E93F-9A25-E1045-F668-2102C10B8F37D1037348infoc; buvid4=97AB953B-04D3-0D7C-3A5E-9B6F173E991238025-024062411-noq5OiLDsz9QRIZhiCc3Kg%3D%3D; rpdid=|(u)l|)Jlmk|0J'u~umkkm~J); buvid_fp_plain=undefined; DedeUserID=238639629; DedeUserID__ckMd5=46524913552e1129; header_theme_version=CLOSE; enable_web_push=DISABLE; hit-dyn-v2=1; fingerprint=f89a2bab30cb9a814fa72b8a72ab2f40; buvid_fp=f89a2bab30cb9a814fa72b8a72ab2f40; LIVE_BUVID=AUTO7317305698596358; PVID=1; CURRENT_QUALITY=80; is-2022-channel=1; enable_feed_channel=ENABLE; SESSDATA=c3be9ff5%2C1757344550%2C2b78e%2A31CjCIzD92JeD-fMkXHFP4MYVNFr_9RorajqpFZPNgxx3uSpGi2DfvB9GqPseHCoGt-CoSVjhvYy04MzFtc0FLNzFmRVNHZEhjb2xadDJEbEl1bmppcHcxMHdhY1FLTE1PRXBpMkt0V25ZMjNJdmlCWWJONDhrd3Fubk1mVEJBdTcxQ1gxdVF5UE1nIIEC; bili_jct=35578dbfa11598ddf34bd8b03657a5af; bsource=search_bing; sid=4vrj11co; home_feed_column=4; browser_resolution=1283-690; bp_t_offset_238639629=1043827256524275712; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NDIxODQ1NDAsImlhdCI6MTc0MTkyNTI4MCwicGx0IjotMX0.iXQwPHpk6MH30RWu-aQzqth7WlH_GqQDYZg_PQ8ugiU; bili_ticket_expires=1742184480; CURRENT_FNVAL=4048; b_lsid=27A53D3A_195932B70A0",
            'referer':'https://www.bilibili.com/v/popular/all/?spm_id_from=333.1007.0.0',
        }
        url='https://api.bilibili.com/x/web-interface/popular'
        params={
            'ps': "20",  # number of entries per page
            'pn':str(page_num),  # page number
            'web_location': "333.934",  # described as a "random number" by the original author
            # The w_rid and wts (timestamp) parameters were marked as no longer
            # needed by the original author and are not sent.
        }
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(url, headers=headers, params=params, timeout=30)
        return response.json()
    
    def download_video_url(avid,bvid,cid,title):
        """Build the play-URL request for one video and pass it to ``test_url``.

        Args:
            avid: Sent as the ``avid`` query parameter (converted with ``str``).
            bvid: Sent as the ``bvid`` query parameter and used to build the
                ``referer`` header.
            cid: Sent as the ``cid`` query parameter (converted with ``str``).
            title: Forwarded unchanged to ``test_url``.

        Returns:
            None.  The value returned by ``test_url`` is discarded.
        """
        endpoint = 'https://api.bilibili.com/x/player/wbi/playurl'

        # Only the parameters below are sent; the session, dm_* and w_rid/wts
        # parameters that the original author had commented out stay omitted.
        query = {
            'avid': str(avid),
            'bvid': str(bvid),
            'cid': str(cid),
            'qn': '80',
            'fnver': '0',
            'fnval': '4048',
            'fourk': '1',
            'gaia_source': '',
            'from_client': 'BROWSER',
            'is_main_page': 'true',
            'need_fragment': 'false',
            'isGaiaAvoided': 'false',
            'voice_balance': '1',
            'web_location': '1315873',
        }

        # NOTE(review): the cookie below embeds session values (SESSDATA,
        # bili_jct); consider loading it from configuration instead of source.
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Mobile Safari/537.36 Edg/134.0.0.0',
            'cookie': "buvid3=80381787-8DC6-341D-6383-2DAD0B65BE7F35778infoc; b_nut=1719229735; _uuid=EE52E93F-9A25-E1045-F668-2102C10B8F37D1037348infoc; buvid4=97AB953B-04D3-0D7C-3A5E-9B6F173E991238025-024062411-noq5OiLDsz9QRIZhiCc3Kg%3D%3D; rpdid=|(u)l|)Jlmk|0J'u~umkkm~J); buvid_fp_plain=undefined; DedeUserID=238639629; DedeUserID__ckMd5=46524913552e1129; header_theme_version=CLOSE; enable_web_push=DISABLE; hit-dyn-v2=1; fingerprint=f89a2bab30cb9a814fa72b8a72ab2f40; buvid_fp=f89a2bab30cb9a814fa72b8a72ab2f40; LIVE_BUVID=AUTO7317305698596358; PVID=1; CURRENT_QUALITY=80; is-2022-channel=1; enable_feed_channel=ENABLE; SESSDATA=c3be9ff5%2C1757344550%2C2b78e%2A31CjCIzD92JeD-fMkXHFP4MYVNFr_9RorajqpFZPNgxx3uSpGi2DfvB9GqPseHCoGt-CoSVjhvYy04MzFtc0FLNzFmRVNHZEhjb2xadDJEbEl1bmppcHcxMHdhY1FLTE1PRXBpMkt0V25ZMjNJdmlCWWJONDhrd3Fubk1mVEJBdTcxQ1gxdVF5UE1nIIEC; bili_jct=35578dbfa11598ddf34bd8b03657a5af; bsource=search_bing; sid=4vrj11co; home_feed_column=4; browser_resolution=1283-690; bp_t_offset_238639629=1043827256524275712; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NDIxODQ1NDAsImlhdCI6MTc0MTkyNTI4MCwicGx0IjotMX0.iXQwPHpk6MH30RWu-aQzqth7WlH_GqQDYZg_PQ8ugiU; bili_ticket_expires=1742184480; CURRENT_FNVAL=4048; b_lsid=27A53D3A_195932B70A0",
            # The referer points at the video's own page (anti-hotlinking check).
            'referer': f'https://www.bilibili.com/video/{bvid}/',
        }

        test_url(
            url=endpoint,
            headers=request_headers,
            params=query,
            title=title,
            bvid=bvid,
        )
    
    def _load_download_entries(path):
        """Return the ``[url, bvid, title]`` records stored in the file ``path``.

        Each line is expected to hold three tab-separated fields.  A missing
        file yields an empty list, blank lines are ignored and lines with any
        other number of fields are reported and skipped.
        """
        try:
            with open(path, mode='r', encoding='utf-8') as f:
                raw_lines = f.readlines()
        except FileNotFoundError:
            # No link was recorded at all, so there is nothing to download.
            return []

        entries = []
        for raw_line in raw_lines:
            # Only the line ending is removed before splitting: stripping the
            # whole line would drop a trailing tab and lose an empty title field.
            line = raw_line.rstrip('\r\n')
            if not line.strip():
                continue
            fields = [field.strip() for field in line.split('\t')]
            if len(fields) != 3:
                print(f"跳过格式错误的记录:{line!r}")
                continue
            entries.append(fields)
        return entries


    async def main():
        """Download every audio and video stream listed in the link files.

        Reads ``./video/audio.txt`` and ``./video/video.txt``, schedules one
        ``download_audio`` / ``download_video`` task per record and waits for
        all of them.  All tasks are created before the first ``await``, so the
        audio and video downloads run concurrently.
        """
        audio_entries = _load_download_entries('./video/audio.txt')
        video_entries = _load_download_entries('./video/video.txt')

        audio_tasks = [
            asyncio.create_task(download_audio(url, bvid, title))
            for url, bvid, title in audio_entries
        ]
        video_tasks = [
            asyncio.create_task(download_video(url, bvid, title))
            for url, bvid, title in video_entries
        ]

        await asyncio.gather(*audio_tasks)
        await asyncio.gather(*video_tasks)
    
    def _media_request_headers(bvid):
        """Return the request headers used to fetch a media stream of ``bvid``."""
        # NOTE(review): the cookie below embeds session values (SESSDATA,
        # bili_jct); consider loading it from configuration instead of source.
        return {
            'User-Agent':'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Mobile Safari/537.36 Edg/134.0.0.0',
            'cookie':"buvid3=80381787-8DC6-341D-6383-2DAD0B65BE7F35778infoc; b_nut=1719229735; _uuid=EE52E93F-9A25-E1045-F668-2102C10B8F37D1037348infoc; buvid4=97AB953B-04D3-0D7C-3A5E-9B6F173E991238025-024062411-noq5OiLDsz9QRIZhiCc3Kg%3D%3D; rpdid=|(u)l|)Jlmk|0J'u~umkkm~J); buvid_fp_plain=undefined; DedeUserID=238639629; DedeUserID__ckMd5=46524913552e1129; header_theme_version=CLOSE; enable_web_push=DISABLE; hit-dyn-v2=1; fingerprint=f89a2bab30cb9a814fa72b8a72ab2f40; buvid_fp=f89a2bab30cb9a814fa72b8a72ab2f40; LIVE_BUVID=AUTO7317305698596358; PVID=1; CURRENT_QUALITY=80; is-2022-channel=1; enable_feed_channel=ENABLE; SESSDATA=c3be9ff5%2C1757344550%2C2b78e%2A31CjCIzD92JeD-fMkXHFP4MYVNFr_9RorajqpFZPNgxx3uSpGi2DfvB9GqPseHCoGt-CoSVjhvYy04MzFtc0FLNzFmRVNHZEhjb2xadDJEbEl1bmppcHcxMHdhY1FLTE1PRXBpMkt0V25ZMjNJdmlCWWJONDhrd3Fubk1mVEJBdTcxQ1gxdVF5UE1nIIEC; bili_jct=35578dbfa11598ddf34bd8b03657a5af; bsource=search_bing; sid=4vrj11co; home_feed_column=4; browser_resolution=1283-690; bp_t_offset_238639629=1043827256524275712; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NDIxODQ1NDAsImlhdCI6MTc0MTkyNTI4MCwicGx0IjotMX0.iXQwPHpk6MH30RWu-aQzqth7WlH_GqQDYZg_PQ8ugiU; bili_ticket_expires=1742184480; CURRENT_FNVAL=4048; b_lsid=27A53D3A_195932B70A0",
            # The referer points at the video's own page (anti-hotlinking check).
            'referer':f'https://www.bilibili.com/video/{bvid}/',
        }


    async def _download_stream(url, bvid, title, extension):
        """Stream ``url`` into ``./video/{title}/{title}.{extension}``.

        The body is written chunk by chunk so that a whole media file never has
        to be held in memory.  A response with an HTTP error status is reported
        and skipped, so an error body is not saved as a media file.
        """
        target_dir = f'./video/{title}'
        target_path = f'{target_dir}/{title}.{extension}'
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=_media_request_headers(bvid)) as response:
                if response.status >= 400:
                    print(f"《{title}》.{extension} 下载失败,HTTP状态码:{response.status}")
                    return
                # exist_ok: the audio and the video of one title share a folder.
                os.makedirs(target_dir, exist_ok=True)
                async with aiofiles.open(target_path, mode='wb') as f:
                    async for chunk in response.content.iter_chunked(1024 * 1024):
                        await f.write(chunk)


    async def download_video(url,bvid,title):
        """Download the video stream at ``url`` to ``./video/{title}/{title}.mp4``.

        Args:
            url: Address of the video stream.
            bvid: Video id, used to build the ``referer`` header.
            title: Name of both the target folder and the target file.
        """
        await _download_stream(url, bvid, title, 'mp4')


    async def download_audio(url,bvid,title):
        """Download the audio stream at ``url`` to ``./video/{title}/{title}.mp3``.

        Args:
            url: Address of the audio stream.
            bvid: Video id, used to build the ``referer`` header.
            title: Name of both the target folder and the target file.
        """
        await _download_stream(url, bvid, title, 'mp3')
                
    
    if __name__ == '__main__':
        # Make sure the output directory exists before any link file is written.
        os.makedirs('./video', exist_ok=True)

        # Start every run with empty link files.  They are opened in append mode
        # while links are collected, so without this reset a second run would
        # queue every earlier entry again and two tasks would write to the same
        # output file.  Creating them here also guarantees that both files exist
        # when no link could be collected.
        for link_file in ('./video/audio.txt', './video/video.txt'):
            with open(link_file, 'w', encoding='utf-8'):
                pass

        page_num=input("请输入爬取B站热门视频的页数,一页大概20个视频:")
        # Module-level counter of collected links; test_url increments it.
        count=0
        for page in range(1, int(page_num) + 1):
            # Parsed JSON of one result page; its entries are under ['data']['list'].
            video_info_dict = get_hot_video_info(page)

            for video in video_info_dict['data']['list']:
                avid = video['aid']
                bvid = video['bvid']
                cid = video['cid']
                # The title doubles as folder and file name, so every character
                # that is not a word character is removed.  Fall back to the bvid
                # when nothing is left, because an empty title would break the
                # tab-separated link records.
                title = re.sub(r'[^\w\u4e00-\u9fff]|\s', '', video['title']) or bvid

                print(f"正在下载第{count+1}个视频,标题为《{title}》")
                download_video_url(avid, bvid, cid, title)

        print(f"一共成功下载了{count}个视频的链接")

        # The selector event loop policy only exists on Windows; guarding the
        # call keeps the script runnable on other platforms.
        if hasattr(asyncio, 'WindowsSelectorEventLoopPolicy'):
            asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
        asyncio.run(main())
    
    
    posted @ 2025-03-20 15:07  CodeCraftsMan  阅读(76)  评论(0)    收藏  举报