爬取B站视频 - m4s与flv文件的那些事

问题描述

用Python爬取B站视频,简单易操作最好。

 

推荐阅读

链接如下:

https://blog.csdn.net/Enderman_xiaohei/article/details/100598003#%E7%88%AC%E5%8F%96%E5%8D%95%E4%B8%AA%E8%A7%86%E9%A2%91

一言以蔽之,若想爬取B站视频,上面这篇文章值得参阅。

 

解决方案

import requests
import os, sys

class BilibiliCrawler():
    """Download Bilibili videos as FLV files, addressed by their av number.

    Workflow (see ``start``): look up the video's ``cid`` through the
    pagelist API, ask the playurl API for a stream URL at quality ``qn``,
    then stream that URL to disk while printing progress.

    Only the first page (part) of a video and the first ``durl`` segment of
    the play URL response are used.
    """

    # Translation table for file names: the space (as in the original
    # behaviour) plus characters that cannot appear in a file name on
    # Windows or would be read as a path separator are replaced with '_'.
    _FILENAME_TABLE = str.maketrans({ch: '_' for ch in ' \\/:*?"<>|'})

    def __init__(self, qn=80, output='', timeout=30):
        """Set up API endpoints, request headers and the output directory.

        Args:
            qn: Quality code passed to the playurl API.
            output: Directory for downloaded files. A relative path is
                resolved against the current working directory; the
                directory is created when missing. An empty string means
                "write next to wherever the process runs".
            timeout: Timeout in seconds applied to every HTTP request.
        """
        if output:
            # Build the path portably (no hard-coded '\\') and always store
            # the same absolute value whether or not the directory existed.
            output = os.path.abspath(output)
            os.makedirs(output, exist_ok=True)

        self.qn = qn
        self.output = output
        self.timeout = timeout
        self.cid_url = 'https://api.bilibili.com/x/player/pagelist?aid={}&jsonp=jsonp'
        self.flv_url = 'https://api.bilibili.com/x/player/playurl?avid={}&cid={}&qn={}&type=&otype=json'
        # Filled in with the av number by start(); the original template was
        # sent unformatted (literal '{}') and spelled the prefix 'ac'.
        self.referer_url = 'https://www.bilibili.com/video/av{}'
        self.headers1 = {'host': 'api.bilibili.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0'}
        self.headers2 = {'host': '',
            'Origin': 'https://www.bilibili.com',
            'Referer': 'https://www.bilibili.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0'}

    @classmethod
    def _safe_filename(cls, name, ext='.flv'):
        """Return ``name`` with unsafe characters replaced by '_' plus ``ext``."""
        return name.translate(cls._FILENAME_TABLE) + ext

    def _get_json(self, url):
        """GET an API URL with the API headers and return the decoded JSON.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.get(url, headers=self.headers1, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def getCid(self, url):
        """Fetch the page list and return ``(cid, name, duration)`` of page 1.

        Args:
            url: A fully formatted ``cid_url``.
        """
        detail = self._get_json(url)['data'][0]
        return detail['cid'], detail['part'], detail['duration']

    def getFlv(self, url):
        """Fetch the play URL info and return ``(length, size, url)``.

        Values come from the first entry of ``data.durl`` in the response.

        Args:
            url: A fully formatted ``flv_url``.
        """
        durl = self._get_json(url)['data']['durl'][0]
        return durl['length'], durl['size'], durl['url']

    def download(self, url, filename='None.flv'):
        """Stream ``url`` into ``filename`` inside the output directory.

        Prints the file size and a percentage progress line when the server
        reports a Content-Length; prints an error message and returns
        without writing anything when the status code is not 200.
        """
        chunk_size = 1024
        bytes_per_mb = 1024 * 1024
        # NOTE(review): verify=False disables TLS certificate verification
        # (kept from the original). Confirm it is really required; if not,
        # drop it so the download cannot be silently intercepted.
        response = requests.get(url, headers=self.headers2, stream=True,
                                verify=False, timeout=self.timeout)
        try:
            if response.status_code != 200:
                print('下载出错')
                return

            # Content-Length may be absent (e.g. chunked transfer); treat
            # that as "unknown" instead of failing or dividing by zero.
            content_size = int(response.headers.get('content-length', 0))
            if content_size:
                sys.stdout.write('  [文件大小]:%0.2f MB\n' % (content_size / bytes_per_mb))

            filename = os.path.join(self.output, filename)
            size = 0
            with open(filename, 'wb') as file:
                for data in response.iter_content(chunk_size=chunk_size):
                    if not data:
                        continue
                    file.write(data)
                    size += len(data)
                    if content_size:
                        sys.stdout.write('  [下载进度]:%.2f%%' % (size / content_size * 100) + '\r')
            # Move past the '\r'-rewritten progress line once finished.
            print('\n')
        finally:
            # Streamed responses hold the connection until closed.
            response.close()

    def start(self, av):
        """Resolve and download the video with av number ``av``.

        Args:
            av: The numeric av id (string or int) of the video.
        """
        cid, name, duration = self.getCid(self.cid_url.format(av))
        length, size, flv = self.getFlv(self.flv_url.format(av, cid, self.qn))
        # The stream lives on a CDN host; take it from 'scheme://host/...'.
        self.headers2['host'] = flv.split('/')[2]
        self.headers2['Referer'] = self.referer_url.format(av)
        filename = self._safe_filename(name)
        print("name: {0} duration:{1}s".format(filename, duration))
        self.download(flv, filename)


if __name__ == '__main__':
    # Example run: fetch two videos by av number at quality 80 into the
    # "download" directory.
    crawler = BilibiliCrawler(output="download", qn=80)
    for av_id in ('66476652', '66551946'):
        crawler.start(av=av_id)

 

posted @ 2020-07-06 22:08  TruthSeeking  阅读(623)  评论(0)  编辑  收藏  举报