爬取B站视频 - m4s与flv文件的那些事
问题描述
用Python爬取B站视频,简单易操作最好。
推荐阅读
链接如下:
一言以蔽之,若想爬B站视频,文章值得参阅。
解决方案
import os
import sys
from urllib.parse import urlsplit

import requests


class BilibiliCrawler:
    """Download Bilibili videos as ``.flv`` files given their av numbers.

    For each av number the crawler queries the page-list API for the first
    part's ``cid``, queries the play-url API for the first stream URL, and
    then streams that URL to a file in the output directory.
    """

    # Seconds to wait for a connection / for data before giving up, so a
    # stalled server cannot hang the crawler forever.
    REQUEST_TIMEOUT = 30

    # Characters that are replaced by '_' when a video title is turned into
    # a file name: the space (as the original code did) plus the characters
    # that are path separators or are rejected by common file systems.
    _UNSAFE_FILENAME_CHARS = str.maketrans({c: '_' for c in ' \\/:*?"<>|'})

    def __init__(self, qn=80, output=''):
        """Prepare URL templates, request headers and the output directory.

        Args:
            qn: Value substituted into the ``qn`` query parameter of the
                play-url API.
            output: Name of a directory (relative to the current working
                directory) to store downloads in. It is created when
                missing. An empty string stores files in the current
                working directory.
        """
        if output:
            # Use os.path so the path is valid on every platform, not only
            # on Windows; makedirs also copes with nested directories.
            path = os.path.join(os.getcwd(), output)
            os.makedirs(path, exist_ok=True)
            # Keep the trailing separator the attribute has always carried.
            output = os.path.join(path, '')
        self.qn = qn
        self.output = output
        self.cid_url = 'https://api.bilibili.com/x/player/pagelist?aid={}&jsonp=jsonp'
        self.flv_url = 'https://api.bilibili.com/x/player/playurl?avid={}&cid={}&qn={}&type=&otype=json'
        # Template for the per-video Referer header; filled in by start().
        self.referer_url = 'https://www.bilibili.com/video/av{}'
        self.headers1 = {
            'host': 'api.bilibili.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0',
        }
        # 'host' and 'Referer' are overwritten by start() for each video.
        self.headers2 = {
            'host': '',
            'Origin': 'https://www.bilibili.com',
            'Referer': 'https://www.bilibili.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0',
        }

    def _get_json(self, url):
        """GET *url* with the API headers and return the decoded JSON body.

        Raises:
            requests.RequestException: on connection problems, timeouts or
                a non-success HTTP status.
        """
        response = requests.get(
            url, headers=self.headers1, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        return response.json()

    def getCid(self, url):
        """Return ``(cid, name, duration)`` of the first entry in the page list.

        Args:
            url: Fully formatted page-list API URL.

        Returns:
            The ``cid``, ``part`` and ``duration`` fields of the first
            element of the response's ``data`` list.
        """
        detail = self._get_json(url)['data'][0]
        return detail['cid'], detail['part'], detail['duration']

    def getFlv(self, url):
        """Return ``(length, size, url)`` of the first stream entry.

        Args:
            url: Fully formatted play-url API URL.

        Returns:
            The ``length``, ``size`` and ``url`` fields of the first
            element of the response's ``data.durl`` list.
        """
        durl = self._get_json(url)['data']['durl'][0]
        return durl['length'], durl['size'], durl['url']

    def download(self, url, filename='None.flv'):
        """Stream *url* into *filename* inside the output directory.

        Progress is written to stdout when the server reports a content
        length. On a non-200 response an error message is printed and
        nothing is written.

        Args:
            url: Direct URL of the media file.
            filename: File name (not a path) to create in ``self.output``.
        """
        chunk_size = 1024
        # NOTE(review): verify=False disables TLS certificate validation and
        # exposes the download to man-in-the-middle tampering. It is kept
        # because the original relied on it; remove it if the media hosts
        # present valid certificates.
        with requests.get(
            url,
            headers=self.headers2,
            stream=True,
            verify=False,
            timeout=self.REQUEST_TIMEOUT,
        ) as response:
            # Check the status before touching the headers: error responses
            # may not carry a content-length at all.
            if response.status_code != 200:
                print('下载出错')
                return

            # 0 means "unknown"; progress reporting is skipped in that case
            # instead of dividing by zero.
            content_size = int(response.headers.get('content-length', 0))
            if content_size:
                sys.stdout.write(
                    ' [文件大小]:%0.2f MB\n' % (content_size / 1024 / 1024)
                )

            target = os.path.join(self.output, filename)
            size = 0
            with open(target, 'wb') as file:
                for data in response.iter_content(chunk_size=chunk_size):
                    file.write(data)
                    size += len(data)
                    if content_size:
                        sys.stdout.write(
                            ' [下载进度]:%.2f%%' % (size / content_size * 100)
                            + '\r'
                        )

            # Integer comparison instead of the float ratio test.
            if content_size and size == content_size:
                print('\n')

    def start(self, av):
        """Resolve the video behind av number *av* and download it.

        Args:
            av: The av number, as a string or integer.
        """
        cid, name, duration = self.getCid(self.cid_url.format(av))
        _length, _size, flv = self.getFlv(self.flv_url.format(av, cid, self.qn))

        # The media host differs per video, so the headers are set per call.
        self.headers2['host'] = urlsplit(flv).netloc
        self.headers2['Referer'] = self.referer_url.format(av)

        # Titles may contain characters that are illegal in file names.
        filename = name.translate(self._UNSAFE_FILENAME_CHARS) + '.flv'
        print("name: {0} duration:{1}s".format(filename, duration))
        self.download(flv, filename)


if __name__ == '__main__':
    bilibili = BilibiliCrawler(qn=80, output="download")
    avlist = ['66476652', '66551946']
    for av in avlist:
        bilibili.start(av=av)