1 import json
2 import requests
3 import re
4
5
class Bilibili(object):
    """Download the video stream referenced by a single Bilibili video page.

    ``run`` chains three steps: fetch the page HTML (``get_source``), extract
    the title and a stream URL from it (``parse_source``) and write the
    stream to disk (``save_video``).
    """

    # Number of bytes written to disk per chunk while streaming the download.
    DOWNLOAD_CHUNK_SIZE = 1024 * 1024

    def __init__(self, goal_url):
        """Remember the target page and prepare the request headers.

        Args:
            goal_url: URL of the video page to download from.
        """
        # Target page URL.
        self.url = goal_url
        # Headers sent when fetching the HTML page.
        self.getHTMLHeaders = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/69.0.3497.100 Safari/537 "
        }
        # Headers sent when downloading the media stream. The Referer is the
        # page the stream belongs to (what a browser playing that page would
        # send) instead of a fixed URL of some unrelated video.
        self.downloadHeaders = {
            'Origin': 'https://www.bilibili.com',
            'Referer': goal_url,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/69.0.3497.100 Safari/537.36',
        }

    def get_source(self):
        """Fetch the target page and return its HTML.

        Returns:
            The response body decoded as UTF-8, or ``None`` when the request
            fails or the status code is not 200 (the reason is printed).
        """
        try:
            # ``headers`` must be passed by keyword: the second positional
            # parameter of ``requests.get`` is ``params``, which would put the
            # User-Agent into the query string instead of the request headers.
            response = requests.get(self.url, headers=self.getHTMLHeaders)
        except requests.RequestException as e:
            print(e)
            return None
        if response.status_code != 200:
            print("获取页面失败...")
            return None
        return response.content.decode("utf-8")

    @staticmethod
    def parse_source(source):
        """Extract the video title and a stream URL from the page HTML.

        Args:
            source: HTML text of the video page.

        Returns:
            A dict with ``"video_name"`` (the page title made safe for use as
            a file name) and ``"video_url"`` (``baseUrl`` of the last entry in
            the playinfo video list, or ``''`` when that list is empty).

        Raises:
            AttributeError: If the page has no matching ``<title>`` tag.
            IndexError: If the page has no ``window.__playinfo__`` script.
            KeyError: If the playinfo JSON lacks the expected keys.
        """
        video_name = re.search('<title data-vue-meta="true">(.*?)</title>', source).group(1)
        video_name = re.sub('[!。%¥#@*,、‘;’,./;【】`~ ]', '-', video_name)
        # The title becomes a file name in save_video, so also replace the
        # remaining characters that Windows does not allow in file names.
        video_name = re.sub(r'[\\:?"<>|]', '-', video_name)

        # The stream URL is read from the embedded playinfo JSON rather than
        # scraped with a one-off regex, because a page may list several
        # streams (one per quality).
        pattern = r'\<script\>window\.__playinfo__=(.*?)\</script\>'
        playinfo = json.loads(re.findall(pattern, source)[0])
        streams = playinfo['data']['dash']['video']
        # The last listed stream is the one that gets downloaded.
        video_url = streams[-1]['baseUrl'] if streams else ''
        return {
            "video_name": video_name,
            "video_url": video_url
        }

    def save_video(self, video):
        """Download the stream described by ``video`` to ``<video_name>.flv``.

        The file is created relative to the current working directory.

        Args:
            video: Mapping with the keys ``"video_url"`` and ``"video_name"``,
                as returned by ``parse_source``.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status,
                so that an error page is never saved as the video.
        """
        video_url = video["video_url"]
        # NOTE(review): the extension is always '.flv', whatever container
        # the stream actually uses - confirm this is intended.
        video_name = video["video_name"] + '.flv'
        # stream=True defers fetching the body so it can be written chunk by
        # chunk instead of being held in memory as a whole.
        # NOTE(review): verify=False disables TLS certificate verification;
        # kept to preserve existing behaviour - confirm it is still required.
        with requests.get(video_url, headers=self.downloadHeaders, stream=True, verify=False) as response:
            response.raise_for_status()
            with open(video_name, 'wb') as f:
                for chunk in response.iter_content(chunk_size=self.DOWNLOAD_CHUNK_SIZE):
                    f.write(chunk)

    def run(self):
        """Fetch the page, parse it and save the video, printing progress."""
        print("正在获取页面...\n")
        source = self.get_source()
        if source is None:
            # get_source already printed why the page could not be fetched;
            # there is nothing to parse.
            return
        print("正在解析title和url...\n")
        video = self.parse_source(source)
        self.save_video(video)
        print("下载完成!")
74
75
# Page of the video to download.
url = "https://www.bilibili.com/video/av70414801"
# Build the downloader for that page.
download_video = Bilibili(goal_url=url)
# Fetch the page, extract the stream URL and save the video.
download_video.run()
import json
import requests
import re
class Bilibili(object):
    """Download the video stream referenced by a single Bilibili video page.

    ``run`` chains three steps: fetch the page HTML (``get_source``), extract
    the title and a stream URL from it (``parse_source``) and write the
    stream to disk (``save_video``).
    """

    # Number of bytes written to disk per chunk while streaming the download.
    DOWNLOAD_CHUNK_SIZE = 1024 * 1024

    def __init__(self, goal_url):
        """Remember the target page and prepare the request headers.

        Args:
            goal_url: URL of the video page to download from.
        """
        # Target page URL.
        self.url = goal_url
        # Headers sent when fetching the HTML page.
        self.getHTMLHeaders = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/69.0.3497.100 Safari/537 "
        }
        # Headers sent when downloading the media stream. The Referer is the
        # page the stream belongs to (what a browser playing that page would
        # send) instead of a fixed URL of some unrelated video.
        self.downloadHeaders = {
            'Origin': 'https://www.bilibili.com',
            'Referer': goal_url,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/69.0.3497.100 Safari/537.36',
        }

    def get_source(self):
        """Fetch the target page and return its HTML.

        Returns:
            The response body decoded as UTF-8, or ``None`` when the request
            fails or the status code is not 200 (the reason is printed).
        """
        try:
            # ``headers`` must be passed by keyword: the second positional
            # parameter of ``requests.get`` is ``params``, which would put the
            # User-Agent into the query string instead of the request headers.
            response = requests.get(self.url, headers=self.getHTMLHeaders)
        except requests.RequestException as e:
            print(e)
            return None
        if response.status_code != 200:
            print("获取页面失败...")
            return None
        return response.content.decode("utf-8")

    @staticmethod
    def parse_source(source):
        """Extract the video title and a stream URL from the page HTML.

        Args:
            source: HTML text of the video page.

        Returns:
            A dict with ``"video_name"`` (the page title made safe for use as
            a file name) and ``"video_url"`` (``baseUrl`` of the last entry in
            the playinfo video list, or ``''`` when that list is empty).

        Raises:
            AttributeError: If the page has no matching ``<title>`` tag.
            IndexError: If the page has no ``window.__playinfo__`` script.
            KeyError: If the playinfo JSON lacks the expected keys.
        """
        video_name = re.search('<title data-vue-meta="true">(.*?)</title>', source).group(1)
        video_name = re.sub('[!。%¥#@*,、‘;’,./;【】`~ ]', '-', video_name)
        # The title becomes a file name in save_video, so also replace the
        # remaining characters that Windows does not allow in file names.
        video_name = re.sub(r'[\\:?"<>|]', '-', video_name)

        # The stream URL is read from the embedded playinfo JSON rather than
        # scraped with a one-off regex, because a page may list several
        # streams (one per quality).
        pattern = r'\<script\>window\.__playinfo__=(.*?)\</script\>'
        playinfo = json.loads(re.findall(pattern, source)[0])
        streams = playinfo['data']['dash']['video']
        # The last listed stream is the one that gets downloaded.
        video_url = streams[-1]['baseUrl'] if streams else ''
        return {
            "video_name": video_name,
            "video_url": video_url
        }

    def save_video(self, video):
        """Download the stream described by ``video`` to ``<video_name>.flv``.

        The file is created relative to the current working directory.

        Args:
            video: Mapping with the keys ``"video_url"`` and ``"video_name"``,
                as returned by ``parse_source``.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status,
                so that an error page is never saved as the video.
        """
        video_url = video["video_url"]
        # NOTE(review): the extension is always '.flv', whatever container
        # the stream actually uses - confirm this is intended.
        video_name = video["video_name"] + '.flv'
        # stream=True defers fetching the body so it can be written chunk by
        # chunk instead of being held in memory as a whole.
        # NOTE(review): verify=False disables TLS certificate verification;
        # kept to preserve existing behaviour - confirm it is still required.
        with requests.get(video_url, headers=self.downloadHeaders, stream=True, verify=False) as response:
            response.raise_for_status()
            with open(video_name, 'wb') as f:
                for chunk in response.iter_content(chunk_size=self.DOWNLOAD_CHUNK_SIZE):
                    f.write(chunk)

    def run(self):
        """Fetch the page, parse it and save the video, printing progress."""
        print("正在获取页面...\n")
        source = self.get_source()
        if source is None:
            # get_source already printed why the page could not be fetched;
            # there is nothing to parse.
            return
        print("正在解析title和url...\n")
        video = self.parse_source(source)
        self.save_video(video)
        print("下载完成!")
# Page of the video to download.
url = "https://www.bilibili.com/video/av70414801"
# Build the downloader for that page.
download_video = Bilibili(goal_url=url)
# Fetch the page, extract the stream URL and save the video.
download_video.run()