import hashlib
import os
import time
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED
import requests
# pip install pycryptodome -i http://pypi.douban.com/simple --trusted-host=pypi.douban.com
from Crypto.Cipher import AES
# Lagou verifies paid access through cookies, so the course must already be purchased.
COOKIE = '' # paste a captured cookie here
def md5(str1):
    """Return the hexadecimal MD5 digest of *str1* encoded as UTF-8."""
    return hashlib.md5(str1.encode('utf-8')).hexdigest()
class m3u8:
    """Download the segments of an HLS (.m3u8) playlist and merge them.

    Segments are fetched concurrently into a temporary directory named after
    the MD5 of the playlist URL, optionally AES-128-CBC decrypted, then
    concatenated into ``<file_name>.mp4`` and the directory is removed.
    """

    def __init__(self, url, cookie=COOKIE, core=16):
        """Prepare a downloader for the playlist at *url*.

        Args:
            url: URL of the .m3u8 playlist.
            cookie: Value sent in the ``Cookie`` request header.
            core: Number of worker threads used to download segments.
        """
        self.url = url
        # Everything up to and including the last '/', used to resolve
        # relative segment paths.
        self.base_url = url[:url.rfind('/') + 1]
        self.tmp = md5(url)
        self.cookie = cookie
        self.executor = ThreadPoolExecutor(core)
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
            'Cookie': self.cookie
        }

    def _load_key(self, key_line):
        """Fetch the decryption key described by an ``#EXT-X-KEY`` line.

        Returns:
            ``(key, iv)`` where *key* is the raw key bytes (empty when the
            method is ``NONE``) and *iv* is the explicit IV as 16 bytes, or
            ``None`` when the line carries no ``IV`` attribute.

        Raises:
            ValueError: the line declares encryption but has no ``URI``.
            requests.HTTPError: the key request returned an error status.
        """
        import re
        from urllib.parse import urljoin

        method_match = re.search(r'METHOD=([^,\s]+)', key_line)
        method = method_match.group(1) if method_match else ''
        print("Decode Method:", method)
        if method == 'NONE':
            # METHOD=NONE means the following segments are not encrypted.
            return b'', None

        uri_match = re.search(r'URI="([^"]*)"', key_line)
        if uri_match is None:
            raise ValueError('EXT-X-KEY line has no URI attribute: ' + key_line)
        # urljoin leaves absolute URIs untouched and resolves relative ones
        # against the playlist URL.
        res = requests.get(urljoin(self.url, uri_match.group(1)), headers=self.headers)
        res.raise_for_status()
        key = res.content
        print("key:", key)

        iv_match = re.search(r'IV=0[xX]([0-9a-fA-F]+)', key_line)
        iv = bytes.fromhex(iv_match.group(1).zfill(32)) if iv_match else None
        return key, iv

    def download_ts(self):
        """Parse the playlist and download every segment into ``self.tmp``.

        Only the first ``#EXT-X-KEY`` that yields a key is used; later key
        lines are ignored. Relative segment paths containing ``ad0.ts`` are
        skipped.

        Raises:
            requests.HTTPError: the playlist, key or a segment request
                returned an error status.
        """
        from urllib.parse import urljoin

        response = requests.get(self.url, headers=self.headers)
        response.raise_for_status()
        # splitlines() + strip() also copes with CRLF playlists.
        lines = [line.strip() for line in response.text.splitlines()]
        os.makedirs(self.tmp, exist_ok=True)

        key = b''
        iv = None
        player_list = []
        expecting_segment = False
        for line in lines:
            if not line:
                continue
            if line.startswith('#'):
                # Check whether the video is AES-128 encrypted.
                if '#EXT-X-KEY' in line:
                    if not key:
                        key, iv = self._load_key(line)
                elif '#EXTINF' in line:
                    expecting_segment = True
                continue
            if not expecting_segment:
                continue
            expecting_segment = False
            if 'http' in line:
                # Already an absolute segment address.
                player_list.append(line)
            elif 'ad0.ts' not in line:
                # Relative path: resolve it against the playlist directory.
                player_list.append(urljoin(self.base_url, line))

        if key:
            print('此视频经过加密')
            print(player_list)  # print the list of segment URLs
            tasks = [self.executor.submit(self._download2_, tsUrl, key, i, iv)
                     for i, tsUrl in enumerate(player_list)]
        else:
            print('此视频未加密')
            print(player_list)  # print the list of segment URLs
            tasks = [self.executor.submit(self._download_, tsUrl, i)
                     for i, tsUrl in enumerate(player_list)]
            print(tasks)
        wait(tasks, return_when=ALL_COMPLETED)
        # wait() never raises worker errors; result() re-raises the first one
        # so a failed segment cannot silently end up missing from the merge.
        for task in tasks:
            task.result()
        print('下载完成')

    def _download_(self, tsUrl, index):
        """Download one unencrypted segment to ``<tmp>/<index + 1>.ts``."""
        res = requests.get(tsUrl, headers=self.headers)
        res.raise_for_status()
        with open(os.path.join(self.tmp, str(index + 1) + '.ts'), 'wb') as file:
            file.write(res.content)
        print('正在写入第{}个文件'.format(index + 1))

    def _download2_(self, tsUrl, key, index, iv=None):
        """Download one segment, decrypt it and save it as ``<index + 1>.ts``.

        Args:
            tsUrl: Segment URL.
            key: AES key bytes.
            index: Zero-based position of the segment in the playlist.
            iv: Explicit 16-byte IV from the playlist. When ``None`` the key
                is reused as the IV, as this code has always done.
                NOTE(review): RFC 8216 specifies the media sequence number as
                the default IV - confirm whether the fallback should change.
        """
        res = requests.get(tsUrl, headers=self.headers)
        res.raise_for_status()
        crypto = AES.new(key, AES.MODE_CBC, key if iv is None else iv)
        with open(os.path.join(self.tmp, str(index + 1) + '.ts'), 'wb') as file:
            file.write(crypto.decrypt(res.content))  # write the decrypted video data
        print('正在写入第{}个文件'.format(index + 1))

    def merge(self, file_name):
        """Concatenate ``1.ts`` .. ``N.ts`` into ``<file_name>.mp4`` and clean up.

        ``N`` is the number of ``.ts`` files in the temporary directory; a
        gap in the numbering raises ``FileNotFoundError``.
        """
        import shutil

        segment_count = sum(1 for name in os.listdir(self.tmp) if name.endswith('.ts'))
        with open('%s.mp4' % file_name, 'wb') as f:
            for number in range(1, segment_count + 1):
                # Stream each part so no whole segment is held in memory and
                # every handle is closed.
                with open(os.path.join(self.tmp, '%d.ts' % number), 'rb') as part:
                    shutil.copyfileobj(part, f)
        print('合并完成')
        # Portable replacement for the Windows-only "rd /s/q" shell command.
        shutil.rmtree(self.tmp, ignore_errors=True)

    def download(self, file_name):
        """Download all segments, then merge them into ``<file_name>.mp4``."""
        self.download_ts()
        self.merge(file_name)
if __name__ == '__main__':
    # Example run: download a single lesson and report the elapsed time.
    url = 'http://1252043158.vod2.myqcloud.com/1d93b969vodtranscq1252043158/5118f4575285890800411211515/drm/v.f240.m3u8'
    start = time.time()  # start time
    d = m3u8(url)
    d.download("第01讲:程序运行时,内存到底是如何进行分配的?")
    # The original format string ended with a stray, unbalanced ')'.
    print('共耗时: %s' % (time.time() - start))
import re
# pip install requests -i http://pypi.douban.com/simple --trusted-host=pypi.douban.com
import requests
import json
from m3u8 import m3u8
import time
# Shared HTTP session; shi() uses it to fetch the course page.
res = requests.session()
# Maps lesson theme -> video file URL; filled in (and returned) by shi().
maps = {}
def shi(url):
    """Collect the video URL of every lesson that has one on a course page.

    The page is expected to embed its data as ``window.courseInfo = {...};``.
    Each lesson with a ``videoMedia`` entry is recorded in the module-level
    ``maps`` dict as ``theme -> fileUrl``; lessons without one are reported
    as "not update".

    Args:
        url: Course page URL.

    Returns:
        The module-level ``maps`` dict (lesson theme -> video file URL).

    Raises:
        ValueError: the page contains no ``window.courseInfo`` assignment.
    """
    print(url)
    page = res.get(url).text
    found = re.search(r"window\.courseInfo = (.+);", page)
    if found is None:
        # Previously this surfaced as a bare IndexError with no explanation.
        raise ValueError('window.courseInfo not found in page: ' + url)
    r = found.group(1)
    print(r)
    course = json.loads(r)
    print(course["courseSections"])
    for item in course["courseSections"]:
        for lesson in item["courseLessons"]:
            # .get() treats both a missing key and a null value as "no video".
            media = lesson.get("videoMedia")
            if media:
                maps[lesson["theme"]] = media["fileUrl"]
                print(item["sectionName"], lesson["theme"], media["fileUrl"])
            else:
                print(item["sectionName"], lesson["theme"], "not update")
    print(maps)
    return maps
#
if __name__ == '__main__':
    # Download every lesson of one course and report the total elapsed time.
    # url = 'https://kaiwu.lagou.com/course/courseInfo.htm?courseId=69#/detail/pc?id=1898'
    start = time.time()  # start time
    url = 'https://kaiwu.lagou.com/course/courseInfo.htm?courseId=67#/detail/pc?id=1585'
    maps = shi(url)
    print(maps)
    for k, v in maps.items():
        # k is the lesson theme (used as the output file name), v its playlist URL.
        m3u8(v).download(k)
    # The original format string ended with a stray, unbalanced ')'.
    print('共耗时: %s' % (time.time() - start))