python - m3u8下载解析

import hashlib
import os
import re
import shutil
import time
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED
from urllib.parse import urljoin

import requests
# pip install pycryptodome -i http://pypi.douban.com/simple --trusted-host=pypi.douban.com
from Crypto.Cipher import AES
 
# Lagou verifies paid access through cookies, so the course must have been purchased.
COOKIE = ''   # paste a cookie captured from a logged-in session here
 
 
def md5(str1):
    """Return the hexadecimal MD5 digest of *str1* encoded as UTF-8."""
    return hashlib.md5(str1.encode('utf-8')).hexdigest()
 
class m3u8:
    """Download the segments of an HLS (m3u8) playlist and merge them into one file.

    Segments are fetched concurrently into a temporary directory named after
    the MD5 of the playlist URL, optionally AES-128-CBC decrypted, and then
    concatenated in playlist order into ``<file_name>.mp4``.

    Only the first ``#EXT-X-KEY`` that yields a key is used; playlists that
    rotate keys are not supported.
    """

    # Seconds to wait on a single HTTP request; without a timeout a stalled
    # connection would block a worker thread forever.
    timeout = 30

    def __init__(self, url, cookie=COOKIE, core=16):
        """Store the playlist URL and prepare the worker pool.

        Args:
            url: URL of the ``.m3u8`` playlist.
            cookie: Value sent in the ``Cookie`` request header.
            core: Number of worker threads used to download segments.
        """
        self.url = url
        # Everything up to and including the last '/', used to resolve
        # relative segment and key URIs.
        self.base_url = url[:url.rfind('/') + 1]
        self.tmp = md5(url)
        self.cookie = cookie
        self.executor = ThreadPoolExecutor(core)
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
            'Cookie': self.cookie
        }

    def download_ts(self):
        """Parse the playlist and download every segment into ``self.tmp``.

        Raises:
            requests.HTTPError: If the playlist, the key or a segment request
                returns an error status.
            NotImplementedError: If the playlist uses an encryption method
                other than ``NONE`` or ``AES-128``.
        """
        response = requests.get(self.url, headers=self.headers, timeout=self.timeout)
        response.raise_for_status()
        os.makedirs(self.tmp, exist_ok=True)

        key = b''
        explicit_iv = None
        media_sequence = 0
        segments_seen = 0
        expecting_uri = False
        player_list = []
        sequence_numbers = []

        # splitlines() + strip() also copes with CRLF playlists, which would
        # otherwise leave a trailing '\r' on every segment URL.
        for raw_line in response.text.splitlines():
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith('#EXT-X-MEDIA-SEQUENCE:'):
                media_sequence = int(line.split(':', 1)[1])
            elif line.startswith('#EXT-X-KEY:') and not key:
                key, explicit_iv = self._load_key(line)
            elif line.startswith('#EXTINF'):
                expecting_uri = True
            elif expecting_uri and not line.startswith('#'):
                expecting_uri = False
                # Every playlist entry consumes a sequence number, including
                # the ones skipped below.
                sequence = media_sequence + segments_seen
                segments_seen += 1
                is_absolute = line.startswith(('http://', 'https://'))
                # Relative 'ad0.ts' entries are skipped, as before.
                if not is_absolute and 'ad0.ts' in line:
                    continue
                player_list.append(urljoin(self.base_url, line))
                sequence_numbers.append(sequence)

        if key:
            print('此视频经过加密')
            print(player_list)  # print the list of segment URLs
            tasks = [
                self.executor.submit(
                    self._download2_, ts_url, key, i,
                    # Without an explicit IV attribute the segment's media
                    # sequence number (16 bytes, big-endian) is the IV.
                    explicit_iv or sequence_numbers[i].to_bytes(16, 'big'),
                )
                for i, ts_url in enumerate(player_list)
            ]
        else:
            print('此视频未加密')
            print(player_list)  # print the list of segment URLs
            tasks = [
                self.executor.submit(self._download_, ts_url, i)
                for i, ts_url in enumerate(player_list)
            ]

        wait(tasks, return_when=ALL_COMPLETED)
        # wait() never raises; ask each future for its result so a failed
        # segment surfaces here instead of producing a corrupt merge later.
        for task in tasks:
            task.result()
        print('下载完成')

    def _load_key(self, line):
        """Return ``(key_bytes, iv_bytes_or_None)`` for an ``#EXT-X-KEY`` line.

        ``key_bytes`` is empty when the line declares ``METHOD=NONE``.
        """
        attributes = {
            name: value.strip('"')
            for name, value in re.findall(
                r'([A-Z0-9-]+)=("[^"]*"|[^,]*)', line.partition(':')[2]
            )
        }
        method = attributes.get('METHOD', 'NONE')
        print("Decode Method:", method)
        if method == 'NONE':
            return b'', None
        if method != 'AES-128':
            raise NotImplementedError('unsupported encryption method: %s' % method)

        key_url = urljoin(self.base_url, attributes['URI'])
        res = requests.get(key_url, headers=self.headers, timeout=self.timeout)
        res.raise_for_status()
        key = res.content
        print("key:", key)

        iv_text = attributes.get('IV')
        # int(..., 16) accepts the '0x' prefix the IV attribute carries.
        iv = int(iv_text, 16).to_bytes(16, 'big') if iv_text else None
        return key, iv

    def _segment_path(self, index):
        """Return the path of the temporary file for zero-based segment *index*."""
        return os.path.join(self.tmp, str(index + 1) + '.ts')

    @staticmethod
    def _strip_pkcs7(data):
        """Return *data* without its trailing PKCS7 padding, if it has any."""
        if not data:
            return data
        pad = data[-1]
        if 1 <= pad <= 16 and data.endswith(bytes([pad]) * pad):
            return data[:-pad]
        return data

    def _download_(self, tsUrl, index):
        """Download one unencrypted segment to ``<tmp>/<index + 1>.ts``."""
        res = requests.get(tsUrl, headers=self.headers, timeout=self.timeout)
        res.raise_for_status()
        with open(self._segment_path(index), 'wb') as file:
            file.write(res.content)
        print('正在写入第{}个文件'.format(index + 1))

    def _download2_(self, tsUrl, key, index, iv=None):
        """Download one AES-128-CBC segment and store it decrypted.

        Args:
            tsUrl: URL of the segment.
            key: 16-byte AES key.
            index: Zero-based position of the segment in the playlist.
            iv: 16-byte initialization vector. When omitted the key itself
                is used as the IV, which is what this method did before the
                parameter existed.
        """
        res = requests.get(tsUrl, headers=self.headers, timeout=self.timeout)
        res.raise_for_status()
        crypto = AES.new(key, AES.MODE_CBC, key if iv is None else iv)
        plain = self._strip_pkcs7(crypto.decrypt(res.content))
        with open(self._segment_path(index), 'wb') as file:
            file.write(plain)  # write the decrypted segment
        print('正在写入第{}个文件'.format(index + 1))

    def merge(self, file_name):
        """Concatenate the downloaded segments into ``<file_name>.mp4``.

        Segments are read in numeric order (``1.ts``, ``2.ts``, ...) and the
        temporary directory is removed afterwards.

        Raises:
            FileNotFoundError: If a segment in the numbered sequence is missing.
        """
        # Count only segment files so stray entries do not inflate the range.
        count = sum(1 for name in os.listdir(self.tmp) if name.endswith('.ts'))
        with open('%s.mp4' % file_name, 'wb') as merged:
            for number in range(1, count + 1):
                with open(os.path.join(self.tmp, '%d.ts' % number), 'rb') as segment:
                    shutil.copyfileobj(segment, merged)

        print('合并完成')
        # Portable replacement for the Windows-only 'rd /s/q' shell command.
        shutil.rmtree(self.tmp)

    def download(self, file_name):
        """Download all segments, then merge them into ``<file_name>.mp4``."""
        self.download_ts()
        self.merge(file_name)
 
 
if __name__ == '__main__':
    # Example run: download a single playlist and report the elapsed time.
    url = 'http://1252043158.vod2.myqcloud.com/1d93b969vodtranscq1252043158/5118f4575285890800411211515/drm/v.f240.m3u8'
    start = time.time()  # start time
    d = m3u8(url)
    d.download("第01讲:程序运行时,内存到底是如何进行分配的?")
    # The original format string ended with a stray ')' that was printed verbatim.
    print('共耗时: %s' % (time.time() - start))
import re
# pip install requests -i http://pypi.douban.com/simple --trusted-host=pypi.douban.com
import requests
import json
from m3u8 import m3u8
import time

# Shared HTTP session that shi() uses to fetch the course page.
res = requests.session()
# Lesson title ("theme") -> video file URL; filled in by shi().
maps = {}


def shi(url):
    """Fetch a course page and collect the video URL of each lesson that has one.

    The page is expected to embed its data as a JSON object assigned to
    ``window.courseInfo``. Every lesson carrying a ``videoMedia`` entry is
    recorded in the module-level ``maps`` dict as ``theme -> fileUrl``.

    Args:
        url: URL of the course page.

    Returns:
        The module-level ``maps`` dict, updated in place.

    Raises:
        ValueError: If the page contains no ``window.courseInfo`` assignment.
    """
    print(url)
    page = res.get(url).text
    # Escape the dot so only the literal "window.courseInfo" matches.
    match = re.search(r"window\.courseInfo = (.+);", page)
    if match is None:
        raise ValueError("window.courseInfo not found in page: %s" % url)
    r = match.group(1)
    print(r)
    b = json.loads(r)
    print(b["courseSections"])

    for item in b["courseSections"]:
        for lesson in item["courseLessons"]:
            # .get() treats both a missing key and a null value as "no video yet".
            media = lesson.get("videoMedia")
            if media:
                maps[lesson["theme"]] = media["fileUrl"]
                print(item["sectionName"], lesson["theme"], media["fileUrl"])
            else:
                print(item["sectionName"], lesson["theme"], "not update")

    print(maps)
    return maps


#
if __name__ == '__main__':
    # Collect every lesson's playlist URL from the course page, then download
    # the lessons one by one, naming each output file after the lesson title.
    # url = 'https://kaiwu.lagou.com/course/courseInfo.htm?courseId=69#/detail/pc?id=1898'
    start = time.time()  # start time
    url = 'https://kaiwu.lagou.com/course/courseInfo.htm?courseId=67#/detail/pc?id=1585'
    maps = shi(url)
    print(maps)
    for k, v in maps.items():
        m3u8(v).download(k)
    # The original format string ended with a stray ')' that was printed verbatim.
    print('共耗时: %s' % (time.time() - start))

 

posted @ 2020-10-11 13:06  昨日微风  阅读(1173)  评论(0)  编辑  收藏  举报