# -*- coding: utf-8 -*-
"""
@Time : 2022/3/23 16:13
@Author : Andrew
@File : 91视频.py
"""
import re
"""
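1. Open the 91 Japanese-drama site http://www.wwmulu.com/rj/wuyantuili/play-1-1.html and fetch the page source
2. Extract the m3u8 URL from the page source
3. Download the m3u8 file
4. Download the video segments listed in it
5. Merge the segments into one video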
"""
# Page URL: http://www.wwmulu.com/rj/wuyantuili/play-1-1.html (site root: http://www.wwmulu.com)
# Inspecting the page source shows:
#   /play.html?u=https://new.iskcd.com/20220111/38FCVqzP/1100kb/hls/index.m3u8 (this one can also be obtained there)
#   /play.html?u=https://new.iskcd.com/20220111/38FCVqzP/index.m3u8
# Packet capture: https://new.iskcd.com/20220111/38FCVqzP/index.m3u8
# Its response is /20220111/38FCVqzP/1100kb/hls/index.m3u8 ?_=1648026917721
# Joining the host with that returned path gives the real m3u8 file:
# https://new.iskcd.com/20220111/38FCVqzP/1100kb/hls/index.m3u8
import requests
from lxml import etree
"""第一部分"""
# Browser identification string sent with every request (a Windows QQBrowser
# User-Agent); the two literals below are concatenated into one header value.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 "
    "Safari/537.36 Core/1.77.97.400 QQBrowser/10.9.4621.400 "
)

# Shared HTTP request headers for all requests made by this script.
headers = {"User-Agent": _USER_AGENT}
# Anti-hotlinking: a Referer header tells the server which page issued the
# request. It is currently disabled; to enable it:
# headers["Referer"] = url
# url = "http://www.wwmulu.com/rj/wuyantuili/play-1-1.html"
# resp1 = requests.get(url, headers=headers)
# resp1.encoding = "utf-8"
# html = etree.HTML(resp1.text)
# # 通过m3u8_1以及返回的数据进行拼凑得出最终的m3u8_2
# m3u8_1 = html.xpath('//*[@id="cms_player"]/iframe/@src')[0].split("=")[-1]
# tvName = html.xpath('/html/body/div[2]/div/div[2]/div/div[2]/span[2]/@data-vod_name')[0]+html.xpath('/html/body/div[2]/div/div[2]/div/div[2]/span[2]/@data-playtitle')[0]
# resp2 = requests.get(m3u8_1, headers=headers)
# content = resp2.text.replace("\n", " ").split(" ")[2]
# m3u8_2 = "https://new.iskcd.com/" + content
# # 下载m3u8_2
# resp3 = requests.get(m3u8_2, headers=headers)
# with open("./91日剧/"+tvName+".m3u8", mode="wb") as f:
# f.write(resp3.content)
# resp1.close()
# resp2.close()
# resp3.close()
"""第二部分"""
import aiohttp
import asyncio
import aiofiles
"""多线程,但是不知道是真大还是啥,最后还是超时了"""
# async def download(url):
# # AttributeError: __aexit__ 原因是aiohttp.ClientSession没带()
# async with aiohttp.ClientSession() as session:
# async with session.get(url, headers=headers) as resp4: # resp4 = requests.get(url)
# # print(resp4)
# name = url.split("hls/")[1].split(".")[0]
# # print(name)
# # with open("./91日剧/勿言推理第01集_ts/" + name + ".ts",
# # mode="wb") as f:
# # f.write(await resp4.content.read())
# # print(url, ":下载结束")
# async with aiofiles.open("./91日剧/勿言推理第01集_ts/" + name + ".ts",
# mode="wb") as f:
# await f.write(await resp4.content.read())
# print(url, ":下载结束")
#
#
# async def getTasks():
# tasks = []
# with open("./91日剧/勿言推理第01集.m3u8", mode="r", encoding="utf-8") as f:
# for line in f:
# # 去掉换行符、空白、空格
# line = line.strip()
# # 如果#开头,我不要
# if line.startswith("#"):
# continue
# # 准备异步任务下载视频片段
# # print(line)
# tasks.append(download(line))
# await asyncio.wait(tasks)
# if __name__ == "__main__":
# asyncio.run(getTasks()) # 开启异步调用
"""单线程"""
import os

# Playlist written by the first part of this script, and the directory that
# receives the numbered .ts segments.
PLAYLIST_PATH = "./91日剧/勿言推理第01集.m3u8"
SEGMENT_DIR = "./91日剧/勿言推理第01集_ts"


def iter_segment_urls(lines):
    """Yield the segment entries of an m3u8 playlist, in order.

    Args:
        lines: Iterable of raw playlist lines (e.g. an open text file).

    Yields:
        Each line stripped of surrounding whitespace, skipping blank lines
        and lines starting with ``#`` (playlist tags / comments).
    """
    for raw_line in lines:
        entry = raw_line.strip()
        # Blank lines must be skipped too: an empty string is not a valid
        # URL and would make requests.get() raise.
        if not entry or entry.startswith("#"):
            continue
        yield entry


def download_segments(playlist_path: str, segment_dir: str,
                      request_headers: dict, timeout: float = 30) -> int:
    """Download every segment listed in a playlist, one after another.

    Segments are saved as ``1.ts``, ``2.ts``, ... inside ``segment_dir`` in
    playlist order, so they can later be merged by number.

    Args:
        playlist_path: Path of the local m3u8 file to read.
        segment_dir: Directory to write the segments into; created if missing.
        request_headers: HTTP headers sent with every segment request.
        timeout: Seconds to wait on each request before giving up, so a
            stalled server cannot hang the script forever.

    Returns:
        The number of segments downloaded.

    Raises:
        requests.RequestException: On a network error, a timeout, or an HTTP
            error status (raised instead of saving an error page as video).
        OSError: If the playlist cannot be read or a segment cannot be written.
    """
    os.makedirs(segment_dir, exist_ok=True)
    count = 0
    with open(playlist_path, mode="r", encoding="utf-8") as playlist:
        for count, segment_url in enumerate(iter_segment_urls(playlist), start=1):
            response = requests.get(segment_url, headers=request_headers,
                                    timeout=timeout)
            # Fail loudly on 4xx/5xx rather than writing a corrupt segment.
            response.raise_for_status()
            target = os.path.join(segment_dir, f"{count}.ts")
            with open(target, mode="wb") as segment_file:
                segment_file.write(response.content)
            print(count, ":下载结束")
    return count


if __name__ == "__main__":
    download_segments(PLAYLIST_PATH, SEGMENT_DIR, headers)