import requests
import re
import json
import m3u8
import os
from concurrent.futures import ThreadPoolExecutor
from time import sleep
import shutil
import subprocess
# HTTP headers passed to the `requests.get` calls in `download_segment` and
# `start_url`: a desktop Chrome User-Agent plus a captured browser cookie string.
# NOTE(review): the Cookie value embeds what look like live account credentials
# (e.g. `acPasstoken`, `auth_key`, `ac_username`). Presumably these should be
# loaded from the environment or a local config file rather than kept in
# source — confirm and rotate if this file has been shared.
headers = {
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
'Cookie':'_did=web_1930955391DE64DF; _did=web_1930955391DE64DF; csrfToken=B3HzIa_gotdrEZWr2wOODE9z; webp_supported=%7B%22lossy%22%3Atrue%2C%22lossless%22%3Atrue%2C%22alpha%22%3Atrue%2C%22animation%22%3Atrue%7D; Hm_lvt_2af69bc2b378fb58ae04ed2a04257ed1=1730789391; HMACCOUNT=EAB764C050B1F83E; lsv_js_player_v2_main=ca85g8; uuid=71dac937b0a505074f4e5e94fa8d1234; auth_key=76212252; ac_userimg=https%3A%2F%2Fimgs.aixifan.com%2Fstyle%2Fimage%2FdefaultAvatar.jpg; stochastic=eWF6MjduaWRvZw%3D%3D; acPasstoken=ChVpbmZyYS5hY2Z1bi5wYXNzdG9rZW4ScGbcuUJMOlbw849_IMXUeM1D9Pnj6LNEtj0f15emmerMDycQxBDcT_CZHQLrhk-XgwxJctLTNpifSja1P7U-Vj2rAquxGMEdpRoqg2dF7Vvz2XvTTOMjZH0JmWGOuvGlGltsL5nV4iEaeATIEtRnGn8aEmdOSJkx2ly3WrEb6jSqdMEq6SIgOlkU3ZpDjDwLFeEP8MTMeUiOIRnLpDfBbTREE09kiSIoBTAB; ac_username=yangzhenyu9406; acPostHint=3eb71c4a51d5a6884aa31105719cdecf872a; safety_id=AAJROucDlbVzhbY2UsGaqvRY; cur_req_id=713498187499F148_self_0528e0128300a06862bd18195cc26e1d; cur_group_id=713498187499F148_self_0528e0128300a06862bd18195cc26e1d_0; Hm_lpvt_2af69bc2b378fb58ae04ed2a04257ed1=1730789919'
}
# Root directory under which `start_url` creates the per-title segment folder
# (`<title>_ts`) and the per-title output folder (`<title>`).
output_dir = r'C:\Users\38461\Desktop\dongman\data'
# Download a single TS segment, with a retry mechanism.
def download_segment(ts_url, ts_filename, max_retries):
    """Download one TS segment to disk, retrying on request errors.

    The body is streamed into ``ts_filename + ".part"`` and renamed onto
    ``ts_filename`` only after the whole response has been written, so a
    connection that drops mid-transfer never leaves a truncated file under
    the final name.

    Args:
        ts_url: URL of the segment to fetch.
        ts_filename: Destination path of the finished segment.
        max_retries: Maximum number of download attempts.

    Returns:
        True if the segment was downloaded completely, False if every
        attempt failed. Request failures are reported on stdout, not raised.
    """
    partial_filename = ts_filename + ".part"
    for attempt in range(1, max_retries + 1):
        try:
            print(f"Attempting to download {ts_url} to {ts_filename}... (Attempt {attempt})")
            # The context manager releases the streamed connection even when
            # the transfer fails part-way through.
            with requests.get(ts_url, headers=headers, stream=True, timeout=10) as response:
                response.raise_for_status()  # treat HTTP error statuses as failures
                with open(partial_filename, "wb") as ts_file:
                    for chunk in response.iter_content(chunk_size=1024):
                        if chunk:
                            ts_file.write(chunk)
            # Publish the file under its final name only once it is complete.
            os.replace(partial_filename, ts_filename)
            print(f"Downloaded {ts_filename}")
            return True
        except requests.exceptions.RequestException as e:
            print(f"Failed to download {ts_url}: {e}")
            if attempt < max_retries:
                sleep(2)  # back off briefly before the next attempt

    # Every attempt failed: drop whatever partial data was left behind.
    try:
        os.remove(partial_filename)
    except FileNotFoundError:
        pass
    print(f"Failed to download {ts_url} after {max_retries} attempts.")
    return False
# Download every TS segment of the playlist using a thread pool.
def down_load_ts(m3u8_url, m3u8_full_path, max_workers=10, max_retries=3):
    """Download all segments listed in an M3U8 playlist into a directory.

    Segment ``i`` of the playlist is stored as ``segment_{i}.ts`` inside
    ``m3u8_full_path``. Each segment URL is built by appending the segment's
    URI to the fixed AcFun HLS base URL.

    Args:
        m3u8_url: URL of the playlist, loaded with ``m3u8.load``.
        m3u8_full_path: Existing directory that receives the segment files.
        max_workers: Number of concurrent download threads.
        max_retries: Attempts allowed per segment.

    Returns:
        The playlist object returned by ``m3u8.load``.
    """
    m3u8_obj = m3u8.load(m3u8_url)
    base_url = 'https://tx-safety-video.acfun.cn/mediacloud/acfun/acfun_video/hls/'
    ts_filenames = [
        os.path.join(m3u8_full_path, f"segment_{i}.ts")
        for i in range(len(m3u8_obj.segments))
    ]

    # Segment downloads are I/O-bound, so a thread pool overlaps the waits.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(download_segment, base_url + segment.uri, ts_filename, max_retries)
            for segment, ts_filename in zip(m3u8_obj.segments, ts_filenames)
        ]
        # Wait for every task; result() also re-raises unexpected worker errors.
        for future in futures:
            future.result()

    # download_segment reports request failures by printing instead of
    # raising, so check the files on disk before claiming success.
    missing = [path for path in ts_filenames if not os.path.exists(path)]
    if missing:
        print(f"Warning: {len(missing)} of {len(ts_filenames)} TS files could not be downloaded.")
    else:
        print("All TS files downloaded successfully.")
    return m3u8_obj
def merge_ts_files(m3u8_obj, output_dir, full_path):
    """Concatenate downloaded segments, in playlist order, into one TS file.

    Args:
        m3u8_obj: Playlist object; only the length of ``m3u8_obj.segments``
            is used, to know how many ``segment_{i}.ts`` files to look for.
        output_dir: Directory containing the ``segment_{i}.ts`` files.
        full_path: Existing directory in which ``output_video.ts`` is written.

    Returns:
        Path of the merged file (``<full_path>/output_video.ts``).

    Segments whose file is missing are skipped with a warning so that one
    failed download does not abort the whole merge.
    """
    output_file = os.path.join(full_path, 'output_video.ts')
    with open(output_file, "wb") as merged:
        for i, _segment in enumerate(m3u8_obj.segments):
            ts_filename = os.path.join(output_dir, f"segment_{i}.ts")
            try:
                ts_file = open(ts_filename, "rb")
            except FileNotFoundError:
                print(f"Warning: {ts_filename} does not exist. Skipping this segment.")
                continue
            with ts_file:
                # Stream in chunks instead of loading a whole segment in memory.
                shutil.copyfileobj(ts_file, merged)
            print(f"Merged {ts_filename}")
    print(f"All segments merged into {output_file}.")
    return output_file
def ts_to_mp4(ts_file, output_file):
    """Remux a .ts file into an .mp4 container using ffmpeg (stream copy).

    Args:
        ts_file: Path of the input transport-stream file.
        output_file: Path of the .mp4 file to create; an existing file at
            this path is overwritten.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status. ffmpeg's stderr output is printed before re-raising.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    command = [
        'ffmpeg',
        # Without these, ffmpeg asks on its (hidden) stderr whether to
        # overwrite an existing output and then blocks waiting on stdin.
        '-nostdin', '-y',
        '-i', ts_file,
        '-c', 'copy',  # copy the streams as-is, no re-encoding
        output_file,
    ]
    try:
        subprocess.run(
            command,
            check=True,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,  # keep diagnostics for the failure path
        )
    except subprocess.CalledProcessError as e:
        details = e.stderr.decode(errors='replace') if e.stderr else ''
        print(f"ffmpeg failed to convert {ts_file}: {details}")
        raise
# Resolve the M3U8 URL of an episode page and run the whole pipeline.
def start_url(url='https://www.acfun.cn/bangumi/aa5024869'):
    """Download the video behind an AcFun bangumi page and save it as MP4.

    Steps: fetch the page, read the ``window.bangumiData`` JSON embedded in
    it, take the playlist URL of the entry with ``"id":1`` from the
    ``ksPlayJsonHevc`` field, download all segments into
    ``<output_dir>/<title>_ts``, merge them, convert the merged file to
    ``<output_dir>/<title>/output_video.mp4`` and delete the intermediates.

    Args:
        url: Address of the bangumi page to download.

    Returns:
        Path of the resulting .mp4 file.

    Raises:
        requests.exceptions.RequestException: If the page request fails or
            returns an HTTP error status.
        IndexError: If the page data or the playlist URL cannot be located
            (the exception type an unmatched lookup has always produced here).
    """
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()

    page_match = re.search(r'window\.bangumiData = (.*?)};', response.text)
    if page_match is None:
        raise IndexError(f"window.bangumiData not found in page {url}")
    # The lazy match stops just before the closing brace, so put it back.
    data = json.loads(page_match.group(1) + '}')

    info = data['currentVideoInfo']['ksPlayJsonHevc']
    url_match = re.search(r'{"id":1,"url":"(.*?)",', info)
    if url_match is None:
        raise IndexError("No playlist URL with id 1 found in ksPlayJsonHevc")
    # The capture is the raw body of a JSON string literal; decode it so any
    # JSON escapes (for example \u0026) become the real characters.
    m3u8_url = json.loads('"' + url_match.group(1) + '"')

    # The title becomes a directory name: replace characters that Windows
    # does not allow in file names.
    name = re.sub(r'[\\/:*?"<>|]', '_', data['bangumiTitle'])
    m3u8_full_path = os.path.join(output_dir, name + '_' + 'ts')
    full_dir = os.path.join(output_dir, name)

    os.makedirs(m3u8_full_path, exist_ok=True)
    m3u8_obj = down_load_ts(m3u8_url, m3u8_full_path)

    os.makedirs(full_dir, exist_ok=True)
    ts_filename = merge_ts_files(m3u8_obj, m3u8_full_path, full_dir)
    mp4_filename = os.path.splitext(ts_filename)[0] + '.mp4'
    ts_to_mp4(ts_filename, mp4_filename)

    # Only the .mp4 is kept; remove the merged .ts and the segment directory.
    os.remove(ts_filename)
    shutil.rmtree(m3u8_full_path)
    return mp4_filename
# Script entry point: run the download pipeline only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    start_url()