python下载,需要密钥版下载,多线程版【二】
# Thread arguments are passed as a dict (kwargs) -- second approach.
# Handles the different path layouts found in m3u8 playlists.
import requests
import os
import datetime
import threading
import re
from queue import Empty, Queue
import random
import sys
from urllib.parse import urljoin, urlsplit

from fake_useragent import UserAgent

# Download time: 0:00:47

# Static pool used when fake_useragent cannot supply a User-Agent.
USER_AGENT_LIST = [
    'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36',
    'Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0) Gecko/20100101 Firefox/17.0.6',
    'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36',
    'Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36',
]

# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 30
# Number of download worker threads started by the script.
WORKER_COUNT = 30

# Extracts the URI attribute of an #EXT-X-KEY tag (quoted or unquoted).
_KEY_URI_RE = re.compile(r'URI="?([^",]+)')


def _random_user_agent():
    """Return a random User-Agent string, falling back to USER_AGENT_LIST."""
    try:
        try:
            return UserAgent(use_cache_server=False, verify_ssl=False).random
        except TypeError:
            # NOTE(review): newer fake_useragent releases appear to reject
            # these keyword arguments; retry with the defaults.
            return UserAgent().random
    except Exception:
        # Best effort only: any failure inside fake_useragent must not stop
        # the download, so fall back to the static pool.
        return random.choice(USER_AGENT_LIST)


def _segment_name(uri):
    """Return the bare file name of a segment URI (no directory, no query)."""
    return urlsplit(uri).path.rsplit('/', 1)[-1]


def _key_uri(line):
    """Return the URI attribute of an #EXT-X-KEY line, or None if absent."""
    match = _KEY_URI_RE.search(line)
    return match.group(1) if match else None


def _is_segment(line):
    """Tell whether a playlist line refers to a .ts media segment."""
    return '.ts' in line and not line.startswith('#')


class xiazai():
    """Resolve the media playlist behind the m3u8 link typed by the user.

    Reads the module-level globals ``xiazaidizhi`` (playlist URL) and
    ``wenjian`` (output directory name, created under the current working
    directory).
    """

    def __init__(self):
        self.url = xiazaidizhi
        # Directory that will hold the downloaded .ts files.
        file_dir = os.path.join(os.getcwd(), wenjian)
        os.makedirs(file_dir, exist_ok=True)
        self.headers = {'User-Agent': _random_user_agent()}
        self._variant_url = None
        self.savefile()

    def savefile(self):
        """Return the URL of the media playlist to download.

        The playlist at ``self.url`` is fetched once and the result is
        cached, so repeated calls do not hit the network again.  When the
        playlist is a master playlist, its last URI line is resolved against
        ``self.url``; otherwise ``self.url`` is already the media playlist
        and is returned unchanged.

        Raises:
            requests.RequestException: if the playlist cannot be fetched.
        """
        if self._variant_url is not None:
            return self._variant_url

        r = requests.get(self.url, headers=self.headers,
                         timeout=REQUEST_TIMEOUT)
        r.raise_for_status()

        stripped = (line.strip() for line in r.text.splitlines())
        uris = [line for line in stripped
                if line and not line.startswith('#')]

        if '#EXT-X-STREAM-INF' in r.text and uris:
            # urljoin copes with relative paths, absolute paths and full
            # URLs alike, which manual '/'-splitting does not.
            url_m3u8_hls = urljoin(self.url, uris[-1])
        else:
            url_m3u8_hls = self.url

        self.base_url = url_m3u8_hls.rsplit('/', 1)[0] + '/'
        print(url_m3u8_hls)
        self._variant_url = url_m3u8_hls
        return url_m3u8_hls


class xiazai1():
    """Download the playlist, key and segments of an HLS stream.

    All files are written into the directory named by the module-level
    global ``wenjian``.
    """

    def __init__(self):
        self.headers = {'User-Agent': _random_user_agent()}

    def index(self):
        """Fetch the media playlist and store it as ``index1.m3u8``.

        Sets the module-level global ``url_m3u8_hls`` used by ``duqu``.
        """
        global url_m3u8_hls
        url_m3u8_hls = xiazai().savefile()
        try:
            r = requests.get(url_m3u8_hls, headers=self.headers,
                             timeout=REQUEST_TIMEOUT)
            r.raise_for_status()
            with open(os.path.join(wenjian, 'index1.m3u8'), 'w',
                      encoding='utf-8') as f:
                f.write(r.text)
        except (requests.RequestException, OSError):
            print('下载失败!')

    def tihuan(self):
        """Return the lines of ``index1.m3u8``; an empty list on failure."""
        try:
            with open(os.path.join(wenjian, 'index1.m3u8'), 'r',
                      encoding='utf-8') as f:
                return f.read().splitlines()
        except OSError:
            print('下载失败!')
            return []

    def url(self):
        """Return ``(file_names, playlist_lines)`` for every .ts segment."""
        dizhi_1 = []
        url_1 = []
        for line in self.tihuan():
            line = line.strip()
            if _is_segment(line):
                dizhi_1.append(_segment_name(line))
                url_1.append(line)
        return (dizhi_1, url_1)

    def key(self):
        """Return the key URI as written in the playlist, or None."""
        for line in self.tihuan():
            if 'EXT-X-KEY' in line:
                uri = _key_uri(line)
                if uri:
                    return uri
        return None

    def index_shengcheng(self):
        """Write ``index.m3u8``, a copy of the playlist with local paths.

        The key URI is replaced by ``key.key`` and every segment line by its
        bare file name, so the playlist can be played from the download
        directory.
        """
        key1 = self.key()
        try:
            with open(os.path.join(wenjian, 'index1.m3u8'), 'r',
                      encoding='utf-8') as f:
                lines = f.readlines()
            with open(os.path.join(wenjian, 'index.m3u8'), 'w',
                      encoding='utf-8') as out:
                for line in lines:
                    stripped = line.strip()
                    if key1 and 'EXT-X-KEY' in line:
                        # Plain replace: the URI is literal text, not a
                        # regular expression.
                        line = line.replace(key1, 'key.key')
                    elif _is_segment(stripped):
                        line = _segment_name(stripped) + '\n'
                    out.write(line)
        except OSError:
            print('错误!')

    def duqu(self):
        """Download the key, write ``s.txt`` and queue every segment URL.

        Returns:
            Queue: absolute URLs of all .ts segments, in playlist order.

        Exits the program after printing a message when the playlist or key
        cannot be fetched or a file cannot be written.
        """
        try:
            r = requests.get(url_m3u8_hls, headers=self.headers,
                             timeout=REQUEST_TIMEOUT)
            r.raise_for_status()
            lines = [line.strip() for line in r.text.splitlines()]

            for line in lines:
                if 'EXT-X-KEY' not in line:
                    continue
                uri = _key_uri(line)
                if not uri:
                    # e.g. METHOD=NONE carries no key.
                    continue
                key_resp = requests.get(urljoin(url_m3u8_hls, uri),
                                        headers=self.headers,
                                        timeout=REQUEST_TIMEOUT)
                key_resp.raise_for_status()
                # The key is raw bytes; text mode would corrupt it.
                with open(os.path.join(wenjian, 'key.key'), 'wb') as f:
                    f.write(key_resp.content)

            # Unbounded: every URL is queued before any worker starts.
            ts_queue = Queue()
            names = []
            for line in lines:
                if _is_segment(line):
                    ts_queue.put(urljoin(url_m3u8_hls, line))
                    names.append(_segment_name(line))

            # Input list for: ffmpeg -f concat -safe 0 -i s.txt ...
            # Rewritten on every run so repeated runs do not duplicate it.
            concatfile = os.path.join(wenjian, 's.txt')
            with open(concatfile, 'w', encoding='utf-8') as f:
                f.writelines("file %s\n" % name for name in names)
            return ts_queue
        except (requests.RequestException, OSError):
            print('连接失败')
            sys.exit()

    def shijian(self, dm_time):
        """Print the video duration given its length in seconds."""
        shichang_time = str(datetime.timedelta(seconds=dm_time))
        print('视频时长:%s' % shichang_time)

    def xiazai1(self, ts_queue):
        """Worker loop: download queued segment URLs until the queue is empty.

        URLs that fail are appended to ``shibai.txt`` and reported.
        Merge afterwards with:
        ffmpeg -f concat -safe 0 -i s.txt -c copy output.mp4
        """
        while True:
            try:
                # Non-blocking: another worker may have taken the last item
                # since any emptiness check, and a blocking get() would then
                # wait forever.
                url = ts_queue.get_nowait()
            except Empty:
                return
            try:
                r = requests.get(url, headers=self.headers,
                                 timeout=REQUEST_TIMEOUT)
                r.raise_for_status()
                filename = _segment_name(url)
                with open(os.path.join(wenjian, filename), 'wb') as fp:
                    fp.write(r.content)
            except (requests.RequestException, OSError):
                with open(os.path.join(wenjian, 'shibai.txt'), 'a') as fp:
                    fp.write(url + '\n')
                print(url + '下载失败')

    def hebing(self, name):
        """Write and launch ``2.bat``, which merges the segments with ffmpeg.

        Relies on ``os.startfile`` and the ``start`` shell command.
        """
        basedir = os.path.abspath(os.path.dirname(__file__))
        t = 'start /d "%s\\%s" ffmpeg -f concat -safe 0 -i s.txt -c copy %s.mp4' % (basedir, wenjian, name)
        with open('2.bat', 'w') as f:
            f.write(t)
        os.startfile("2.bat")


if __name__ == '__main__':
    # Example link: http://iqiyi.cdn9-okzy.com/20201019/16908_b0f2428f/index.m3u8
    xiazaidizhi = input("请输入m3u8链接:")
    wenjian = input("请输入保存文件名:")
    start = datetime.datetime.now().replace(microsecond=0)

    # One instance is enough: workers only read its headers.
    downloader = xiazai1()
    downloader.index()
    downloader.index_shengcheng()
    s = downloader.duqu()

    threads = [
        threading.Thread(target=downloader.xiazai1, name='th-' + str(i),
                         kwargs={'ts_queue': s})
        for i in range(WORKER_COUNT)
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    end = datetime.datetime.now().replace(microsecond=0)
    print('下载耗时:' + str(end - start))
    # Download time: 0:01:23
下载完会出来
外加很多ts文件
如果人生还有重来,那就不叫人生。

浙公网安备 33010602011771号