# -*- coding: utf-8 -*-
# @Time : 2021/9/10 10:42
# @Author : dhl
from fake_useragent import UserAgent
import re, requests, os,cloudmusic,shutil,json
from lxml import etree
def file():
    """Ensure the MP4 output directory exists (idempotent).

    Uses os.makedirs(exist_ok=True) instead of an exists()-then-mkdir
    check, which was racy and used the `== False` anti-idiom.
    """
    os.makedirs("MP4", exist_ok=True)
def check(name):
    """Strip characters that are unsafe or unwanted in a Windows filename.

    Removes newlines, backslashes, bell chars, '?', '#', '|', spaces and
    both ASCII and full-width punctuation, returning the cleaned string.

    NOTE: parameter renamed from `str`, which shadowed the builtin; every
    caller in this file passes it positionally.
    """
    for ch in ("\n", "\\", "\a", "?", " ", "#", "|", ",", ".", "。", ",", " "):
        # str.replace is a no-op when ch is absent, so no membership test needed.
        name = name.replace(ch, "")
    return name
def size(path):
    """Print the on-disk size of *path* in MB, truncated to two decimals."""
    raw_bytes = os.path.getsize(path)
    # Same operation order as always: /1024/1024 to MB, then *10000//100/100
    # truncates (not rounds) to two decimal places.
    truncated = raw_bytes / 1024 / 1024 * 10000 // 100 / 100
    print(f"size: {truncated}.MB\n下载完成,即将打开\n")
def wyy(url):
    """Download NetEase Cloud Music (网易云) content.

    The URL's trailing `?id=` number selects the item; the path segment
    before `?` ("song", "playlist" or "album") selects the mode. Files are
    written by the `cloudmusic` library into its own "cloudmusic" folder,
    which is then opened.

    Fixes vs. the original: `id`/`type` no longer shadow builtins, bare
    `except:` clauses narrowed to Exception, and an unknown type no longer
    hits an UnboundLocalError on `mu_li`.
    """
    print("=======网易云======")
    item_id = int(url.split("=")[-1])
    print(item_id)
    kind = url.split("?")[0].split("/")[-1]
    if kind == "song":
        try:
            music = cloudmusic.getMusic(item_id)
            print("歌名:{}".format(music.name))
            print("歌手:{}".format(music.artist))
            music.download()
        except Exception as e:
            # Best-effort single-song download; report instead of silently passing.
            print("failed:", e)
    else:
        if kind == "playlist":
            mu_li = cloudmusic.getPlaylist(item_id)
        elif kind == "album":
            mu_li = cloudmusic.getAlbum(item_id)
        else:
            # Original code crashed here with UnboundLocalError.
            print("unknown type:", kind)
            return
        print(mu_li)
        for music in mu_li:
            try:
                music.download(level="standard")
                print("歌名:{}".format(music.name))
                print("歌手:{}".format(music.artist))
            except Exception:
                print(music, "failed")
        os.startfile("cloudmusic")  # Windows-only: open the download folder
def zh(url):
    # Zhihu video download: scrape the post page for its title and the
    # embedded player iframe, resolve the real play URL through the
    # lens.zhihu.com API, then save the stream to MP4/<title>.mp4.
    print("==============知乎=============")
    page = requests.get(url, headers={"user-agent": UserAgent().random}).text
    title = check(re.findall(r'charSet="utf-8".*?title.*?true">(.*?)<', page, re.S)[0])
    print(f"{title}")
    iframe_src = re.findall(r'iframe src="(.*?)"', page, re.S)[0]
    video_id = iframe_src.split("?")[0].split("/")[-1]
    api_url = "https://lens.zhihu.com/api/v4/videos/" + video_id
    playlist = requests.get(api_url, headers={"user-agent": UserAgent().random}).json()["playlist"]
    # Take the first quality entry's play_url (dict order of the API response).
    play_url = next(iter(playlist.values()))['play_url']
    stream = requests.get(play_url, headers={"user-agent": UserAgent().random})
    with open(f"MP4/{title}.mp4", "wb") as f:
        f.write(stream.content)
    size(f"MP4/{title}.mp4")
    os.startfile(f"MP4")
def lsp(url):
    # Pear Video (梨视频): the videoStatus endpoint returns a decoy srcUrl
    # whose timestamp segment must be replaced with "cont-<id>" to get the
    # real mp4. A session + Referer header is required by the site.
    print("========梨视频======")
    cont_id = url.split("_")[-1]
    status_url = f"https://www.pearvideo.com/videoStatus.jsp?contId={cont_id}"
    headers = {"user-agent": UserAgent().random, "Referer": url}
    session = requests.session()
    page = session.get(url).text
    title = check(re.findall(r"title>(.*?)</title", page, re.S)[0])
    print(title)
    status_resp = session.get(status_url, headers=headers)
    status_resp.encoding = 'utf-8'
    info = status_resp.json()
    decoy_url = info["videoInfo"]["videos"]["srcUrl"]
    # Swap the fake leading path segment for the real content id.
    real_url = decoy_url.replace(decoy_url.split("-")[0].split("/")[-1], f"cont-{cont_id}")
    with open(f"MP4/{title}.mp4", "wb") as f:
        f.write(session.get(real_url).content)
    size(f"MP4/{title}.mp4")
    os.startfile("MP4")
def dy(url):
    """Download a Douyin (抖音) video into MP4/.

    The share page embeds a percent-encoded fragment of the real
    douyinvod.com URL; it is decoded by undoing the %XX escapes. The third
    <meta content="..."> tag holds the title.

    Fix: the bare `except:` (which also swallowed KeyboardInterrupt /
    SystemExit) is narrowed to Exception.
    """
    print("============抖音==========")
    try:
        resp = requests.get(url, headers={"user-agent": UserAgent().random})
        text = resp.text
        title = check(re.findall(r'content="(.*?)"', text, re.S)[2])
        row = re.findall(r"-web.douyinvod.com%2F(.*?)--", text, re.S)[0]
        # Undo the percent-encoding of the embedded URL fragment.
        # NOTE(review): "/3F" looks like a typo for "%3F" (handled right
        # after) and is almost certainly a no-op — kept for fidelity.
        url = "https://v26-web.douyinvod.com/" + row.replace("%2F", "/").replace("/3F", "?").replace("%3D", "=").replace("%26", "&").replace("%3F", "?") + "--"
        with open(f"MP4/{str(title)}.mp4", "wb") as f:
            f.write(requests.get(url).content)
        print(f"{title}.mp4 over")
        size(f"MP4/{title}.mp4")
        os.startfile("MP4")
    except Exception:
        print("failed")
def hk(url):
    # Haokan (好看视频): title via an absolute XPath on the page DOM, mp4
    # URL via regex over the inline JSON (backslash-escapes stripped).
    print("============好看视频=============")
    page = requests.get(url).text
    dom = etree.HTML(page)
    title = check(dom.xpath("/html/body/div/div[1]/div/div[1]/div[2]/h1/text()")[0])
    print(title)
    video_url = re.findall(r'rank.*?title.*?url":"(.*?)"', page, re.S)[-1].replace("\\", "")
    with open(f"MP4/{title}.mp4", "wb") as f:
        f.write(requests.get(video_url).content)
    size(f"MP4/{title}.mp4")
def wb(url):
    """Download a Weibo (微博) video.

    The oid is taken from the share URL and posted to the tv component API,
    which returns the title, author nickname and a quality->url mapping;
    the first entry is saved as MP4/<author><title>.mp4.

    Fixes: the local named `str` shadowed the builtin (renamed `payload`)
    and `json` shadowed the imported module (renamed `info`).
    """
    api_url = "https://weibo.com/tv/api/component"
    print("==========微博=========")
    oid = url.split("/")[-1].split("?")[0]
    mid = oid.split(":")[-1]
    payload = '{"Component_Play_Playinfo":{"oid":"' + oid + '"}}'
    data = {"data": payload}
    # NOTE(review): hard-coded session cookie — presumably expires; verify.
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36",
        'cookie': 'SINAGLOBAL=3094754119728.9976.1630819874992; SUB=_2AkMWaNkPf8NxqwJRmP4RyG7raoV_zwrEieKgNCjUJRMxHRl-yT8XqhNZtRB6Pej34F6Ggti-ixZIJVrwmkhDFTWWvCBk; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WWoQrFGkxgWuBN6gRc-WAO6; UOR=,,www.baidu.com; login_sid_t=2ee0f49b562f283db2c3705c703d7bbc; cross_origin_proto=SSL; _s_tentry=www.baidu.com; Apache=1861927238670.6287.1631067489803; ULV=1631067489807:2:2:2:1861927238670.6287.1631067489803:1630819874996; wb_view_log=1536*8641.25; YF-V-WEIBO-G0=b09171a17b2b5a470c42e2f713edace0; XSRF-TOKEN=pMpNMKd6_c3fiGAbbF1tEiVA',
        'referer': f'https://weibo.com/tv/show/{oid}?mid={mid}'}
    resp = requests.post(api_url, headers=headers, data=data)
    info = resp.json()
    title = info['data']['Component_Play_Playinfo']['text'].strip()
    # Drop any trailing HTML tag fragment from the title text.
    if "<" in title:
        title = title.split("<")[0]
    title = check(title)
    print(f"---------正在下载{title}-------")
    autor = info['data']['Component_Play_Playinfo']['nickname']
    urls = info['data']['Component_Play_Playinfo']['urls'].values()
    video_url = "https:" + next(iter(urls))
    with open(f"MP4/{autor}{title}.mp4", "wb") as f:
        f.write(requests.get(video_url).content)
    size(f"MP4/{autor}{title}.mp4")
def bi(url):
    """Download a Bilibili video by resolving it through the third-party
    leesoar.com parse API, saving to MP4/<title>.mp4.

    Fix: `os.startfile("")` raised FileNotFoundError on every successful
    download; it now opens the "MP4" folder like the other downloaders.
    """
    print("===========bilibili=========")
    text = requests.get(url).text
    title = check(re.findall(r'charset.*?content.*?type.*?data-vue-meta.*?>(.*?)<', text, re.S)[0])
    print(f"{title}")
    url_jx = "https://www.leesoar.com/bilibili"
    headers = {
        'Cookie': '__gads=ID=aa2bdda70311117b-2257eb6a8acb00b5:T=1630865380:RT=1630865380:S=ALNI_MaXjh9jimoa1G5gpXDYtSG_HGGy6A',
        'Origin': 'https://www.leesoar.com',
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36"
    }
    data = json.dumps({'parse': url})
    # The API answers {"msg": <real video url>}.
    rurl = requests.post(url_jx, headers=headers, data=data).json()['msg']
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'}
    with open(f"MP4/{title}.mp4", "wb") as f:
        f.write(requests.get(rurl, headers=headers).content)
    size(f"MP4/{title}.mp4")
    os.startfile("MP4")  # was os.startfile(""), which always raised
def ks(url):
    """Download a single Kuaishou (快手) video via its GraphQL API.

    The photoId comes from the URL path and the webPageArea from its last
    query parameter; the visionVideoDetail query returns the direct
    photoUrl plus author/caption used to name the file.

    Fix: the bare `except:` is narrowed to Exception.
    """
    print("============快手===========")
    phoId = url.split("?")[0].split("/")[-1]
    webPageArea = url.split("=")[-1]
    url_r = "https://www.kuaishou.com/graphql"
    # NOTE(review): hard-coded login cookie — presumably expires; verify.
    headers = {"content-type": "application/json",
               "Cookie": "did=web_c99e54b880416bec7c46addf61de4112; didv=1630464335300; kpf=PC_WEB; kpn=KUAISHOU_VISION; clientid=3; client_key=65890b29; userId=2119184593; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABqKrAZ3n-3x4QGhXm9PhYIeHKdFlMFLn4Y7_0l6oTODdjI_8nSfXpREGN3-gAYoKSxe06Cs2-eSyolV6UFM8S86Z6S9PN837nhY3FBBjugl-ytgsV9_OlXlxVDpY4oKXD2rsiIDXa_bNMM1Fx8EMbw47PgC6giZ9tpgl-Q80xXrXh_moHKr7Z3-r5oepJAZ16HY0s3zsEw64eeMNyzdf7JxoS1KQylfZfbCBEuMI0IcjfqenKIiDodfP1mTu8Ay49fnv_w3WQ93SpIYTkyNIQEAi8q34GoCgFMAE; kuaishou.server.web_ph=a0ebf4fd4c2d9b0f34fbdafe46f9e0386f3a",
               "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36"}
    data = {"operationName": "visionVideoDetail",
            "query": "query visionVideoDetail($photoId: String, $type: String, $page: String, $webPageArea: String) {\n  visionVideoDetail(photoId: $photoId, type: $type, page: $page, webPageArea: $webPageArea) {\n    status\n    type\n    author {\n      id\n      name\n      following\n      headerUrl\n      __typename\n    }\n    photo {\n      id\n      duration\n      caption\n      likeCount\n      realLikeCount\n      coverUrl\n      photoUrl\n      liked\n      timestamp\n      expTag\n      llsid\n      viewCount\n      videoRatio\n      stereoType\n      croppedPhotoUrl\n      manifest {\n        mediaType\n        businessType\n        version\n        adaptationSet {\n          id\n          duration\n          representation {\n            id\n            defaultSelect\n            backupUrl\n            codecs\n            url\n            height\n            width\n            avgBitrate\n            maxBitrate\n            m3u8Slice\n            qualityType\n            qualityLabel\n            frameRate\n            featureP2sp\n            hidden\n            disableAdaptive\n            __typename\n          }\n          __typename\n        }\n        __typename\n      }\n      __typename\n    }\n    tags {\n      type\n      name\n      __typename\n    }\n    commentLimit {\n      canAddComment\n      __typename\n    }\n    llsid\n    danmakuSwitch\n    __typename\n  }\n}\n",
            "variables": {"photoId": phoId, "page": "detail", "webPageArea": webPageArea}}
    data = json.dumps(data)
    resp = requests.post(url_r, data=data, headers=headers)
    json1 = resp.json()
    url = json1['data']['visionVideoDetail']['photo']['photoUrl']
    autor = check(json1['data']["visionVideoDetail"]["author"]["name"])
    print(autor)
    caption = check(json1["data"]["visionVideoDetail"]["photo"]['caption'])
    print(caption)
    try:
        with open(f'MP4/{caption}{autor}.mp4', "wb") as f:
            f.write(requests.get(url).content)
        os.startfile("MP4")
        size(f'MP4/{caption}{autor}.mp4')
    except Exception:
        print("down failed")
def An(url):
    """Download an AcFun (A站) video: fetch its m3u8 playlist, download every
    .ts segment, binary-concatenate them with `copy /b` (Windows shell) and
    move the result into MP4/<name>.mp4.

    Fixes: the local named `list` shadowed the builtin (renamed `lines`);
    a dead `startswith("\\n")` test (impossible after split("\\n")) removed.
    """
    segments = []
    print("===========A站==============")
    html = requests.get(url, headers={"user-agent": UserAgent().random}).text
    url_m3u8 = re.findall(r'backupUrl.*?:.*?"(.*?)\\"', html, re.S)[0]
    name = check(re.findall(r'video-description clearfix.*?class.*?span>(.*?)<', html, re.S)[0])
    print(f"{name}")
    m3u8 = requests.get(url_m3u8).text
    lines = m3u8.split("\n")
    # Version "3" playlists reference segments at the root; others under hls/.
    if lines[1].split(":")[-1] == "3":
        hls = ""
    else:
        hls = "hls/"
    for line in lines:
        if line.startswith("#") or line == "":
            continue  # skip playlist directives and blank lines
        url_ts = f'https://tx-safety-video.acfun.cn/mediacloud/acfun/acfun_video/{hls}' + line.strip()
        name_ts = url_ts.split("?")[0].split(".")[-2].strip()
        with open(name_ts, "wb") as f:
            f.write(requests.get(url_ts).content)
        print(name_ts)
        segments.append(name_ts)
    joined = "+".join(segments)
    # Windows-only binary concatenation of the downloaded .ts files.
    os.system(f"copy/b {joined} {name}.mp4")
    for ts in segments:
        os.remove(ts)
    shutil.move(f"{name}.mp4", f"MP4/{name}.mp4")
    size(f"MP4/{name}.mp4")
    os.startfile("MP4")
def xhs(url):
    # Xiaohongshu (小红书): the share page embeds the raw <video src> (with
    # HTML-escaped ampersands) and a JSON-LD "headline" used as the filename.
    print("======小红书=====")
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36",
               "cookie": "xhsTracker=url=index&searchengine=baidu; xhsTrackerId=bae027aa-94ee-4613-c658-7cf4b0b31cc7; timestamp2=202109080f80c326a19211d0c5b59ae9; timestamp2.sig=_WmvLNOywKqS3NGWb-klQAYlbIf9_5vNmc6U4ZSzg6k; extra_exp_ids=gif_clt1,ques_exp1; noteIndex=1"}
    page = requests.get(url, headers=headers).text
    video_url = re.findall(r'<video.*?post.*?src="(.*?)"', page, re.S)[0].replace("amp;", "")
    title = re.findall(r'headline":.*?"(.*?)"', page, re.S)[0]
    print(title)
    title = check(title)
    out_path = f"MP4\{title}.mp4"
    with open(out_path, "wb") as f:
        f.write(requests.get(video_url).content)
    size(out_path)
def kspg(url1):
    """Download every video on a Kuaishou (快手) profile page.

    Pages through visionProfilePhotoList until pcursor == "no_more",
    saving each feed's photoUrl to MP4/<author>/<caption>.mp4.

    Fixes: the local named `size` shadowed the module-level size() helper
    (renamed `mb`); `== False` replaced with `not`.
    """
    print("=========快手主页=====")
    url = "https://www.kuaishou.com/graphql"
    # NOTE(review): hard-coded login cookie — presumably expires; verify.
    headers = {"content-type": "application/json",
               "Cookie": "did=web_c99e54b880416bec7c46addf61de4112; didv=1630464335300; kpf=PC_WEB; kpn=KUAISHOU_VISION; clientid=3; client_key=65890b29; userId=2119184593; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABqKrAZ3n-3x4QGhXm9PhYIeHKdFlMFLn4Y7_0l6oTODdjI_8nSfXpREGN3-gAYoKSxe06Cs2-eSyolV6UFM8S86Z6S9PN837nhY3FBBjugl-ytgsV9_OlXlxVDpY4oKXD2rsiIDXa_bNMM1Fx8EMbw47PgC6giZ9tpgl-Q80xXrXh_moHKr7Z3-r5oepJAZ16HY0s3zsEw64eeMNyzdf7JxoS1KQylfZfbCBEuMI0IcjfqenKIiDodfP1mTu8Ay49fnv_w3WQ93SpIYTkyNIQEAi8q34GoCgFMAE; kuaishou.server.web_ph=a0ebf4fd4c2d9b0f34fbdafe46f9e0386f3a",
               "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36"}
    pcursor = ''
    userId = url1.split("/")[-1]
    while pcursor != "no_more":
        data = {"operationName": "visionProfilePhotoList",
                "query": "query visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      type\n      author {\n        id\n        name\n        following\n        headerUrl\n        headerUrls {\n          cdn\n          url\n          __typename\n        }\n        __typename\n      }\n      tags {\n        type\n        name\n        __typename\n      }\n      photo {\n        id\n        duration\n        caption\n        likeCount\n        realLikeCount\n        coverUrl\n        coverUrls {\n          cdn\n          url\n          __typename\n        }\n        photoUrls {\n          cdn\n          url\n          __typename\n        }\n        photoUrl\n        liked\n        timestamp\n        expTag\n        animatedCoverUrl\n        stereoType\n        videoRatio\n        __typename\n      }\n      canAddComment\n      currentPcursor\n      llsid\n      status\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n",
                "variables": {"page": "profile", "pcursor": pcursor, "userId": userId}}
        data = json.dumps(data)
        json1 = requests.post(url, data=data, headers=headers).json()
        pcursor = json1["data"]["visionProfilePhotoList"]["pcursor"]
        feeds = json1['data']['visionProfilePhotoList']['feeds']
        autor = check(feeds[0]['author']['name'])
        if not os.path.exists(f"MP4/{autor}"):
            os.mkdir(f"MP4/{autor}")
        else:
            # NOTE(review): the folder is only opened when it already existed —
            # looks intentional (avoid popping a window per page); confirm.
            print(f"file:{autor} exited")
            os.startfile(f"MP4\{autor}")
        for feed in feeds:
            url = feed['photo']['photoUrl']
            tittle = check(feed['photo']['caption'])
            with open(f"MP4/{autor}/{tittle}.mp4", "wb") as f:
                f.write(requests.get(url).content)
            print(autor)
            print(tittle)
            path = f"MP4/{autor}/{tittle}.mp4"
            mb = os.path.getsize(path) / 1024 / 1024 * 10000 // 100 / 100
            print(f"size:{mb}MB")
            print("____________________________________________________________________")
def select(url):
    """Dispatch *url* to the matching site downloader based on its domain.

    The key is the last dot-separated token before ".com" (falling back to
    a ".cn" check for AcFun). Unknown domains print 未知网址.

    Fix: the bare `except: pass` hid every failure — including typos,
    KeyboardInterrupt and SystemExit. It now catches Exception and reports.
    """
    try:
        web = url.split(".com")[0].split(".")[-1]
        if web == "bilibili":
            bi(url)
        elif web == "pearvideo":
            lsp(url)
        elif web == "163":
            wyy(url)
        elif web == "kuaishou":
            # Profile-page URLs carry no '&'; single-video share URLs do.
            if "&" not in url:
                kspg(url)
            else:
                ks(url)
        elif web == "https://weibo":
            # weibo.com has no subdomain dot, so the scheme+host survives the split.
            wb(url)
        elif web == "zhihu":
            zh(url)
        elif web == "baidu":
            hk(url)
        elif web == "douyin":
            dy(url)
        elif web == "xiaohongshu":
            xhs(url)
        elif url.split(".cn")[0].split(".")[-1] == "acfun":
            An(url)
        else:
            print("未知网址")
    except Exception as e:
        print("failed:", e)
if __name__ == '__main__':
    # Interactive entry point: keep prompting for URLs forever, making sure
    # the MP4 output folder exists before each dispatch.
    while True:
        target = input("url:")
        file()
        select(target)