# -*- coding: utf-8 -*-
# @Time : 2021/9/10 10:42
# @Author : dhl
from fake_useragent import UserAgent
import re, requests, os,cloudmusic,shutil,json
from lxml import etree
def file():
    """Ensure the MP4 output directory exists (idempotent).

    Uses os.makedirs(exist_ok=True) instead of an exists()-then-mkdir
    check, which was racy and used the `== False` anti-idiom.
    """
    os.makedirs("MP4", exist_ok=True)
def check(name):
    """Strip characters that are unsafe or unwanted in a Windows filename.

    Removes newlines, backslashes, bell chars, '?', '#', '|', spaces and
    both ASCII and full-width punctuation, returning the cleaned string.

    NOTE: parameter renamed from `str`, which shadowed the builtin; every
    caller in this file passes it positionally.
    """
    for ch in ("\n", "\\", "\a", "?", " ", "#", "|", ",", ".", "。", ",", " "):
        # str.replace is a no-op when ch is absent, so no membership test needed.
        name = name.replace(ch, "")
    return name
def size(path):
    """Print the on-disk size of *path* in MB, truncated to two decimals."""
    raw_bytes = os.path.getsize(path)
    # Same operation order as always: /1024/1024 to MB, then *10000//100/100
    # truncates (not rounds) to two decimal places.
    truncated = raw_bytes / 1024 / 1024 * 10000 // 100 / 100
    print(f"size: {truncated}.MB\n下载完成,即将打开\n")
def wyy(url):
    """Download NetEase Cloud Music (网易云) content.

    The URL's trailing `?id=` number selects the item; the path segment
    before `?` ("song", "playlist" or "album") selects the mode. Files are
    written by the `cloudmusic` library into its own "cloudmusic" folder,
    which is then opened.

    Fixes vs. the original: `id`/`type` no longer shadow builtins, bare
    `except:` clauses narrowed to Exception, and an unknown type no longer
    hits an UnboundLocalError on `mu_li`.
    """
    print("=======网易云======")
    item_id = int(url.split("=")[-1])
    print(item_id)
    kind = url.split("?")[0].split("/")[-1]
    if kind == "song":
        try:
            music = cloudmusic.getMusic(item_id)
            print("歌名:{}".format(music.name))
            print("歌手:{}".format(music.artist))
            music.download()
        except Exception as e:
            # Best-effort single-song download; report instead of silently passing.
            print("failed:", e)
    else:
        if kind == "playlist":
            mu_li = cloudmusic.getPlaylist(item_id)
        elif kind == "album":
            mu_li = cloudmusic.getAlbum(item_id)
        else:
            # Original code crashed here with UnboundLocalError.
            print("unknown type:", kind)
            return
        print(mu_li)
        for music in mu_li:
            try:
                music.download(level="standard")
                print("歌名:{}".format(music.name))
                print("歌手:{}".format(music.artist))
            except Exception:
                print(music, "failed")
        os.startfile("cloudmusic")  # Windows-only: open the download folder
def zh(url):
    # Zhihu video download: scrape the post page for its title and the
    # embedded player iframe, resolve the real play URL through the
    # lens.zhihu.com API, then save the stream to MP4/<title>.mp4.
    print("==============知乎=============")
    page = requests.get(url, headers={"user-agent": UserAgent().random}).text
    title = check(re.findall(r'charSet="utf-8".*?title.*?true">(.*?)<', page, re.S)[0])
    print(f"{title}")
    iframe_src = re.findall(r'iframe src="(.*?)"', page, re.S)[0]
    video_id = iframe_src.split("?")[0].split("/")[-1]
    api_url = "https://lens.zhihu.com/api/v4/videos/" + video_id
    playlist = requests.get(api_url, headers={"user-agent": UserAgent().random}).json()["playlist"]
    # Take the first quality entry's play_url (dict order of the API response).
    play_url = next(iter(playlist.values()))['play_url']
    stream = requests.get(play_url, headers={"user-agent": UserAgent().random})
    with open(f"MP4/{title}.mp4", "wb") as f:
        f.write(stream.content)
    size(f"MP4/{title}.mp4")
    os.startfile(f"MP4")
def lsp(url):
    # Pear Video (梨视频): the videoStatus endpoint returns a decoy srcUrl
    # whose timestamp segment must be replaced with "cont-<id>" to get the
    # real mp4. A session + Referer header is required by the site.
    print("========梨视频======")
    cont_id = url.split("_")[-1]
    status_url = f"https://www.pearvideo.com/videoStatus.jsp?contId={cont_id}"
    headers = {"user-agent": UserAgent().random, "Referer": url}
    session = requests.session()
    page = session.get(url).text
    title = check(re.findall(r"title>(.*?)</title", page, re.S)[0])
    print(title)
    status_resp = session.get(status_url, headers=headers)
    status_resp.encoding = 'utf-8'
    info = status_resp.json()
    decoy_url = info["videoInfo"]["videos"]["srcUrl"]
    # Swap the fake leading path segment for the real content id.
    real_url = decoy_url.replace(decoy_url.split("-")[0].split("/")[-1], f"cont-{cont_id}")
    with open(f"MP4/{title}.mp4", "wb") as f:
        f.write(session.get(real_url).content)
    size(f"MP4/{title}.mp4")
    os.startfile("MP4")
def dy(url):
    """Download a Douyin (抖音) video into MP4/.

    The share page embeds a percent-encoded fragment of the real
    douyinvod.com URL; it is decoded by undoing the %XX escapes. The third
    <meta content="..."> tag holds the title.

    Fix: the bare `except:` (which also swallowed KeyboardInterrupt /
    SystemExit) is narrowed to Exception.
    """
    print("============抖音==========")
    try:
        resp = requests.get(url, headers={"user-agent": UserAgent().random})
        text = resp.text
        title = check(re.findall(r'content="(.*?)"', text, re.S)[2])
        row = re.findall(r"-web.douyinvod.com%2F(.*?)--", text, re.S)[0]
        # Undo the percent-encoding of the embedded URL fragment.
        # NOTE(review): "/3F" looks like a typo for "%3F" (handled right
        # after) and is almost certainly a no-op — kept for fidelity.
        url = "https://v26-web.douyinvod.com/" + row.replace("%2F", "/").replace("/3F", "?").replace("%3D", "=").replace("%26", "&").replace("%3F", "?") + "--"
        with open(f"MP4/{str(title)}.mp4", "wb") as f:
            f.write(requests.get(url).content)
        print(f"{title}.mp4 over")
        size(f"MP4/{title}.mp4")
        os.startfile("MP4")
    except Exception:
        print("failed")
def hk(url):
    # Haokan (好看视频): title via an absolute XPath on the page DOM, mp4
    # URL via regex over the inline JSON (backslash-escapes stripped).
    print("============好看视频=============")
    page = requests.get(url).text
    dom = etree.HTML(page)
    title = check(dom.xpath("/html/body/div/div[1]/div/div[1]/div[2]/h1/text()")[0])
    print(title)
    video_url = re.findall(r'rank.*?title.*?url":"(.*?)"', page, re.S)[-1].replace("\\", "")
    with open(f"MP4/{title}.mp4", "wb") as f:
        f.write(requests.get(video_url).content)
    size(f"MP4/{title}.mp4")
def wb(url):
    """Download a Weibo (微博) video.

    The oid is taken from the share URL and posted to the tv component API,
    which returns the title, author nickname and a quality->url mapping;
    the first entry is saved as MP4/<author><title>.mp4.

    Fixes: the local named `str` shadowed the builtin (renamed `payload`)
    and `json` shadowed the imported module (renamed `info`).
    """
    api_url = "https://weibo.com/tv/api/component"
    print("==========微博=========")
    oid = url.split("/")[-1].split("?")[0]
    mid = oid.split(":")[-1]
    payload = '{"Component_Play_Playinfo":{"oid":"' + oid + '"}}'
    data = {"data": payload}
    # NOTE(review): hard-coded session cookie — presumably expires; verify.
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36",
        'cookie': 'SINAGLOBAL=3094754119728.9976.1630819874992; SUB=_2AkMWaNkPf8NxqwJRmP4RyG7raoV_zwrEieKgNCjUJRMxHRl-yT8XqhNZtRB6Pej34F6Ggti-ixZIJVrwmkhDFTWWvCBk; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WWoQrFGkxgWuBN6gRc-WAO6; UOR=,,www.baidu.com; login_sid_t=2ee0f49b562f283db2c3705c703d7bbc; cross_origin_proto=SSL; _s_tentry=www.baidu.com; Apache=1861927238670.6287.1631067489803; ULV=1631067489807:2:2:2:1861927238670.6287.1631067489803:1630819874996; wb_view_log=1536*8641.25; YF-V-WEIBO-G0=b09171a17b2b5a470c42e2f713edace0; XSRF-TOKEN=pMpNMKd6_c3fiGAbbF1tEiVA',
        'referer': f'https://weibo.com/tv/show/{oid}?mid={mid}'}
    resp = requests.post(api_url, headers=headers, data=data)
    info = resp.json()
    title = info['data']['Component_Play_Playinfo']['text'].strip()
    # Drop any trailing HTML tag fragment from the title text.
    if "<" in title:
        title = title.split("<")[0]
    title = check(title)
    print(f"---------正在下载{title}-------")
    autor = info['data']['Component_Play_Playinfo']['nickname']
    urls = info['data']['Component_Play_Playinfo']['urls'].values()
    video_url = "https:" + next(iter(urls))
    with open(f"MP4/{autor}{title}.mp4", "wb") as f:
        f.write(requests.get(video_url).content)
    size(f"MP4/{autor}{title}.mp4")
def bi(url):
    """Download a Bilibili video by resolving it through the third-party
    leesoar.com parse API, saving to MP4/<title>.mp4.

    Fix: `os.startfile("")` raised FileNotFoundError on every successful
    download; it now opens the "MP4" folder like the other downloaders.
    """
    print("===========bilibili=========")
    text = requests.get(url).text
    title = check(re.findall(r'charset.*?content.*?type.*?data-vue-meta.*?>(.*?)<', text, re.S)[0])
    print(f"{title}")
    url_jx = "https://www.leesoar.com/bilibili"
    headers = {
        'Cookie': '__gads=ID=aa2bdda70311117b-2257eb6a8acb00b5:T=1630865380:RT=1630865380:S=ALNI_MaXjh9jimoa1G5gpXDYtSG_HGGy6A',
        'Origin': 'https://www.leesoar.com',
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36"
    }
    data = json.dumps({'parse': url})
    # The API answers {"msg": <real video url>}.
    rurl = requests.post(url_jx, headers=headers, data=data).json()['msg']
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'}
    with open(f"MP4/{title}.mp4", "wb") as f:
        f.write(requests.get(rurl, headers=headers).content)
    size(f"MP4/{title}.mp4")
    os.startfile("MP4")  # was os.startfile(""), which always raised
def ks(url):
    """Download a single Kuaishou (快手) video via its GraphQL API.

    The photoId comes from the URL path and the webPageArea from its last
    query parameter; the visionVideoDetail query returns the direct
    photoUrl plus author/caption used to name the file.

    Fix: the bare `except:` is narrowed to Exception.
    """
    print("============快手===========")
    phoId = url.split("?")[0].split("/")[-1]
    webPageArea = url.split("=")[-1]
    url_r = "https://www.kuaishou.com/graphql"
    # NOTE(review): hard-coded login cookie — presumably expires; verify.
    headers = {"content-type": "application/json",
               "Cookie": "did=web_c99e54b880416bec7c46addf61de4112; didv=1630464335300; kpf=PC_WEB; kpn=KUAISHOU_VISION; clientid=3; client_key=65890b29; userId=2119184593; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABqKrAZ3n-3x4QGhXm9PhYIeHKdFlMFLn4Y7_0l6oTODdjI_8nSfXpREGN3-gAYoKSxe06Cs2-eSyolV6UFM8S86Z6S9PN837nhY3FBBjugl-ytgsV9_OlXlxVDpY4oKXD2rsiIDXa_bNMM1Fx8EMbw47PgC6giZ9tpgl-Q80xXrXh_moHKr7Z3-r5oepJAZ16HY0s3zsEw64eeMNyzdf7JxoS1KQylfZfbCBEuMI0IcjfqenKIiDodfP1mTu8Ay49fnv_w3WQ93SpIYTkyNIQEAi8q34GoCgFMAE; kuaishou.server.web_ph=a0ebf4fd4c2d9b0f34fbdafe46f9e0386f3a",
               "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36"}
    data = {"operationName": "visionVideoDetail",
            "query": "query visionVideoDetail($photoId: String, $type: String, $page: String, $webPageArea: String) {\n  visionVideoDetail(photoId: $photoId, type: $type, page: $page, webPageArea: $webPageArea) {\n    status\n    type\n    author {\n      id\n      name\n      following\n      headerUrl\n      __typename\n    }\n    photo {\n      id\n      duration\n      caption\n      likeCount\n      realLikeCount\n      coverUrl\n      photoUrl\n      liked\n      timestamp\n      expTag\n      llsid\n      viewCount\n      videoRatio\n      stereoType\n      croppedPhotoUrl\n      manifest {\n        mediaType\n        businessType\n        version\n        adaptationSet {\n          id\n          duration\n          representation {\n            id\n            defaultSelect\n            backupUrl\n            codecs\n            url\n            height\n            width\n            avgBitrate\n            maxBitrate\n            m3u8Slice\n            qualityType\n            qualityLabel\n            frameRate\n            featureP2sp\n            hidden\n            disableAdaptive\n            __typename\n          }\n          __typename\n        }\n        __typename\n      }\n      __typename\n    }\n    tags {\n      type\n      name\n      __typename\n    }\n    commentLimit {\n      canAddComment\n      __typename\n    }\n    llsid\n    danmakuSwitch\n    __typename\n  }\n}\n",
            "variables": {"photoId": phoId, "page": "detail", "webPageArea": webPageArea}}
    data = json.dumps(data)
    resp = requests.post(url_r, data=data, headers=headers)
    json1 = resp.json()
    url = json1['data']['visionVideoDetail']['photo']['photoUrl']
    autor = check(json1['data']["visionVideoDetail"]["author"]["name"])
    print(autor)
    caption = check(json1["data"]["visionVideoDetail"]["photo"]['caption'])
    print(caption)
    try:
        with open(f'MP4/{caption}{autor}.mp4', "wb") as f:
            f.write(requests.get(url).content)
        os.startfile("MP4")
        size(f'MP4/{caption}{autor}.mp4')
    except Exception:
        print("down failed")
def An(url):
    """Download an AcFun (A站) video: fetch its m3u8 playlist, download every
    .ts segment, binary-concatenate them with `copy /b` (Windows shell) and
    move the result into MP4/<name>.mp4.

    Fixes: the local named `list` shadowed the builtin (renamed `lines`);
    a dead `startswith("\\n")` test (impossible after split("\\n")) removed.
    """
    segments = []
    print("===========A站==============")
    html = requests.get(url, headers={"user-agent": UserAgent().random}).text
    url_m3u8 = re.findall(r'backupUrl.*?:.*?"(.*?)\\"', html, re.S)[0]
    name = check(re.findall(r'video-description clearfix.*?class.*?span>(.*?)<', html, re.S)[0])
    print(f"{name}")
    m3u8 = requests.get(url_m3u8).text
    lines = m3u8.split("\n")
    # Version "3" playlists reference segments at the root; others under hls/.
    if lines[1].split(":")[-1] == "3":
        hls = ""
    else:
        hls = "hls/"
    for line in lines:
        if line.startswith("#") or line == "":
            continue  # skip playlist directives and blank lines
        url_ts = f'https://tx-safety-video.acfun.cn/mediacloud/acfun/acfun_video/{hls}' + line.strip()
        name_ts = url_ts.split("?")[0].split(".")[-2].strip()
        with open(name_ts, "wb") as f:
            f.write(requests.get(url_ts).content)
        print(name_ts)
        segments.append(name_ts)
    joined = "+".join(segments)
    # Windows-only binary concatenation of the downloaded .ts files.
    os.system(f"copy/b {joined} {name}.mp4")
    for ts in segments:
        os.remove(ts)
    shutil.move(f"{name}.mp4", f"MP4/{name}.mp4")
    size(f"MP4/{name}.mp4")
    os.startfile("MP4")
def xhs(url):
    # Xiaohongshu (小红书): the share page embeds the raw <video src> (with
    # HTML-escaped ampersands) and a JSON-LD "headline" used as the filename.
    print("======小红书=====")
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36",
               "cookie": "xhsTracker=url=index&searchengine=baidu; xhsTrackerId=bae027aa-94ee-4613-c658-7cf4b0b31cc7; timestamp2=202109080f80c326a19211d0c5b59ae9; timestamp2.sig=_WmvLNOywKqS3NGWb-klQAYlbIf9_5vNmc6U4ZSzg6k; extra_exp_ids=gif_clt1,ques_exp1; noteIndex=1"}
    page = requests.get(url, headers=headers).text
    video_url = re.findall(r'<video.*?post.*?src="(.*?)"', page, re.S)[0].replace("amp;", "")
    title = re.findall(r'headline":.*?"(.*?)"', page, re.S)[0]
    print(title)
    title = check(title)
    out_path = f"MP4\{title}.mp4"
    with open(out_path, "wb") as f:
        f.write(requests.get(video_url).content)
    size(out_path)
def kspg(url1):
    """Download every video on a Kuaishou (快手) profile page.

    Pages through visionProfilePhotoList until pcursor == "no_more",
    saving each feed's photoUrl to MP4/<author>/<caption>.mp4.

    Fixes: the local named `size` shadowed the module-level size() helper
    (renamed `mb`); `== False` replaced with `not`.
    """
    print("=========快手主页=====")
    url = "https://www.kuaishou.com/graphql"
    # NOTE(review): hard-coded login cookie — presumably expires; verify.
    headers = {"content-type": "application/json",
               "Cookie": "did=web_c99e54b880416bec7c46addf61de4112; didv=1630464335300; kpf=PC_WEB; kpn=KUAISHOU_VISION; clientid=3; client_key=65890b29; userId=2119184593; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABqKrAZ3n-3x4QGhXm9PhYIeHKdFlMFLn4Y7_0l6oTODdjI_8nSfXpREGN3-gAYoKSxe06Cs2-eSyolV6UFM8S86Z6S9PN837nhY3FBBjugl-ytgsV9_OlXlxVDpY4oKXD2rsiIDXa_bNMM1Fx8EMbw47PgC6giZ9tpgl-Q80xXrXh_moHKr7Z3-r5oepJAZ16HY0s3zsEw64eeMNyzdf7JxoS1KQylfZfbCBEuMI0IcjfqenKIiDodfP1mTu8Ay49fnv_w3WQ93SpIYTkyNIQEAi8q34GoCgFMAE; kuaishou.server.web_ph=a0ebf4fd4c2d9b0f34fbdafe46f9e0386f3a",
               "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36"}
    pcursor = ''
    userId = url1.split("/")[-1]
    while pcursor != "no_more":
        data = {"operationName": "visionProfilePhotoList",
                "query": "query visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      type\n      author {\n        id\n        name\n        following\n        headerUrl\n        headerUrls {\n          cdn\n          url\n          __typename\n        }\n        __typename\n      }\n      tags {\n        type\n        name\n        __typename\n      }\n      photo {\n        id\n        duration\n        caption\n        likeCount\n        realLikeCount\n        coverUrl\n        coverUrls {\n          cdn\n          url\n          __typename\n        }\n        photoUrls {\n          cdn\n          url\n          __typename\n        }\n        photoUrl\n        liked\n        timestamp\n        expTag\n        animatedCoverUrl\n        stereoType\n        videoRatio\n        __typename\n      }\n      canAddComment\n      currentPcursor\n      llsid\n      status\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n",
                "variables": {"page": "profile", "pcursor": pcursor, "userId": userId}}
        data = json.dumps(data)
        json1 = requests.post(url, data=data, headers=headers).json()
        pcursor = json1["data"]["visionProfilePhotoList"]["pcursor"]
        feeds = json1['data']['visionProfilePhotoList']['feeds']
        autor = check(feeds[0]['author']['name'])
        if not os.path.exists(f"MP4/{autor}"):
            os.mkdir(f"MP4/{autor}")
        else:
            # NOTE(review): the folder is only opened when it already existed —
            # looks intentional (avoid popping a window per page); confirm.
            print(f"file:{autor} exited")
            os.startfile(f"MP4\{autor}")
        for feed in feeds:
            url = feed['photo']['photoUrl']
            tittle = check(feed['photo']['caption'])
            with open(f"MP4/{autor}/{tittle}.mp4", "wb") as f:
                f.write(requests.get(url).content)
            print(autor)
            print(tittle)
            path = f"MP4/{autor}/{tittle}.mp4"
            mb = os.path.getsize(path) / 1024 / 1024 * 10000 // 100 / 100
            print(f"size:{mb}MB")
            print("____________________________________________________________________")
def select(url):
    """Dispatch *url* to the matching site downloader based on its domain.

    The key is the last dot-separated token before ".com" (falling back to
    a ".cn" check for AcFun). Unknown domains print 未知网址.

    Fix: the bare `except: pass` hid every failure — including typos,
    KeyboardInterrupt and SystemExit. It now catches Exception and reports.
    """
    try:
        web = url.split(".com")[0].split(".")[-1]
        if web == "bilibili":
            bi(url)
        elif web == "pearvideo":
            lsp(url)
        elif web == "163":
            wyy(url)
        elif web == "kuaishou":
            # Profile-page URLs carry no '&'; single-video share URLs do.
            if "&" not in url:
                kspg(url)
            else:
                ks(url)
        elif web == "https://weibo":
            # weibo.com has no subdomain dot, so the scheme+host survives the split.
            wb(url)
        elif web == "zhihu":
            zh(url)
        elif web == "baidu":
            hk(url)
        elif web == "douyin":
            dy(url)
        elif web == "xiaohongshu":
            xhs(url)
        elif url.split(".cn")[0].split(".")[-1] == "acfun":
            An(url)
        else:
            print("未知网址")
    except Exception as e:
        print("failed:", e)
if __name__ == '__main__':
    # Interactive entry point: keep prompting for URLs forever, making sure
    # the MP4 output folder exists before each dispatch.
    while True:
        target = input("url:")
        file()
        select(target)