# -*- coding: utf-8 -*-
"""
Created on Fri Aug 28 17:21:10 2020
@author: Mto
"""
"""
网址:http://www.yhdm.tv/
目的
获取视频文件
8月28日,代码功能基本实现
"""
import requests
import re
from bs4 import BeautifulSoup
def getHTML(url, timeout=10):
    """Request a page and return the response.

    The request carries a fixed ``Host: www.yhdm.tv`` header and a desktop
    Firefox user agent, so ``url`` is expected to point at that site.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before ``requests`` gives
            up and raises a timeout error. Without it a stalled
            connection would block the script indefinitely.

    Returns:
        The ``requests.Response`` object, with its encoding forced to
        UTF-8 so that ``.text`` decodes the page as UTF-8.
    """
    headers = {
        'Host': 'www.yhdm.tv',
        'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0',
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.encoding = 'utf-8'
    return response
def GetMp4HTML(url, timeout=10):
    """Request a video URL and return the response.

    Unlike ``getHTML`` no ``Host`` header is sent, only a desktop Firefox
    user agent. The HTTP status code of the response is printed.

    Args:
        url: Address of the resource to request.
        timeout: Seconds to wait for the server before ``requests`` gives
            up and raises a timeout error. Without it a stalled
            connection would block the script indefinitely.

    Returns:
        The ``requests.Response`` object, with its encoding set to UTF-8.
    """
    headers = {
        'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0',
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.encoding = 'utf-8'
    print(response.status_code)
    return response
def processHTML(r):
    """Parse a response body with BeautifulSoup.

    Args:
        r: Object whose ``text`` attribute holds the HTML markup.

    Returns:
        The ``BeautifulSoup`` tree built with the ``html.parser`` backend.
    """
    return BeautifulSoup(r.text, 'html.parser')
def GetLinksAndTitle(url1, title, links):
    """Collect episode names and episode page URLs from a show page.

    The page's ``<h1>`` text is printed as the name of the show. Episode
    anchors are taken from the first element whose ``style`` attribute is
    ``display:block``.

    Args:
        url1: URL of the show page to scan.
        title: List that is extended in place with the text of each
            episode anchor.
        links: List that is extended in place, in the same order, with
            the absolute URL of each episode page.

    Raises:
        ValueError: If the page has no ``style="display:block"`` element
            to read the episode anchors from.
    """
    from urllib.parse import urljoin

    soup = processHTML(getHTML(url1))

    # .string is None when <h1> wraps nested tags; get_text() always
    # yields a str, so the concatenation below cannot fail.
    heading = soup.h1
    show_name = heading.get_text() if heading is not None else ''
    print('要下载的动漫为:' + show_name)

    containers = soup.find_all(style="display:block")
    if not containers:
        raise ValueError('no style="display:block" element found on ' + url1)

    # href=True skips anchors that carry no link at all.
    for anchor in containers[0].find_all('a', href=True):
        title.append(anchor.text)
        # urljoin also copes with hrefs that are already absolute or
        # that lack a leading slash.
        links.append(urljoin('http://www.yhdm.tv', anchor['href']))
def getmp4(link):
    """Extract the video file link from an episode page.

    The link is read from the ``data-vid`` attribute of the first
    ``div#playbox`` element and must start with ``https`` and contain
    ``.mp4``; anything after the last ``.mp4`` is dropped.

    Args:
        link: URL of the episode page.

    Returns:
        The matched ``https…​.mp4`` URL as a string.

    Raises:
        ValueError: If the page has no ``div#playbox`` element, or its
            ``data-vid`` attribute holds no matching link.
    """
    soup = processHTML(getHTML(link))

    players = soup.select('div#playbox')
    if not players:
        raise ValueError('no div#playbox element found on ' + link)

    # Raw string: '\.' in a plain literal is an invalid escape sequence.
    found = re.search(r'^https.*\.mp4', players[0].attrs.get('data-vid', ''))
    if found is None:
        raise ValueError('no https .mp4 link in data-vid on ' + link)
    return found.group()
def download(mp4link, title):
    """Report the video that would be downloaded, without fetching it.

    The actual transfer is intentionally disabled to spare the remote
    server: the function only prints the resolved video link followed by
    a notice line that starts with the episode title.

    Args:
        mp4link: Direct link to the video file; printed as-is.
        title: Episode title (a string) that prefixes the notice line.
    """
    suffix = '模拟访问成功,不下了,放过那个可怜的服务器吧'
    print(mp4link)
    print(title + suffix)
def main(url='http://www.yhdm.tv/show/4790.html', limit=3):
    """Resolve and report the video links of a show's first episodes.

    Args:
        url: Show page whose episode list is scanned.
        limit: Maximum number of episodes to process, counted from the
            start of the list; ``None`` processes every episode found.
    """
    title = []
    links = []
    GetLinksAndTitle(url, title, links)

    # Slicing tolerates shows with fewer than `limit` episodes, where a
    # fixed index range would run past the end of the lists.
    for name, link in zip(title[:limit], links[:limit]):
        download(getmp4(link), name)
# Script entry point: the scraper runs as soon as this module is loaded.
# NOTE(review): there is no `if __name__ == '__main__':` guard, so importing
# this file also triggers the network requests; consider adding one.
main()