import re
import requests
from lxml import etree
from requests.exceptions import RequestException
def get_one_page(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely on a stalled
            connection.

    Returns:
        The response text when the server answers with HTTP 200; ``None``
        for any other status code or when the request raises a
        ``RequestException`` (which includes timeouts).
    """
    # A browser-like User-Agent is sent with the request.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/74.0.3729.131 Safari/537.36'
        ),
    }
    try:
        # NOTE(review): verify=False turns off TLS certificate validation.
        # It is kept to preserve existing behaviour; confirm whether it is
        # still required and remove it if not.
        response = requests.get(
            url, headers=headers, verify=False, timeout=timeout
        )
    except RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None
# Captures the value of the first ``source src="..."`` attribute. The
# character class stops at the closing quote, so later attributes on the
# same line are never swallowed into the URL.
_MP4_SOURCE_RE = re.compile(r'source src="([^"]+)"')

# Characters that are invalid (or act as path separators) in file names.
_UNSAFE_FILENAME_CHARS_RE = re.compile(r'[\\/:*?"<>|\r\n\t]')


def _extract_mp4_url(page_text):
    """Return the video URL embedded in *page_text*, or None if absent."""
    match = _MP4_SOURCE_RE.search(page_text)
    return match.group(1) if match else None


def _safe_filename(name):
    """Return *name* stripped, with file-name-hostile characters replaced."""
    return _UNSAFE_FILENAME_CHARS_RE.sub('_', name.strip())


def process_movie(html, timeout=30):
    """Download every video linked from the listing page *html*.

    Each ``<a href>`` inside an ``<h4 class="video-name one-line">`` is
    treated as a link to a detail page. That page is fetched, the first
    ``source src="..."`` URL in it is downloaded, and the bytes are written
    to ``<link text>.mp4`` in the current working directory.

    Args:
        html: Markup of the listing page. ``None`` or an empty string is
            accepted and results in no work being done.
        timeout: Seconds to wait on each HTTP request.

    Returns:
        None. Entries that cannot be downloaded are reported on stdout and
        skipped so that one bad link does not abort the remaining ones.
    """
    if not html:
        # Nothing to parse (e.g. the listing page could not be fetched).
        return
    dom = etree.HTML(html)
    if dom is None:
        # Defensive: bail out if the parser produced no tree.
        return

    # Read the URL and the title from the same <a> element so the two can
    # never get out of step, which two independent XPath result lists could.
    anchors = dom.xpath('//h4[@class="video-name one-line"]/a[@href]')
    for anchor in anchors:
        movie_url = anchor.get('href')
        movie_name = _safe_filename(''.join(anchor.xpath('text()')))
        if not movie_name:
            # Without link text there is no file name to save under.
            continue
        try:
            detail_page = requests.get(movie_url, timeout=timeout)
            detail_page.raise_for_status()
            movie_mp4_url = _extract_mp4_url(detail_page.text)
            if movie_mp4_url is None:
                print('No video source found for {}'.format(movie_name))
                continue
            print('正在下载{}'.format(movie_name))
            video = requests.get(movie_mp4_url, timeout=timeout)
            # Do not save an HTTP error page under an .mp4 name.
            video.raise_for_status()
        except RequestException as exc:
            print('Failed to download {}: {}'.format(movie_name, exc))
            continue
        # Open the file only after the download succeeded, so a failed
        # request never leaves an empty .mp4 behind.
        with open('%s.mp4' % movie_name, 'wb') as f:
            f.write(video.content)
def main():
    """Fetch the Maoyan news video listing and hand it to process_movie."""
    url = 'https://maoyan.com/news?showTab=3'
    html = get_one_page(url)
    if html is None:
        # get_one_page returns None for a non-200 status or a request
        # error; there is nothing to parse in that case.
        print('Failed to fetch {}'.format(url))
        return
    process_movie(html)
# Run the scraper only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()