import re
from multiprocessing.dummy import Pool

import requests
from lxml import etree
# NOTE: the string literal below is not the first statement of the module, so
# Python does not treat it as a docstring; it is kept unchanged. It reads:
# "download the hottest videos in the Life category of Pear Video".
'下载梨视频生活栏目中最热的视频'

# Request headers carrying a desktop-Chrome User-Agent string; they are sent
# with every HTTP request made by this script.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}

# Fetch the "Life" category listing page (category_5).
# The timeout stops the script from hanging forever on a stalled connection,
# and raise_for_status() stops an HTTP error page from being parsed as if it
# were the real listing.
_listing_response = requests.get(
    url='https://www.pearvideo.com/category_5',
    headers=headers,
    timeout=10,
)
_listing_response.raise_for_status()
html_live = _listing_response.text

# Collect the file name and the page URL of every video in the listing.
response = etree.HTML(html_live)
list_li = response.xpath('//ul[@id="listvideoListUl"]/li')
ls = []
for li in list_li:
    titles = li.xpath('./div/a/div[@class="vervideo-title"]/text()')
    hrefs = li.xpath('./div/a/@href')
    if not titles or not hrefs:
        # An <li> without a title or a link cannot be downloaded; skip it
        # instead of aborting the whole run with an IndexError.
        continue
    name = titles[0] + '.mp4'
    url = 'https://www.pearvideo.com/' + hrefs[0]
    dic = {
        'url': url,
        'name': name,
    }
    ls.append(dic)
# Visit a video's page and download the video file it points to.
def get_video(dic):
    """Download one video and save it under a file name derived from its title.

    Args:
        dic: Mapping with two keys: ``"url"`` (address of the video's page)
            and ``"name"`` (the file name to save the video as).

    Raises:
        IndexError: If the page contains no ``srcUrl=...,`` entry.
        requests.HTTPError: If the page or the video request returns an
            HTTP error status.
    """
    url = dic["url"]
    name = dic["name"]
    print(name + '正在下载........')

    page = requests.get(url=url, headers=headers, timeout=30)
    page.raise_for_status()

    # The video address is taken from the first `srcUrl=...,` occurrence in
    # the page text, with the surrounding double quotes removed.
    match = re.search('srcUrl=(.*?),', page.text)
    if match is None:
        # IndexError matches what the former bare `[0]` lookup raised, but
        # now says which page was affected.
        raise IndexError('no srcUrl entry found in page ' + url)
    video_url = match.group(1).replace('"', '')

    # Titles can contain path separators or other characters that are not
    # allowed in file names (notably on Windows); replace them so open()
    # does not fail or write outside the current directory.
    safe_name = re.sub(r'[\\/:*?"<>|\r\n]', '_', name)

    # Stream the body to disk in chunks instead of holding the whole video
    # in memory, and refuse to save an HTTP error response as an .mp4 file.
    with requests.get(url=video_url, headers=headers, stream=True,
                      timeout=30) as video:
        video.raise_for_status()
        with open(safe_name, 'wb') as fp:
            for chunk in video.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    fp.write(chunk)
    print(name + '下载完成')
# Download the collected videos using a pool of 4 workers
# (multiprocessing.dummy.Pool is the thread-based pool).
pool = Pool(4)
try:
    pool.map(get_video, ls)
finally:
    # Stop accepting work and wait for the workers to finish, even when one
    # of the downloads raised, so the pool is never left open.
    pool.close()
    pool.join()