import os
import re
from multiprocessing.dummy import Pool
# Browser-like request headers passed to every requests.get call in this script.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/83.0.4103.116 Safari/537.36"
    ),
}

def load_video(dic, timeout=(10, 60)):
    """Download one video and save it under the ``./梨视频`` directory.

    Args:
        dic: Mapping with two keys: ``"name"`` (the file name to save as)
            and ``"url"`` (the direct URL of the video file).
        timeout: ``(connect, read)`` timeout in seconds handed to
            ``requests.get`` so a stalled server cannot block a worker
            thread forever.

    Raises:
        requests.RequestException: On network failure or a non-2xx response.
        OSError: If the file cannot be written.
    """
    # Scraped titles may contain characters that are path separators or are
    # rejected by Windows file systems; replace them so open() cannot fail
    # or write outside the target directory.
    safe_name = re.sub(r'[\\/:*?"<>|\r\n]', "_", dic["name"]).strip()
    os.makedirs("./梨视频", exist_ok=True)
    target = os.path.join("./梨视频", safe_name)
    # Download into a temporary sibling first so an interrupted transfer
    # never leaves a truncated file under the final name.
    partial = target + ".part"

    print(dic["name"], "开始下载")
    try:
        with requests.get(
            url=dic["url"], headers=headers, stream=True, timeout=timeout
        ) as resp:
            # Fail loudly instead of saving an HTML error page as a video.
            resp.raise_for_status()
            with open(partial, "wb") as f:
                # Stream in chunks rather than holding the whole video in memory.
                for chunk in resp.iter_content(chunk_size=64 * 1024):
                    if chunk:
                        f.write(chunk)
        os.replace(partial, target)
    finally:
        # After a successful os.replace the partial file is gone; anything
        # still present here is the leftover of a failed download.
        if os.path.exists(partial):
            os.remove(partial)
    print(dic['name'], "下载完成!")

if __name__ == '__main__':
    # Script entry point: scrape the category listing page, resolve the direct
    # video URL of each entry from its detail page, then download the videos
    # with a pool of 3 worker threads.
    #
    # NOTE(review): `requests` and `etree` are used below but are not imported
    # in the visible part of this file -- confirm that `import requests` and
    # `from lxml import etree` (or equivalents) exist.

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("./梨视频", exist_ok=True)

    # (connect, read) timeout in seconds so a stalled server cannot hang the run.
    timeout = (10, 60)

    url = "https://www.pearvideo.com/category_5"
    listing = requests.get(url=url, headers=headers, timeout=timeout)
    listing.raise_for_status()
    tree = etree.HTML(listing.text)
    # The last <li> is dropped exactly as before; presumably it is not a
    # video entry -- TODO confirm against the live page.
    li_list = tree.xpath('//*[@id="listvideoListUl"]/li')[:-1]

    # Compiled once, outside the loop; the pattern text is unchanged.
    src_pattern = re.compile(r'srcUrl="(.*?)",vdoUrl=srcUrl')

    video_list = []
    for li in li_list:
        links = li.xpath('./div/a/@href')
        titles = li.xpath('./div/a/div[2]/text()')
        if not links or not titles:
            # Entry does not have the expected markup; skip it instead of
            # aborting the whole run with an IndexError.
            continue

        href = 'https://www.pearvideo.com/' + links[0]
        name = titles[0] + ".mp4"
        p_text = requests.get(url=href, headers=headers, timeout=timeout).text

        match = src_pattern.search(p_text)
        if match is None:
            # The detail page did not embed the video URL in the expected form.
            print(href, "未找到视频地址,已跳过")
            continue

        video_list.append({
            "name": name,
            "url": match.group(1),
        })

    pool = Pool(3)
    try:
        pool.map(load_video, video_list)
    finally:
        # Release the worker threads whether or not a download failed.
        pool.close()
        pool.join()