import os
import time

import requests
from bs4 import BeautifulSoup
4
# Crawl every listing page of the "deskDM" wallpaper category, follow each
# detail link found on it, and save the image linked from the detail page's
# "morew" block into OUTPUT_DIR. Pages or images that cannot be fetched or
# parsed are reported and skipped so one bad item does not abort the crawl.

# Built once: the same headers are sent with every request.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/60.0.3100.0 Safari/537.36 "
}
LIST_URL = 'https://desk.3gbizhi.com/deskDM/index_{}.html'
LAST_PAGE = 27
OUTPUT_DIR = "cartoonImg"
# Seconds to wait on a connection/read before giving up; without a timeout
# requests can block forever on a stalled server.
REQUEST_TIMEOUT = 10

# open() does not create missing directories, so make sure the target exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

num = 1  # running count of downloaded images, used only for progress output
for index in range(1, LAST_PAGE + 1):
    try:
        resp = requests.get(LIST_URL.format(index), headers=header,
                            timeout=REQUEST_TIMEOUT)
        resp.raise_for_status()
    except requests.RequestException as exc:
        print("Skipping listing page {}: {}".format(index, exc))
        continue

    # Parse the listing page with bs4 and collect the detail-page links.
    page = BeautifulSoup(resp.text, 'html.parser')
    pageList = page.find_all('a', style="display: block;", target="_blank")
    # A set removes duplicate links; anchors without an href are dropped.
    armUrl = {a.get('href') for a in pageList if a.get('href')}

    # Request each detail page and download the image it points to.
    for detailUrl in armUrl:
        try:
            resp = requests.get(detailUrl, headers=header,
                                timeout=REQUEST_TIMEOUT)
            time.sleep(0.001)
            resp.raise_for_status()
        except requests.RequestException as exc:
            print("Skipping detail page {}: {}".format(detailUrl, exc))
            continue

        # Parse the detail page; every lookup may come back empty if the
        # markup differs from what is expected, so check each step.
        page = BeautifulSoup(resp.text, 'html.parser')
        container = page.find('div', class_="morew")
        link = container.find('a') if container is not None else None
        src = link.get('href') if link is not None else None
        if not src:
            print("No image link found on {}".format(detailUrl))
            continue

        imgName = src.split("/")[-1]
        if not imgName:
            # A URL ending in "/" yields no usable file name.
            print("Cannot derive a file name from {}".format(src))
            continue

        # Download the image; check the status so an error page is never
        # written to disk as if it were image data.
        try:
            imgResp = requests.get(src, headers=header,
                                   timeout=REQUEST_TIMEOUT)
            imgResp.raise_for_status()
        except requests.RequestException as exc:
            print("Skipping image {}: {}".format(src, exc))
            continue

        with open(os.path.join(OUTPUT_DIR, imgName), mode="wb") as f:
            f.write(imgResp.content)
        print("图片下载中------------------" + str(num))
        num += 1

print('\n下载完成!')