1 import requests
2 import time
3 from lxml import etree
4 import os
5
# HTTP headers sent with every request made by this script.
# The User-Agent imitates a desktop Chrome browser. The Cookie is a captured
# browser session; presumably it expires over time — TODO confirm and refresh
# it if the site starts rejecting requests.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/75.0.3770.142 Safari/537.36'
    ),
    # Individual cookie pairs, joined back into a single header value.
    'Cookie': '; '.join((
        '_xsrf=2|564c23e9|a46b8cac21ef08bf52221f1b840f5462|1565579147',
        'Hm_lvt_2670efbdd59c7e3ed3749b458cafaa37=1565579182',
        'BAIDU_SSP_lcr=https://cn.bing.com/',
        '_qqq_uuid_="2|1:0|10:1565579148|10:_qqq_uuid_|56:NGVlZTIxMjdiYjYxMTQ5ZTU2NDk2YjhkMmM1ODM2Njg2N2I0Y2JjYw==|520099f3c590773f531a9c5f1eb1d82adba8893e730d3b508c3045904e1cfd9c"',
        '_ga=GA1.2.349374621.1565579182',
        '_gid=GA1.2.361209414.1565579182',
        'Hm_lpvt_2670efbdd59c7e3ed3749b458cafaa37=1565580312',
    )),
}
def handle_request(url, page, timeout=10):
    """Fetch one listing page of a category and return its HTML as text.

    The first page of a category is served at the category URL itself;
    every later page is served from ``index_<page>.html`` appended to it.

    Args:
        url: Category base URL. The page file name is appended directly, so
            the URL is expected to end with a slash.
        page: Page number; ``1`` selects the category's front page.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without it a stalled connection would
            block the script indefinitely.

    Returns:
        The response body decoded to ``str``.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    # Only pages after the first need the index_<n>.html suffix.
    if page != 1:
        url = url + 'index_' + str(page) + '.html'
    response = requests.get(url=url, headers=headers, timeout=timeout)
    return response.text
20
def parse_content(content, file_down):
    """Extract detail-page links from a listing page and process each one.

    Args:
        content: HTML text of a listing page.
        file_down: Local directory path, passed through unchanged to
            ``parse_secondcontent``.

    Returns:
        None. Each detail page found is handed to ``parse_secondcontent``.
    """
    document = etree.HTML(content)
    # NOTE(review): the original comment mentions lazy loading on this page;
    # the XPath below only collects the links to the per-image detail pages.
    detail_hrefs = document.xpath('//*[@id="main"]/div[3]/ul/li/a/@href')
    for href in detail_hrefs:
        # The hrefs are prefixed with the site origin to form a full URL.
        detail_url = 'http://pic.netbian.com' + href
        parse_secondcontent(detail_url, file_down)
32
def parse_secondcontent(image_src, file_down, timeout=10):
    """Open an image detail page and download every image found on it.

    Args:
        image_src: Absolute URL of the detail page.
        file_down: Local directory path, passed through unchanged to
            ``download_image``.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without it a stalled connection would
            block the script indefinitely.

    Returns:
        None.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    response = requests.get(url=image_src, headers=headers, timeout=timeout)
    tree = etree.HTML(response.text)
    url_list = tree.xpath('//*[@id="img"]/img/@src')
    for url in url_list:
        # The src values are prefixed with the site origin to form a full URL.
        image_url = 'http://pic.netbian.com' + url
        download_image(image_url, file_down)
40
def download_image(image_url, file_down, timeout=10):
    """Download one image and save it into the target directory.

    The file is named after the last path component of ``image_url``.

    Args:
        image_url: Absolute URL of the image to fetch.
        file_down: Target directory; backslashes are normalised to forward
            slashes before use.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without it a stalled connection would
            block the script indefinitely.

    Returns:
        None. The image bytes are written to disk.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
        OSError: If the directory or file cannot be created.
    """
    print('开始下载图片---请稍后')
    dirpath = file_down.replace('\\', '/')
    # makedirs(exist_ok=True) creates missing parent directories too and
    # avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(dirpath, exist_ok=True)
    # Use the final component of the URL as the file name.
    filename = os.path.basename(image_url)
    filepath = os.path.join(dirpath, filename)
    # Fetch the image and write its raw bytes to disk.
    request = requests.get(url=image_url, headers=headers, timeout=timeout)
    with open(filepath, 'wb') as fp:
        fp.write(request.content)
    print('图片下载结束')
56
def main():
    """Interactively download a range of listing pages for one category.

    Prompts for a category code, a local download directory, and a start and
    end page number, then fetches and processes each page in that inclusive
    range, pausing three seconds after each page.
    """
    # Category code -> listing URL; any other code uses the site front page.
    category_urls = {
        1: 'http://pic.netbian.com/4kfengjing/',
        2: 'http://pic.netbian.com/4kmeinv/',
        3: 'http://pic.netbian.com/4kyouxi/',
        4: 'http://pic.netbian.com/4kdongman/',
    }
    choice = int(input('请输入分类的代码:1.4k风景 2.4k美女 3.4k游戏 4.4k动漫 5.'))
    base_url = category_urls.get(choice, 'http://pic.netbian.com/')

    file_down = input('请输入本地图片下载地址')
    first_page = int(input('请输入你的起始页面'))
    last_page = int(input('请输入你的终止界面'))

    for page_no in range(first_page, last_page + 1):
        print('开始下载%s页的内容' % page_no)
        # Fetch the listing page, then download every image it links to.
        listing_html = handle_request(base_url, page_no)
        parse_content(listing_html, file_down)
        print('第%s页的图片下载完成' % page_no)
        # Pause between pages.
        time.sleep(3)
    print('谢谢你的使用')
80
# Start the interactive downloader only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()