实战爬取某网站图片-Python

直接上代码

 1 #!/usr/bin/python
 2 # -*- coding: UTF-8 -*-
 3 from bs4 import BeautifulSoup
 4 import requests
 5 import sys
 6 import os
 7 
 8 #获取图集id
 9 try:
10     headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.55"}
11     html = requests.get('https://www.tujigu.com/', headers=headers)
12     html.encoding = 'utf-8-sig'
13     soup = BeautifulSoup(html.text,"html.parser").find_all('p',class_='biaoti')
14     dict = {}
15     for i in soup:
16         id_url = i.find('a')['href']
17         id = id_url[25:len(id_url)-1]
18         name_str = i.find('a').contents[0]
19         name = name_str.replace(" ", "")
20         dict.update({id:name})
21 except:
22     print("获取id和name失败")
23     sys.exit()
24 
25 
26 for id,name in dict.items():
27     #创建目录
28     path= r"C:\Users\ATRAY\Documents\Downloads\{}".format(name)
29     folder = os.path.exists(path)
30     if not folder:                   
31         os.makedirs(path)            
32     else:
33         print ("创建目录失败")
34     #下载图片
35     num = 1
36     while True:
37         picture_url="https://tjg.gzhuibei.com/a/1/{}/{}.jpg".format(id,num)
38         picture=requests.get(picture_url,headers=headers)
39         if picture.status_code == 200:
40             with open(path + "\{}.jpg".format(num),"wb") as code:
41                 code.write(picture.content)
42             num = num + 1
43         else:
44             print("以下载{}写真{}张".format(name,num - 1))
45             break

 

posted @ 2021-07-25 17:11  ATRAY  阅读(282)  评论(0)    收藏  举报
站长工具: