Hands-On: Scraping Images from a Website with Python
Straight to the code: the script collects album IDs and names from the tujigu.com homepage, then downloads each album's images into its own folder.
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from bs4 import BeautifulSoup
import requests
import sys
import os

# Collect album IDs and names from the homepage
try:
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.55"}
    html = requests.get('https://www.tujigu.com/', headers=headers)
    html.encoding = 'utf-8-sig'
    title_tags = BeautifulSoup(html.text, "html.parser").find_all('p', class_='biaoti')
    albums = {}
    for i in title_tags:
        id_url = i.find('a')['href']
        # Album URLs look like https://www.tujigu.com/a/<id>/, so take the last path segment
        album_id = id_url.rstrip('/').split('/')[-1]
        name = i.find('a').contents[0].replace(" ", "")
        albums[album_id] = name
except Exception:
    print("Failed to fetch album IDs and names")
    sys.exit()


for album_id, name in albums.items():
    # Create a directory for this album
    path = r"C:\Users\ATRAY\Documents\Downloads\{}".format(name)
    if not os.path.exists(path):
        os.makedirs(path)
    else:
        print("Directory already exists: {}".format(path))
    # Download images sequentially until the server stops returning 200
    num = 1
    while True:
        picture_url = "https://tjg.gzhuibei.com/a/1/{}/{}.jpg".format(album_id, num)
        picture = requests.get(picture_url, headers=headers)
        if picture.status_code == 200:
            with open(os.path.join(path, "{}.jpg".format(num)), "wb") as f:
                f.write(picture.content)
            num += 1
        else:
            print("Downloaded {} images for {}".format(num - 1, name))
            break
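The download loop probes image URLs one number at a time and treats the first non-200 response as the end of the album, so a single dropped connection can cut an album short. If you want the loop to be a bit more resilient, a small wrapper like the sketch below could add a timeout and retry only on network errors; the function name and retry parameters are illustrative assumptions, not part of the original script.

import time
import requests

def fetch_with_retry(url, headers, retries=3, timeout=10, delay=1.0):
    """GET a URL with a timeout, retrying only on network errors.
    Returns the Response (whatever its status code) or None if every attempt errors out."""
    for attempt in range(retries):
        try:
            return requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException:
            # Connection problem: wait briefly, then try again
            time.sleep(delay)
    return None

In the loop, picture = fetch_with_retry(picture_url, headers) would replace the direct requests.get call, with an added check that picture is not None before reading status_code; non-200 responses still end the album as before.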
