案例-爬取爱斗图图片:xpath
网址: https://aidotu.com/search/0-0-0-1.html
xpath代码:
import json
import os
import re
from urllib.parse import urljoin, urlparse

import requests
from lxml import etree
# Browser-like User-Agent header sent with the HTTP requests made below.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/54.0.2840.99 Safari/537.36"
    ),
}
# URL of the first search-result page.
url = 'https://aidotu.com/search/0-0-0-1.html'
def down_img(file_name, url):
    """Download one image and save it to disk.

    Sends the module-level ``headers`` with the request. Nothing is written
    when the server answers with an HTTP error status.

    Args:
        file_name: Path of the file to create; an existing file is
            overwritten because it is opened in ``"wb"`` mode.
        url: Absolute URL of the image to fetch.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    img_resp = requests.get(url, headers=headers, timeout=10)
    if not img_resp.ok:
        # Do not store an HTTP error page under an image file name.
        print("skip {} (HTTP {})".format(url, img_resp.status_code))
        return
    with open(file_name, mode="wb") as f:
        f.write(img_resp.content)
def get_info(url):
    """Download every image listed on one search-result page.

    Fetches ``url``, selects the image entries with XPath and passes each
    one to ``down_img``. The file name is the entry's ``title`` attribute
    plus the extension taken from the image URL, so two entries with the
    same title and extension end up in the same file.

    Args:
        url: URL of the search-result page to scrape.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.encoding = 'UTF-8'
    html_tree = etree.HTML(resp.text)
    data = html_tree.xpath('//div[@class="main"]/div/div/div[1]/div[2]/div/div')
    for item in data:
        titles = item.xpath('./a/@title')
        srcs = item.xpath('./a/img/@src')
        if not titles or not srcs:
            # The selector can match divs that are not image entries;
            # skip them instead of failing with IndexError.
            continue
        # urljoin resolves protocol-relative ("//host/x.jpg"), absolute and
        # site-relative src values against the page URL.
        full_url = urljoin(url, srcs[0])
        # Take the extension from the URL path only, so a query string or
        # fragment never ends up in the file name. It keeps its leading dot.
        url_suffix = os.path.splitext(urlparse(full_url).path)[1]
        # Replace characters that are not allowed in file names.
        safe_name = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', titles[0]).strip()
        file_name = '{}{}'.format(safe_name, url_suffix)
        down_img(file_name, full_url)
# Pagination: scrape result pages 1 through 223 (the range end is exclusive).
for page_no in range(1, 224):
    get_info("https://aidotu.com/search/0-0-0-{}.html".format(page_no))
浙公网安备 33010602011771号