# Scrapy 框架：简单爬取 4K 高清壁纸

import os

import scrapy


class TpSpider(scrapy.Spider):
    """Crawl wallpaper images from pic.netbian.com and save them locally.

    The crawl runs in three stages:

    1. ``parse`` reads a listing page and follows each thumbnail link to
       its detail page.
    2. ``imgs_parse`` reads a detail page and requests the image shown in
       its ``photo-pic`` container, carrying the image's ``alt`` text along
       as the file name.
    3. ``img_parse`` writes the downloaded bytes to ``./imgs/<name>.jpg``.
    """

    name = 'tp'
    # Listing pages to crawl: the index page plus pages 2-10 (10 in total).
    start_urls = ['http://pic.netbian.com/4kmeinv/index.html'] + \
        ['http://pic.netbian.com/4kmeinv/index_%s.html' % page for page in range(2, 11)]

    def parse(self, response):
        """Yield a request for the detail page of every thumbnail listed.

        Args:
            response: The downloaded listing page.

        Yields:
            scrapy.Request: One request per list item, handled by
            ``imgs_parse``.
        """
        for li in response.xpath('//ul[@class="clearfix"]/li'):
            href = li.xpath('./a/@href').extract_first()
            if not href:
                # A list item without a link would otherwise raise TypeError
                # and abort the remaining items on this page.
                continue
            # urljoin resolves both absolute and relative hrefs against the
            # page URL instead of assuming a fixed host prefix.
            yield scrapy.Request(url=response.urljoin(href), callback=self.imgs_parse)

    def imgs_parse(self, response):
        """Yield a download request for each image on a detail page.

        Args:
            response: The downloaded detail page.

        Yields:
            scrapy.Request: One request per image, handled by ``img_parse``
            with the image's ``alt`` text passed as ``meta['name']``.
        """
        for link in response.xpath('//div[@class="photo-pic"]/a'):
            src = link.xpath('./img/@src').extract_first()
            if not src:
                # No image source to download; skip instead of crashing.
                continue
            title = link.xpath('./img/@alt').extract_first()
            yield scrapy.Request(
                url=response.urljoin(src),
                callback=self.img_parse,
                meta={'name': title},
            )

    def img_parse(self, response):
        """Write the downloaded image bytes to ``./imgs/<name>.jpg``.

        Args:
            response: The downloaded image; ``response.meta['name']`` holds
                the title to use as the file name.
        """
        # Fall back to the last URL segment when the page supplied no alt
        # text, so untitled images do not all overwrite "None.jpg".
        name = response.meta.get('name') or response.url.rsplit('/', 1)[-1]
        # Replace characters that are path separators or are rejected in
        # file names on common filesystems.
        safe_name = ''.join('_' if ch in '\\/:*?"<>|' else ch for ch in name)
        # The output directory is not guaranteed to exist on first run.
        os.makedirs('./imgs', exist_ok=True)
        with open('./imgs/%s.jpg' % safe_name, 'wb') as f:
            f.write(response.body)
        self.logger.info('正在下载图片:%s', name)
# posted @ 2020-07-15 09:04  小_赵  阅读(151)  评论(0)  编辑  收藏  举报