scrapy 框架简单爬取 4K高清壁纸

import os

import scrapy


class TpSpider(scrapy.Spider):
    """Crawl wallpaper images from pic.netbian.com and save them locally.

    The crawl runs in three stages:

    1. ``parse`` reads a listing page and follows each detail-page link.
    2. ``imgs_parse`` reads a detail page and requests the image shown on it.
    3. ``img_parse`` writes the downloaded image bytes into ``./imgs``.
    """

    name = 'tp'
    # Listing pages to crawl: the first page is index.html and pages 2-10 are
    # index_<page>.html, i.e. 10 pages in total.
    start_urls = ['http://pic.netbian.com/4kmeinv/index.html'] + [
        'http://pic.netbian.com/4kmeinv/index_%s.html' % page
        for page in range(2, 11)
    ]

    # Directory (relative to the working directory) that receives the images.
    _image_dir = './imgs'
    # Maps characters that are invalid in file names on common filesystems
    # to an underscore, so an image title can be used as a file name.
    _unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    def parse(self, response):
        """Yield a request for every detail page linked from a listing page.

        Args:
            response: The downloaded listing page.

        Yields:
            scrapy.Request: One request per list entry, handled by
            ``imgs_parse``.
        """
        for entry in response.xpath('//ul[@class="clearfix"]/li'):
            href = entry.xpath('./a/@href').extract_first()
            if not href:
                # No link in this entry; building a URL from None would raise.
                continue
            # urljoin resolves relative and absolute hrefs alike.
            yield scrapy.Request(
                url=response.urljoin(href),
                callback=self.imgs_parse,
            )

    def imgs_parse(self, response):
        """Yield a request for the image displayed on a detail page.

        The image's ``alt`` text travels with the request in ``meta['name']``
        so that ``img_parse`` can use it as the file name.

        Args:
            response: The downloaded detail page.

        Yields:
            scrapy.Request: One request per image link, handled by
            ``img_parse``.
        """
        for link in response.xpath('//div[@class="photo-pic"]/a'):
            src = link.xpath('./img/@src').extract_first()
            if not src:
                # Nothing to download without an image source.
                continue
            title = link.xpath('./img/@alt').extract_first()
            yield scrapy.Request(
                url=response.urljoin(src),
                callback=self.img_parse,
                meta={'name': title},
            )

    def img_parse(self, response):
        """Write the downloaded image to ``./imgs/<name>.jpg``.

        Args:
            response: The downloaded image; ``response.meta['name']`` holds
                the title chosen by ``imgs_parse`` (may be missing or empty).
        """
        name = response.meta.get('name')
        if not name:
            # Without a title, fall back to the file name in the image URL so
            # that untitled images do not all overwrite one another.
            name = os.path.splitext(os.path.basename(response.url))[0]
        # Titles come from the page and may contain path separators or other
        # characters that are not valid in a file name.
        file_stem = name.translate(self._unsafe_chars).strip()

        # The target directory does not exist on a fresh checkout.
        os.makedirs(self._image_dir, exist_ok=True)
        path = os.path.join(self._image_dir, '%s.jpg' % file_stem)
        with open(path, 'wb') as f:
            f.write(response.body)
        self.logger.info('正在下载图片：%s', name)

posted @ 2020-07-15 09:04 小_赵 阅读(169) 评论(0) 收藏 举报

刷新页面返回顶部

小_赵

scrapy 框架简单 爬取 4K高清 壁纸

公告

scrapy 框架简单爬取 4K高清壁纸