使用爬虫抓取王者荣耀英雄皮肤

1:创建爬虫项目

scrapy startproject wzry

2:创建爬虫

scrapy genspider jishudaniu example.com

3:启动爬虫

scrapy crawl jishudaniu

 

# -*- coding: utf-8 -*-
import scrapy
import os
import urllib.request

class JishudaniuSpider(scrapy.Spider):
    """Crawl the Honor of Kings hero list and save each hero's skin images.

    Flow: ``parse`` reads the hero list page and requests every hero detail
    page; ``detial_parse`` reads the hero name, hero number and skin names
    from a detail page and requests each full-size skin image;
    ``save_skin`` writes a downloaded image to disk.

    Images are fetched through Scrapy's own downloader instead of a blocking
    ``urllib`` call, so a slow or missing image neither stalls the crawl nor
    aborts the remaining skins of the same hero.

    The output directory can be overridden from the command line, e.g.
    ``scrapy crawl jishudaniu -a save_dir=/tmp/wzryimg``.
    """

    name = "jishudaniu"

    # allowed_domains is left unset: the hero pages and the skin images are
    # requested from different hosts (see start_urls / skin_url_template).

    # Starting point of the crawl: the hero list page.
    start_urls = ['https://pvp.qq.com/web201605/herolist.shtml']

    # Root directory for downloaded images; one sub-directory per hero is
    # created underneath it.
    save_dir = "E:\\wzryimg\\"

    # URL of one full-size skin image; ``index`` is the 1-based position of
    # the skin in the detail page's skin list.
    skin_url_template = (
        "https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/"
        "{hero_no}/{hero_no}-bigskin-{index}.jpg"
    )

    def parse(self, response):
        """Yield one request per hero detail page linked from the hero list.

        Args:
            response: Scrapy response for the hero list page.

        Yields:
            scrapy.Request: request for a hero detail page, handled by
            ``detial_parse``.
        """
        # '//' matches at any depth of the document; './' is relative to the
        # currently selected node.
        hero_links = response.xpath(
            '//div[@class="herolist-box"]/div[@class="herolist-content"]/ul/li/a'
        )
        for link in hero_links:
            href = link.xpath('./@href').get()
            if not href:
                # An anchor without a target cannot be followed; skip it
                # instead of failing the whole page.
                continue
            yield scrapy.Request(response.urljoin(href), self.detial_parse)

    def detial_parse(self, response):
        """Request every skin image of the hero shown on a detail page.

        Creates ``<save_dir>/<hero name>`` if needed. Pages whose hero
        script or skin list cannot be read are logged and skipped.

        Args:
            response: Scrapy response for a hero detail page.

        Yields:
            scrapy.Request: request for one skin image, handled by
            ``save_skin``; the destination path travels in
            ``meta["file_path"]``.
        """
        script_text = response.xpath('/html/body/script[10]/text()').get()
        hero = self._parse_hero_script(script_text)
        if hero is None:
            self.logger.warning(
                "Could not read hero name/number from %s", response.url
            )
            return
        hero_name, hero_no = hero

        skins = response.xpath('//div[@class="pic-pf"]/ul/@data-imgname').get()
        if not skins:
            self.logger.warning("No skin list found on %s", response.url)
            return

        hero_dir = os.path.join(self.save_dir, hero_name)
        os.makedirs(hero_dir, exist_ok=True)

        # Entries are separated by '|'; the skin name is the part before '&'.
        # The image number is the 1-based position in the list, so blank
        # entries are skipped without renumbering the ones that follow.
        for index, entry in enumerate(skins.split("|"), start=1):
            skin_name = entry.split("&")[0].strip()
            if not skin_name:
                continue
            yield scrapy.Request(
                self.skin_url_template.format(hero_no=hero_no, index=index),
                callback=self.save_skin,
                meta={"file_path": os.path.join(hero_dir, skin_name + ".jpg")},
            )

    def save_skin(self, response):
        """Write a downloaded skin image to the path given in the request meta.

        Args:
            response: Scrapy response whose body is the image and whose
                ``meta["file_path"]`` is the destination file.
        """
        file_path = response.meta["file_path"]
        try:
            with open(file_path, "wb") as image_file:
                image_file.write(response.body)
        except OSError as err:
            # One unwritable file should not stop the other downloads.
            self.logger.error("Could not save %s: %s", file_path, err)

    @staticmethod
    def _parse_hero_script(script_text):
        """Extract ``(hero_name, hero_no)`` from the detail page's inline script.

        The text is split on ','; in each of the first two pieces the quotes
        are removed and the value is whatever follows ' = '.
        NOTE(review): presumably the script looks like
        ``var heroName = '...', ename = '...';`` - confirm against the live
        page.

        Args:
            script_text: text content of the script element, or ``None``.

        Returns:
            A ``(hero_name, hero_no)`` tuple, or ``None`` when the text is
            missing or does not have the expected shape.
        """
        if not script_text:
            return None
        parts = script_text.split(",")
        try:
            hero_name = parts[0].replace("'", "").split(" = ")[1].strip()
            hero_no = (
                parts[1].replace("'", "").replace(";", "").split(" = ")[1].strip()
            )
        except IndexError:
            return None
        if not hero_name or not hero_no:
            return None
        return hero_name, hero_no

  

posted @ 2020-12-13 23:18  小小强学习网  阅读(364)  评论(0)  编辑  收藏  举报