使用 Scrapy 爬虫抓取王者荣耀英雄皮肤，步骤如下：
1. 创建爬虫项目：
   scrapy startproject wzry
2. 在项目目录下创建爬虫（此处的 example.com 只是占位域名，代码中的 start_urls 会覆盖它）：
   scrapy genspider jishudaniu example.com
3. 启动爬虫：
   scrapy crawl jishudaniu
# -*- coding: utf-8 -*-
import scrapy
import os
import urllib.request
class JishudaniuSpider(scrapy.Spider):
    """Spider that downloads every skin image for each Honor of Kings hero.

    Crawls the hero-list page, follows each hero's detail page, extracts the
    hero's name/number and the skin names from an inline <script> tag, then
    downloads each big-skin JPEG into ``save_root/<hero name>/``.
    """

    name = "jishudaniu"
    # allowed_domains intentionally unset: images are served from gtimg.cn,
    # a different domain than the pages being crawled.
    # allowed_domains = ["example.com"]
    start_urls = ['https://pvp.qq.com/web201605/herolist.shtml']

    # Root directory for downloaded skins. Kept as the original default;
    # override this class attribute to run on a non-Windows machine.
    save_root = "E:\\wzryimg"

    def parse(self, response):
        """Yield one Request per hero found on the hero-list page."""
        host_name = "https://pvp.qq.com/web201605/"
        # '//' selects anywhere in the document; each <a> links one hero.
        hero_links = response.xpath(
            '//div[@class="herolist-box"]/div[@class="herolist-content"]/ul/li/a'
        )
        for link in hero_links:
            # './' restricts the lookup to the current <a> element.
            href = link.xpath('./@href').extract()[0]
            yield scrapy.Request(host_name + href, self.detial_parse)

    def detial_parse(self, response):
        """Parse a hero detail page and download all of its skin images.

        NOTE(review): the method name keeps the original (misspelled)
        identifier so any external reference to this callback stays valid.
        """
        # The 10th inline <script> carries the hero metadata, roughly:
        #   heroName = '...' , heroId = '...';
        message = response.xpath('/html/body/script[10]/text()').extract()[0]
        parts = message.split(",")
        hero_name = parts[0].replace("'", "").split(" = ")[1].strip()
        hero_no = parts[1].replace("'", "").replace(";", "").split(" = ")[1].strip()

        # CDN URL template; the 1-based skin index + ".jpg" is appended below.
        skin_url_template = (
            f"https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/"
            f"{hero_no}/{hero_no}-bigskin-"
        )

        # One sub-directory per hero; exist_ok avoids the exists()/makedirs()
        # race of the original check-then-create sequence.
        hero_dir = os.path.join(self.save_root, hero_name)
        os.makedirs(hero_dir, exist_ok=True)

        # "data-imgname" looks like "name1&0|name2&1|..."; the text before
        # each '&' is the human-readable skin name.
        skins = response.xpath('//div[@class="pic-pf"]/ul/@data-imgname').extract()[0]
        skin_names = [entry.split("&")[0] for entry in skins.split("|")]

        # Skin images on the CDN are numbered starting at 1.
        for index, skin_name in enumerate(skin_names, start=1):
            file_name = os.path.join(hero_dir, skin_name + ".jpg")
            urllib.request.urlretrieve(
                "{0}{1}.jpg".format(skin_url_template, index), filename=file_name
            )

浙公网安备 33010602011771号