import requests
import re
import os
from lxml import etree
def get_index(timeout=10):
    """Download the hero list page and return its HTML as text.

    Args:
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body decoded as GBK, or ``None`` when the server
        answers with a status code other than 200.

    Raises:
        requests.RequestException: On connection failures or timeouts.
    """
    url = 'https://pvp.qq.com/web201605/herolist.shtml'
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None
    # Decode the raw bytes explicitly as GBK instead of relying on the
    # encoding that requests would guess for ``response.text``.
    return response.content.decode('gbk')
def parse_index(html):
    """Extract absolute hero detail page URLs from the hero list HTML.

    Args:
        html: HTML text of the hero list page. May be ``None`` or empty
            when the page could not be downloaded.

    Returns:
        A list of absolute detail page URLs. The list is empty when there
        is no HTML to parse or when no hero links are found.
    """
    # get_index() returns None on a non-200 response; treat that (and an
    # empty document) as "no links" instead of crashing inside the parser.
    if not html:
        return []
    selector = etree.HTML(html)
    if selector is None:
        return []
    hrefs = selector.xpath('//*[@class="herolist clearfix"]/li/a/@href')
    # The hrefs are prefixed with the directory of the list page to form
    # absolute URLs.
    return ['https://pvp.qq.com/web201605/' + href for href in hrefs]
def parse_deatil(url, timeout=10):
    """Download every skin image of the hero described at *url*.

    The hero detail page is fetched and scanned with regular expressions
    for the hero name, the numeric hero id and the ``data-imgname``
    attribute that lists the skin names. Each skin image is then saved as
    ``王者荣耀皮肤/<hero name>/<skin name>.jpg``.

    Args:
        url: URL of a hero detail page.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. Pages that lack any of the expected fields are skipped with
        a message instead of raising ``IndexError``.

    Raises:
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    html = response.content.decode('gbk')

    hero_names = re.findall(r'<h2 class="cover-name">(.*?)</h2>', html, re.S)
    hero_ids = re.findall(r'<span class="hidden">(\d+)</span>', html, re.S)
    skin_attrs = re.findall(
        r'<ul class="pic-pf-list pic-pf-list3" data-imgname="(.*?)">',
        html,
        re.S,
    )
    if not (hero_names and hero_ids and skin_attrs):
        print('Skipping {}: hero name, id or skin list not found'.format(url))
        return

    hero_name = hero_names[0]
    hero_id = hero_ids[0]
    # The attribute holds '|'-separated entries; only the part before the
    # first '&' of each entry is used as the skin name.
    skin_name_list = [entry.split('&')[0] for entry in skin_attrs[0].split('|')]

    # Save directory: one sub-directory per hero, created once up front.
    hero_dir = os.path.join('王者荣耀皮肤', hero_name)
    os.makedirs(hero_dir, exist_ok=True)

    # Image URLs are numbered from 1 in the order the skins are listed.
    for index, skin_name in enumerate(skin_name_list, start=1):
        skin_url = 'http://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/{}/{}-bigskin-{}.jpg'.format(hero_id, hero_id, index)
        res = requests.get(skin_url, timeout=timeout)
        if res.status_code != 200:
            # Do not store an error page under a .jpg file name.
            print('Skipping {}: HTTP {}'.format(skin_url, res.status_code))
            continue
        file_name = '{}.jpg'.format(skin_name)
        with open(os.path.join(hero_dir, file_name), 'wb') as f:
            f.write(res.content)

    # NOTE(review): the original indentation was lost, so it is unclear
    # whether this message was printed per skin or per hero. It reports the
    # hero name and the full skin list, so it is emitted once per hero.
    print('正在爬取:{}{}'.format(hero_name, skin_name_list))
if __name__ == '__main__':
    # Fetch the hero list, then download the skins of every hero found.
    html = get_index()
    if html is None:
        # get_index() returns None when the list page does not answer with
        # HTTP 200; stop with a clear message instead of failing in the
        # HTML parser.
        raise SystemExit('Failed to download the hero list page')
    for link in parse_index(html):
        parse_deatil(link)