scrapy框架爬取17173游戏

1.任务:

使用 Scrapy 框架爬取 17173 游戏排行榜(新游期待榜和热门游戏榜),内容包括排名、游戏名称、票数,并保存到文本文件

2.爬虫代码 -- a17173.py

 

 1 import scrapy
 2 import json
 3 
 4 class A17173Spider(scrapy.Spider):
 5     name = '17173'
 6     allowed_domains = ['17173.com']
 7     start_urls = ['http://top.17173.com/default-index.html?oper_status=1&game_frame=0&game_type=0&game_theme=0&game_feature=0&page={}'.format(num) for num in range(1,6)]
 8     def parse(self, response):
 9         newgame_ranks = response.xpath('//div[@class="main-c1"]//div[@class="c1"]/em/text()').extract()
10         newgame_names = response.xpath('//div[@class="main-c1"]//div[@class="con"]/a/text()').extract()
11         newgame_votess = [x.strip() for x in response.xpath('//div[@class="main-c1"]//div[@class="item-in"]/div[@class="c3"]/text()').extract()]
12         hotgame_ranks = response.xpath('//div[@class="main-c2"]//div[@class="c1"]/em/text()').extract()
13         hotgame_names = response.xpath('//div[@class="main-c2"]//div[@class="con"]/a/text()').extract()
14         hotgame_votess = [x.strip() for x in response.xpath('//div[@class="main-c2"]//div[@class="item-in"]/div[@class="c3"]/text()').extract()]
15 
16         for newgame_rank, newgame_name, newgame_votes, hotgame_rank, hotgame_name, hotgame_votes in zip(newgame_ranks, newgame_names, newgame_votess, hotgame_ranks, hotgame_names, hotgame_votess):
17             yield {
18                 'newgame_rank' : newgame_rank,
19                 'newgame_name': newgame_name,
20                 'newgame_votes': newgame_votes,
21                 'hotgame_rank': hotgame_rank,
22                 'hotgame_name': hotgame_name,
23                 'hotgame_votes': hotgame_votes
24             }
25         print(type(hotgame_votess))

 

3.pipelines.py

 1 # Define your item pipelines here
 2 #
 3 # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 4 # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
 5 
 6 
 7 # useful for handling different item types with a single interface
 8 from itemadapter import ItemAdapter
 9 
10 
class GamePipeline:
    """Write every scraped item to two tab-separated UTF-8 text files.

    Each item contributes one ``rank<TAB>name<TAB>votes`` line built from its
    ``newgame_*`` keys to ``新游期待榜.txt`` and one line built from its
    ``hotgame_*`` keys to ``热门游戏榜.txt``.  Both files are created in the
    current working directory and truncated when the spider opens.
    """

    def open_spider(self, spider):
        """Open both output files for writing."""
        # NOTE: despite their names, these attributes hold open file objects.
        self.filename1 = open('新游期待榜.txt', 'w', encoding='utf-8')
        try:
            self.filename2 = open('热门游戏榜.txt', 'w', encoding='utf-8')
        except OSError:
            # Do not leak the first handle when the second file cannot be opened.
            self.filename1.close()
            raise

    @staticmethod
    def _row(item, prefix):
        """Format the ``<prefix>_rank/_name/_votes`` fields as one TSV line.

        ``str.format`` is used instead of ``+`` so that non-string field
        values (e.g. an integer rank) are written rather than raising
        ``TypeError``.  A missing key still raises ``KeyError``.
        """
        return '{}\t{}\t{}\n'.format(
            item[prefix + '_rank'],
            item[prefix + '_name'],
            item[prefix + '_votes'],
        )

    def process_item(self, item, spider):
        """Append the item's two rows to their files and return the item unchanged."""
        for handle, prefix in (
            (self.filename1, 'newgame'),
            (self.filename2, 'hotgame'),
        ):
            handle.write(self._row(item, prefix))
            # Flush after every row so the data already written is not lost
            # in Python's buffer if the crawl is interrupted.
            handle.flush()

        return item

    def close_spider(self, spider):
        """Close both output files, even if closing the first one fails."""
        try:
            self.filename1.close()
        finally:
            self.filename2.close()

4.结果

 

 

 

posted @ 2021-03-10 01:26  简单de人  阅读(114)  评论(0)    收藏  举报