1 # -*- coding: utf-8 -*-
2 import scrapy
3 from pyquery import PyQuery as pq
4
5 from zolphone.items import ZolphoneItem
6
7
class PhoneSpider(scrapy.Spider):
    """Crawl the ZOL phone listing pages and yield one item per phone.

    The crawl has three stages:

    1. ``start_requests`` requests every numbered listing page.
    2. ``parse_index`` follows the detail link of each list entry.
    3. ``parse_detail`` scrapes titles, price and release date into a
       ``ZolphoneItem``.
    """

    name = "phone"
    # Listing-page URL without its trailing page number and ".html" suffix;
    # page 1 is '...subcate57_0_list_1_0_1_1_0_1.html'.
    start_url = 'http://detail.zol.com.cn/cell_phone_index/subcate57_0_list_1_0_1_1_0_'
    # Number of the last listing page to request (inclusive). Kept as a class
    # attribute so it can be overridden; it is coerced with int() on use, so
    # a string value (e.g. one supplied as a spider argument) also works.
    last_page = 208

    def start_requests(self):
        """Yield one request per listing page, from 1 to ``last_page``."""
        for page in range(1, int(self.last_page) + 1):
            url = '{}{}.html'.format(self.start_url, page)
            yield scrapy.Request(url, callback=self.parse_index)

    def parse_index(self, response):
        """Yield a detail-page request for every phone on a listing page.

        Entries that carry no detail link are skipped instead of aborting
        the whole page.
        """
        doc = pq(response.text)
        for entry in doc('.list-box .list-item').items():
            href = entry.find('.pro-intro h3 a').attr('href')
            if not href:
                # attr() gives None when the anchor is missing; concatenating
                # that onto a base URL would raise TypeError.
                self.logger.debug(
                    'Skipping list item without detail link on %s',
                    response.url,
                )
                continue
            # urljoin resolves relative hrefs against the page URL and leaves
            # already-absolute hrefs untouched.
            detail_url = response.urljoin(href)
            yield scrapy.Request(url=detail_url, callback=self.parse_detail)

    def parse_detail(self, response):
        """Scrape a phone detail page and yield the populated item."""
        doc = pq(response.text)
        title1 = response.css('.page-title h1::text').extract_first()
        title2 = doc('.page-title h2').text()
        price = doc('.product-price .price-type').text()
        release_time = doc('.section div h3 .showdate').text()
        # Route diagnostics through the spider's logger rather than stdout.
        self.logger.debug(
            'Scraped %s | %s | %s | %s', title1, title2, price, release_time
        )

        item = ZolphoneItem()
        item['title1'] = title1
        item['title2'] = title2
        item['price'] = price
        item['release_time'] = release_time

        yield item
1 import scrapy
2
3
class ZolphoneItem(scrapy.Item):
    """Container for the data scraped from one phone detail page."""

    # Main and secondary page titles.
    title1 = scrapy.Field()
    title2 = scrapy.Field()
    # Price text as shown on the page.
    price = scrapy.Field()
    # Release-date text as shown on the page.
    release_time = scrapy.Field()