from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

from boss.items import BossItem
class ZhiPinSpider(CrawlSpider):
    """Crawl zhipin.com search results for "python" and scrape job postings.

    Starting from the first search-result page, the spider follows the
    pagination links and hands every job detail page to ``parse_job``,
    which yields one ``BossItem`` per posting.
    """

    name = 'Zhipin'
    allowed_domains = ['zhipin.com']
    start_urls = ['https://www.zhipin.com/c100010000/?query=python&page=1']

    # Rule order matters: CrawlSpider hands each extracted link to the FIRST
    # rule whose extractor matches it, so this must be an ordered sequence
    # (tuple), not a set.
    rules = (
        # Job list (pagination) pages: keep following, nothing to scrape.
        Rule(LinkExtractor(allow=r'.+\?query=python&page=\d'), follow=True),
        # Job detail pages: scrape them, do not follow links any further.
        # The original reused the list-page pattern here, so this rule could
        # never receive a link and parse_job was never called.
        # NOTE(review): pattern assumes detail pages live under
        # /job_detail/<id>.html -- confirm against the live site.
        Rule(
            LinkExtractor(allow=r'.+job_detail/.+\.html'),
            callback="parse_job",
            follow=False,
        ),
    )

    def parse_job(self, response):
        """Extract the title and company from a job detail page.

        Args:
            response: The Scrapy response for a job detail page.

        Yields:
            BossItem: An item with ``title`` and ``company`` populated.
            ``title`` is an empty string when the heading is missing;
            ``company`` is ``None`` when the company link is missing.
        """
        # Default to '' so a missing <h1> does not raise AttributeError on
        # None.strip() and abort the callback.
        title = response.xpath('//h1[@class="name"]/text()').get(default='').strip()
        company = response.xpath('//div[@class="info-company"]//a/text()').get()
        yield BossItem(title=title, company=company)