[scrapy] spider object has no attribute '_rules'

这是因为子类重写了 __init__ 方法,却没有在其中调用父类(CrawlSpider)的 __init__,导致 _rules 属性没有被初始化。

解决办法:在子类的 __init__ 中先调用父类的 __init__,示例如下:

# -*- coding:utf-8 -*-
from selenium import webdriver
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors import LinkExtractor
from sina_comment.items import SinaCommentItem
import re

class MySpider(CrawlSpider):

	name = "sina"
	#设置下载延时
	download_delay = 2
	allowed_domains = ['ent.sina.com.cn']
	#第一篇文章地址
	start_urls = ['http://ent.sina.com.cn/']

	rules = [
	    Rule(LinkExtractor(allow=('http://ent.sina.com.cn/.*shtml')), callback='parse_item'),
	]
	def __init__(self,*args, **kwargs):
		"""Initialise the spider and create a Selenium Chrome driver.

		All positional and keyword arguments are forwarded unchanged to the
		parent class initialiser; the Chrome driver is then stored on the
		instance as ``self.driver``.
		"""
		# Calling the parent initialiser is the fix this snippet demonstrates:
		# without it the spider fails with "object has no attribute '_rules'".
		super(MySpider, self).__init__(*args, **kwargs)  # this is the key step
		self.driver = webdriver.Chrome()
	def parse_item(self, response):
	    self.log('Hi, this is an article page! %s' % response.url)
	    try:
	    	self.driver

  

posted @ 2015-05-19 15:29  捕蛇者说  阅读(2305)  评论(0编辑  收藏  举报