[scrapy] spider object has no attribute '_rules'
这是因为__init__方法没有继承父类
解决办法:
# -*- coding:utf-8 -*-
from selenium import webdriver
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors import LinkExtractor
from sina_comment.items import SinaCommentItem
import re
class MySpider(CrawlSpider):
name = "sina"
#设置下载延时
download_delay = 2
allowed_domains = ['ent.sina.com.cn']
#第一篇文章地址
start_urls = ['http://ent.sina.com.cn/']
rules = [
Rule(LinkExtractor(allow=('http://ent.sina.com.cn/.*shtml')), callback='parse_item'),
]
def __init__(self,*args, **kwargs):
super(MySpider, self).__init__(*args, **kwargs) # 这里是关键
self.driver = webdriver.Chrome()
def parse_item(self, response):
self.log('Hi, this is an article page! %s' % response.url)
try:
self.driver
浙公网安备 33010602011771号