import pymongo
class SpiderPipeline(object):
    """Scrapy item pipeline that stores every scraped item in MongoDB.

    The connection URI and database name are read from the crawler
    settings ``MONGO_URI`` and ``MONGO_DB``. Items are written to the
    collection named by ``collection_name``.
    """

    # Name of the MongoDB collection that receives the items; subclasses
    # may override it.
    collection_name = 'dunzi'

    def __init__(self, mongo_uri, mongo_db):
        """Remember the connection parameters; no connection is opened yet.

        Args:
            mongo_uri: MongoDB connection URI.
            mongo_db: Name of the MongoDB database to write to.
        """
        self.mongo_uri = mongo_uri
        self.mongo_db = mongo_db
        # Populated by open_spider(); kept as None until then so that
        # close_spider() is safe even when open_spider() never succeeded.
        self.client = None
        self.db = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's settings.

        Reads ``MONGO_URI`` and ``MONGO_DB`` via ``crawler.settings.get``.
        """
        return cls(
            mongo_uri=crawler.settings.get('MONGO_URI'),
            mongo_db=crawler.settings.get('MONGO_DB'),
        )

    def open_spider(self, spider):
        """Open the MongoDB client and select the database (spider start hook)."""
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.db = self.client[self.mongo_db]

    def process_item(self, item, spider):
        """Insert ``item`` into the collection and return it unchanged.

        The item is converted with ``dict(item)`` before being stored.
        """
        # insert_one() replaces Collection.insert(), which was deprecated in
        # PyMongo 3 and removed in PyMongo 4.
        self.db[self.collection_name].insert_one(dict(item))
        return item

    def close_spider(self, spider):
        """Close the MongoDB client if one was opened (spider close hook)."""
        if self.client is not None:
            self.client.close()
            self.client = None