Scrapy 框架持久化存储的三种方式:存入 MySQL、文件和 Redis

这里就不做详细讲解了,毕竟不是一两句话能说清楚的,所以我把代码和注释直接放在下面,谢谢!

import pymysql
from redis import Redis
# Write items to MySQL
class WangyiMysql(object):
    """Scrapy item pipeline that inserts each item into the MySQL ``news`` table.

    The connection is opened in ``open_spider`` and released in
    ``close_spider``. A single cursor is reused for every insert.
    """

    conn = None    # pymysql connection; set by open_spider
    cursor = None  # cursor shared by all inserts; created on first use

    def open_spider(self, spider):
        """Open the database connection and a reusable cursor."""
        self.conn = pymysql.Connection(host='127.0.0.1',port=3306,user='root',password='',db='spider',charset='utf8')
        print(self.conn)
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert the item's ``title`` and ``content`` as one row.

        A failed insert is printed and rolled back (best effort); the item is
        returned either way so later pipelines still receive it.

        Raises:
            KeyError: if the item has no ``title`` or ``content`` field.
        """
        # Placeholders let the driver escape the values. Formatting scraped
        # text straight into the statement breaks on quotes and allows SQL
        # injection.
        sql = 'insert into news values (%s,%s)'
        # Built outside the try block so a missing field is not swallowed.
        params = (item['title'], item['content'])

        # Create the cursor lazily in case open_spider was not the one that
        # set up the connection; never create a new cursor per item.
        if self.cursor is None:
            self.cursor = self.conn.cursor()

        try:
            self.cursor.execute(sql, params)
            self.conn.commit()
        except Exception as e:
            print(e)
            self.conn.rollback()
        return item

    def close_spider(self, spider):
        """Close the cursor and connection, tolerating ones never opened."""
        if self.cursor is not None:
            self.cursor.close()
            self.cursor = None
        if self.conn is not None:
            self.conn.close()
            self.conn = None

# Write items to Redis
class WangyiRedis(object):
    """Scrapy item pipeline that pushes each item onto the Redis list ``news``."""

    conn = None  # Redis client; set by open_spider

    def open_spider(self, spider):
        """Create the Redis client."""
        self.conn = Redis(host='127.0.0.1',port=6379)
        print(self.conn)

    def process_item(self, item, spider):
        """Serialize the item to JSON and push it onto the ``news`` list.

        Returns the item so the next pipeline in ITEM_PIPELINES receives it.
        """
        # Local import keeps this block self-contained.
        import json

        # Redis values must be bytes, str, int or float, so the item is
        # converted to a JSON string. ensure_ascii=False keeps non-ASCII text
        # readable in the stored value.
        payload = json.dumps(dict(item), ensure_ascii=False)
        self.conn.lpush('news', payload)
        return item

# Write items to a text file
class ChoutiproPipeline(object):
    """Scrapy item pipeline that appends one ``title:content`` line per item
    to ``chouti.txt``."""

    # Output file handle; stays None until open_spider assigns it.
    fp = None

    def open_spider(self, spider):
        """Announce the start of the crawl and open the output file.

        Per the original author's note, this hook is executed only once.
        """
        print('开始爬虫......')
        self.fp = open('chouti.txt', mode='w', encoding='utf-8')

    def process_item(self, item, spider):
        """Write the item submitted by the spider as a single line.

        Returns the item so it is handed to the next pipeline class.
        """
        title, body = item['title'], item['content']
        line = ''.join((title, ':', body, '\n'))
        self.fp.write(line)
        return item

    def close_spider(self, spider):
        """Close the output file."""
        self.fp.close()

如果想让这三个管道同时执行,记得在 settings.py 里配置 ITEM_PIPELINES:

    # Pipelines enabled for the project. Per the Scrapy documentation, the
    # integer sets the run order: lower values run first.
    ITEM_PIPELINES = {
        'first_blod.pipelines.FirstBlodPipeline': 300,
        'first_blod.pipelines.MysqlPileLine': 301,
        # List the dotted path of every pipeline class that should run.
    }

posted @ 2019-10-15 17:50 koala_dz 阅读(440) 评论(0) 收藏举报

刷新页面返回顶部

scrapy 框架持久化存储的三个方法 存入 mysql 文件 redis

scrapy 框架持久化存储的三个方法存入 mysql 文件 redis