Scrapy学习-7-数据存储至数据库

使用MySQL数据库存储
安装mysql模块包
pip install mysqlclient
pip install pymysql  # 下文示例代码使用的是 pymysql,需要安装该驱动

 

相关库文件
# Ubuntu / Debian
sudo apt-get install libmysqlclient-dev

# CentOS / RHEL
sudo yum install python-devel mysql-devel

 

阻塞型的数据写入操作
class MysqlPipeline(object):
    """Scrapy item pipeline that writes each item to MySQL synchronously.

    Every ``process_item`` call executes one INSERT and commits it before
    returning, so the crawl is blocked for the duration of the database
    round trip.
    """

    def __init__(self):
        """Open the MySQL connection and a cursor used for all inserts."""
        # Imported locally so this snippet is self-contained.
        import pymysql

        # PyMySQL >= 1.0 accepts keyword arguments only, and MySQL charset
        # names contain no hyphen ('utf-8' is rejected by the driver).
        self.conn = pymysql.connect(
            host='192.168.1.1',
            user='root',
            password='123456',
            database='titlespider',
            charset='utf8mb4',
            use_unicode=True,
        )
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert *item* into the ``article`` table and commit.

        Args:
            item: mapping providing ``title``, ``cteate_time``, ``url`` and
                ``content``.
            spider: the spider that produced the item (unused).

        Returns:
            The same *item*, so later pipelines can keep processing it.

        Raises:
            Whatever the driver raises on failure; the open transaction is
            rolled back first so the connection stays usable.
        """
        # NOTE(review): 'cteate_time' looks like a typo of 'create_time', but
        # it is kept because it must match the table column and item field.
        insert_sql = """
            insert into article(title, cteate_time, url, content)
            VALUES (%s, %s, %s, %s)
        """
        params = (item['title'], item['cteate_time'], item['url'], item['content'])
        try:
            self.cursor.execute(insert_sql, params)
            self.conn.commit()
        except Exception:
            # Do not leave a half-finished transaction on the shared connection.
            self.conn.rollback()
            raise
        return item

    def close_spider(self, spider):
        """Release the cursor and connection when the spider finishes."""
        self.cursor.close()
        self.conn.close()

 

使用twisted提供的数据库连接池,异步化写入,缓解写数据操作堵塞
# 首先在settings定义数据库关键字变量

    # MySQL connection parameters, read by MysqlTwistedPipeline.from_settings.
    # These are placeholder values for the tutorial.
    MYSQL_HOST = '192.168.1.1'
    MYSQL_USER = 'root'
    MYSQL_PASSWD = '123456'
    # NOTE(review): the blocking MysqlPipeline example connects to
    # 'titlespider' instead -- confirm which database name is intended.
    MYSQL_DB = 'articlespider'

# 然后编写Pipeline类

    from twisted.enterprise import adbapi
    import pymysql
    import pymysql.cursors      

    class MysqlTwistedPipeline(object):
        """Scrapy item pipeline that inserts items through a Twisted adbapi pool.

        Inserts are handed to ``adbapi.ConnectionPool.runInteraction``, which
        runs them on a pool thread, so ``process_item`` returns without
        waiting for the database.
        """

        def __init__(self, dbpool):
            """Store the connection pool used for all inserts.

            Args:
                dbpool: an object exposing ``runInteraction`` (normally a
                    ``twisted.enterprise.adbapi.ConnectionPool``).
            """
            self.dbpool = dbpool

        @classmethod
        def from_settings(cls, settings):
            """Build the pipeline from the MYSQL_* values in *settings*.

            Args:
                settings: mapping providing ``MYSQL_HOST``, ``MYSQL_USER``,
                    ``MYSQL_PASSWD`` and ``MYSQL_DB``.

            Returns:
                A new pipeline instance backed by a fresh connection pool.
            """
            conn_dict = dict(
                host=settings['MYSQL_HOST'],
                user=settings['MYSQL_USER'],
                # 'passwd' is a deprecated alias in PyMySQL; use 'password'.
                password=settings['MYSQL_PASSWD'],
                database=settings['MYSQL_DB'],
                # MySQL charset names contain no hyphen; 'utf-8' is rejected.
                charset='utf8mb4',
                cursorclass=pymysql.cursors.DictCursor,
                use_unicode=True,
            )
            # adbapi imports the driver by this name, so it must be the
            # importable module name 'pymysql', not the package title.
            dbpool = adbapi.ConnectionPool('pymysql', **conn_dict)
            return cls(dbpool)

        def process_item(self, item, spider):
            """Schedule an asynchronous insert of *item* and return it at once.

            Args:
                item: mapping with the fields read by ``do_insert``.
                spider: the spider that produced the item (unused).

            Returns:
                The same *item*, so later pipelines can keep processing it.
            """
            query = self.dbpool.runInteraction(self.do_insert, item)
            # Deferred exposes addErrback; there is no addErrorback method.
            query.addErrback(self.handle_error)
            return item

        def do_insert(self, cursor, item):
            """Execute the INSERT for *item* on the pool-provided cursor.

            Args:
                cursor: the transaction cursor that ``runInteraction`` passes
                    as the first argument.
                item: mapping providing ``title``, ``cteate_time``, ``url``
                    and ``content``.
            """
            # NOTE(review): 'cteate_time' looks like a typo of 'create_time',
            # but it is kept because it must match the table column and item
            # field.
            insert_sql = """
                        insert into article(title, cteate_time, url, content)
                        VALUES (%s, %s, %s, %s)
                    """
            cursor.execute(insert_sql, (item['title'], item['cteate_time'], item['url'], item['content']))
            # No explicit commit: runInteraction commits when this function
            # returns and rolls back if it raises.

        def handle_error(self, failure):
            """Report a failed insert.

            Args:
                failure: the ``twisted.python.failure.Failure`` describing
                    the error raised inside the interaction.
            """
            print(failure)

        def close_spider(self, spider):
            """Shut down the connection pool when the spider finishes."""
            self.dbpool.close()

 

使用类似django-model的方式写入数据库
https://github.com/scrapy-plugins/scrapy-djangoitem
 
 
posted @ 2018-05-17 21:16  前路~  阅读(174)  评论(0)  编辑  收藏  举报