读书网入库练习
settings.py
# --- MySQL connection parameters -------------------------------------------
# Looked up by name through the project settings by the item pipeline.
DB_HOST, DB_PORT = "localhost", 3306
DB_USER, DB_PWD = "root", "1234"
DB_NAME = "guli"
DB_CHARSET = "utf8"

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# Maps each enabled pipeline's dotted class path to its integer order value.
ITEM_PIPELINES = {"readPro.pipelines.ReadproPipeline": 300}
pipelines.py
from scrapy.utils.project import get_project_settings
import pymysql
class ReadproPipeline:
    """Scrapy item pipeline that inserts each scraped item into the MySQL ``book`` table.

    Connection parameters (``DB_HOST``, ``DB_PORT``, ``DB_USER``, ``DB_PWD``,
    ``DB_NAME``, ``DB_CHARSET``) are read from the project settings when the
    spider opens; one connection and one cursor are kept for the whole crawl
    and released when the spider closes.
    """

    # ``%s`` placeholders are filled in by the driver, which escapes the
    # values.  Scraped text must never be spliced into the SQL string itself:
    # a quote character in a title would break (or inject into) the statement.
    _INSERT_SQL = 'insert into book(name,src) values(%s,%s)'

    def open_spider(self, spider):
        """Load the database settings and open the connection.

        Args:
            spider: The spider being opened (unused).
        """
        settings = get_project_settings()
        self.host = settings['DB_HOST']
        self.port = settings['DB_PORT']
        self.user = settings['DB_USER']
        self.pwd = settings['DB_PWD']
        self.name = settings['DB_NAME']
        self.charset = settings['DB_CHARSET']
        # Only the host is echoed; the password is deliberately not printed so
        # the credential does not end up in console output or captured logs.
        print(self.host)
        self.connect()

    def connect(self):
        """Open the MySQL connection and create the cursor used for inserts."""
        self.conn = pymysql.connect(
            host=self.host,
            port=self.port,
            user=self.user,
            password=self.pwd,
            db=self.name,
            charset=self.charset,
        )
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert one item into ``book`` and commit.

        Args:
            item: Mapping providing the ``name`` and ``src`` values to store.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, unchanged.

        Raises:
            KeyError: If ``item`` lacks ``name`` or ``src``.
            Exception: Any error raised by the insert or commit is re-raised
                after the open transaction has been rolled back.
        """
        # Resolve the values first so a missing key fails before touching the DB.
        params = (item['name'], item['src'])
        try:
            self.cursor.execute(self._INSERT_SQL, params)
            self.conn.commit()
        except Exception:
            # Do not leave a half-finished transaction on the shared connection.
            self.conn.rollback()
            raise
        return item

    def close_spider(self, spider):
        """Release the cursor, then the connection.

        The cursor is closed first because it belongs to the connection; the
        ``finally`` guarantees the connection is closed even if closing the
        cursor fails.

        Args:
            spider: The spider being closed (unused).
        """
        try:
            self.cursor.close()
        finally:
            self.conn.close()
浙公网安备 33010602011771号