import pymongo
import requests
import random
import time
import pymysql
db = pymongo.MongoClient()['cs']['dn']
db1 = pymysql.connect(user='root',password='root',db='cs',charset='utf8')
cursor = db1.cursor()
class CsdnPipeline(object):
def __init__(self):
self.set = set()
def process_item(self, item, spider):
if item not in self.set:
title = item['title']
content_text = item['content_text']
create_time_datetime = item['create_time_datetime']
nickName = item['nickName']
read_count = item['read_count']
content_img = item['content_img']
keyword = item['keyword']
if len(content_img)>0:
path = []
for img in content_img:
img_name = 'F:\\34\\tu\\'+str(time.time()).split('.')[1]+str(random.randrange(1,9999999999999999999999999))+'.jpg'
img_source = requests.get(img).content
op = open(img_name,'wb')
op.write(img_source)
op.close()
path.append(img_name)
item['content_img'] = path
else:
item['content_img'] = '暂无图片'
db.insert(dict(item))
import json
data = json.dumps(dict(item))
sql = "insert into dn1(`data`) VALUES ('{}')".format(data)
cursor.execute(sql)
db1.commit()
self.set.add(item)
return item
else:
print('已经存在')
return item