去游标
去游标
mongo
游标机制:
在遍历全表、集合的情况下,当表、集合本身在增量时,游标读到的去重条数与遍历计数
刚开始是一致的,后续有效增量就为0(见下方日志:去重条数 / 遍历计数)
1094295 / 1300000 ---- {'_id': ObjectId('5b03c2a99341f521755dd7c1')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:47
1094295 / 1305000 ---- {'_id': ObjectId('5b03cc479341f521755deb49')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:49
1094295 / 1310000 ---- {'_id': ObjectId('5b03d61a9341f521755dfed1')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:50
1094295 / 1315000 ---- {'_id': ObjectId('5b03d6249341f521755e1259')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:52
1094295 / 1320000 ---- {'_id': ObjectId('5b03e79d9341f521755e25e1')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:54
1094295 / 1325000 ---- {'_id': ObjectId('5b05107d9341f521755e3969')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:55
from ProjectUtil.usingModuleTOMODIFY import getNow, mysql_write, mysql_fetch, time, randomSleep, return_logging
import os, random
from pymongo import MongoClient
# Logging setup: the log file is named <YYYYMMDD><script file name>.log,
# using the local date at start-up.
this_file_abspath, this_file_name = os.path.split(os.path.abspath(__file__))
f_log = time.strftime('%Y%m%d', time.localtime()) + this_file_name + '.log'
logging = return_logging(f_log)
# Dump the _id of every document in superpub.ask to a text file, one per line,
# printing a progress report every 1000 documents.
#
# Open the MongoDB connection.
# NOTE(review): host and credentials are hard-coded; consider loading them
# from configuration or the environment instead.
host, username, password = '10.14.14.52', 'ain', 'adm'
uri = "mongodb://%s:%s@%s" % (username, password, host,)
mongo_ask_id_f = 'mongo_ask_id.txt'
c = 0
start_ = getNow()
# Start from an empty output file; a missing file (e.g. first run) is fine.
try:
    os.remove(mongo_ask_id_f)
except FileNotFoundError:
    pass
id_l = []
# Ids written so far, kept in memory so the progress report does not have to
# re-read the whole output file every time.
seen_ids = set()
# Last _id delivered by the cursor; a retry resumes after it instead of
# re-scanning (and re-writing) the collection from the beginning.
last_id = None
while True:
    mongo_client = None
    try:
        mongo_client = MongoClient(uri)
        db = mongo_client.superpub
        c_ask = db.ask
        query = {} if last_id is None else {'_id': {'$gt': last_id}}
        # Sorting by _id makes the "$gt last_id" resume point well defined.
        cursor = c_ask.find(query, {'_id': 1}).sort('_id', 1)
        with open(mongo_ask_id_f, 'a', encoding='utf-8') as fa:
            for doc in cursor:
                c += 1
                id_ = doc['_id']
                last_id = id_
                # Report before the skip below: every multiple of 1000 is also
                # a multiple of 100, so a report placed after the skip would
                # never run.
                if c % 1000 == 0:
                    print('----------------------------', c)
                    print(len(seen_ids), '/', c, '----', doc)
                    print('{}{}{}{}\n'.format('start:', start_, 'now:', getNow()))
                # Original author's experiment ("guess at the cursor mechanism:
                # keep the cursor advancing at least as fast as the insert-only
                # collection grows"): every 100th document is not written.
                # NOTE(review): this drops 1% of the ids from the output file;
                # remove the skip if a complete dump is required.
                if c % 100 == 0:
                    continue
                fa.write('{}\n'.format(id_))
                seen_ids.add(str(id_))
        # The cursor is exhausted: the scan is complete, stop retrying.
        break
    except Exception as e:
        # Top-level boundary of the script: report the error and retry from
        # the last delivered _id after a short pause.
        print(e)
        time.sleep(1)
    finally:
        # Release the connection on both the success and the failure path.
        if mongo_client is not None:
            try:
                mongo_client.close()
            except Exception as e:
                print(e)

浙公网安备 33010602011771号