from ProjectUtil.usingModuleTOMODIFY import getNow
from pymongo import MongoClient
# MongoDB connection settings.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a config file. If they ever contain characters
# such as ':', '/' or '@', they must be percent-escaped before being placed
# in the URI.
host, username, password = '10.14.14.12', 'ain', 'ad'
uri = "mongodb://%s:%s@%s" % (username, password, host,)

# File paths, each paired with an (initially empty) list. Only q_f_export is
# used below, to derive the path of the "-distinct" input file.
q_f_export, q_export = '/data/bigdata/mongoexport/superpub-ask-question.csv', []
q_f_mysql, q_mysql = '/data/bigdata/mongoexport/question.txt', []
q_f_distinct = '{}-distinct'.format(q_f_export)

# The export loop numbers its output files starting at MYSQL_max_q_id + 1.
# NOTE(review): presumably the highest question id already present in MySQL
# -- confirm before re-running.
MYSQL_max_q_id = 3979647

# One question per line; only the trailing newline is stripped. The `with`
# block closes the file, so no explicit close() is needed.
with open(q_f_distinct, 'r', encoding='utf-8') as fr:
    q_distinct = [i.rstrip('\n') for i in fr]

# Start marker, printed next to a fresh getNow() in the progress output.
start_ = getNow()

# Collection queried by get_momgo_res(): database "superpub", collection "ask".
mongo_client = MongoClient(uri)
db = mongo_client.superpub
mongo_collection = db.ask
def get_momgo_res(question):
    """Return the distinct ``answer`` values stored for *question*.

    Looks up documents in the module-level ``mongo_collection`` whose
    ``question`` field equals *question*, projecting the ``answer`` field,
    and returns each distinct answer once, in first-seen order. The result
    is an empty list when nothing matches.

    Duplicates are detected by scanning the result list, so answers do not
    need to be hashable.
    """
    cursor = mongo_collection.find({"question": question}, {'answer': 1})
    answers = []
    # Keep iterating for as long as the cursor reports it may yield more data.
    while cursor.alive:
        for doc in cursor:
            answer = doc['answer']
            if answer in answers:
                continue
            answers.append(answer)
    return answers
def w(f, s):
    """Write the string *s* to the file at path *f* as UTF-8 text.

    The file is opened in ``'w'`` mode, so any existing content is replaced.
    The ``with`` block closes the file on exit (including on error), so no
    explicit ``close()`` call is needed.
    """
    with open(f, 'w', encoding='utf-8') as fw:
        fw.write(s)
# Export every question that has at least one answer in MongoDB.
# For a question assigned id N this writes:
#   <dir_>N q      -> the question text        (file name "<N>q")
#   <dir_>N a<k>   -> its k-th distinct answer (file name "<N>a<k>")
le_ = len(q_distinct)
c = MYSQL_max_q_id + 1  # next id to assign
dir_ = '/data/bigdata/mongoexport/QA/'
for q in q_distinct:
    try:
        a = get_momgo_res(q)
        if not a:
            # No answers stored for this question: nothing to export.
            continue
        w('{}{}q'.format(dir_, c), q)
        # Remember the id used for the question file before advancing the
        # counter, so the answer files carry the SAME id as their question.
        # (Previously the counter was bumped first, which filed every answer
        # under the following question's id.)
        q_id = c
        c += 1
        for index_, i in enumerate(a):
            w('{}{}a{}'.format(dir_, q_id, index_), i)
    except Exception as e:
        # Best-effort export: report the failure and move on to the next
        # question instead of aborting the whole run.
        print(e)
    # Progress line, emitted whenever the id counter sits on a multiple of
    # 10000 at the end of an iteration that was not skipped.
    if c % 10000 == 0:
        print(c - MYSQL_max_q_id, '/', le_, start_, ':',
              getNow())