MongoDB bulk_write & UpdateOne

 

Using bulk_write can speed up MongoDB writes considerably compared with updating or inserting documents one at a time, because the pending operations are sent to the server in large batches instead of one request per document.
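For contrast, the one-document-at-a-time version looks roughly like the sketch below (a minimal sketch; the host, database, and collection names simply mirror the script that follows). Every document that is missing a field costs its own round trip to the server:

import pymongo

mongoclient = pymongo.MongoClient(host="mongodb3.xxx.com", port=27017)
collection = mongoclient["xxx"].resume_test

# Fields every document should carry.
expected_fields = {"Duplicate", "Skill", "SourceURL"}

for data in collection.find({}):
    # Fill in any expected field the document lacks.
    add = {field: "" for field in expected_fields.difference(data)}
    if add:
        # update_one sends one write request per document, which is the slow path.
        collection.update_one({"_id": data["_id"]}, {"$set": add})

The bulk_write version below builds the same updates as UpdateOne operations and flushes them in batches of roughly 10,000, so the whole pass needs far fewer round trips: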

import time

import pymongo
from pymongo.operations import UpdateOne

tic = time.time()

mongoclient = pymongo.MongoClient(host="mongodb3.xxx.com", port=27017)
MongoDB = mongoclient["xxx"]
collection = MongoDB.resume_test

# Fields every document should carry; any missing one is filled with an empty string.
expected_fields = {"Duplicate", "Skill", "SourceURL"}


def bulk_write(requests, collection, last_one=False):
    # Flush once the batch reaches 10,000 pending operations, or on the final call.
    # The `requests` guard avoids calling bulk_write with an empty list, which pymongo rejects.
    if requests and (len(requests) >= 10000 or last_one):
        collection.bulk_write(requests)
        return []
    return requests


requests = []
for index, data in enumerate(collection.find({})):
    if index % 10000 == 0:
        print(index)

    doc_id = data["_id"]
    # Collect the expected fields this document is missing.
    add = {field: "" for field in expected_fields.difference(data)}
    if add:
        requests.append(UpdateOne({"_id": doc_id}, {"$set": add}))
        requests = bulk_write(requests, collection)

# Flush whatever is still pending after the loop.
requests = bulk_write(requests, collection, last_one=True)

toc = time.time()
print(f"finished, time cost: {toc - tic}")
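One more detail worth noting: collection.bulk_write() returns a BulkWriteResult, so the script can report what actually changed, and passing ordered=False lets the server continue past individual failures (and often run faster) since these updates do not depend on each other. A minimal sketch, assuming requests is a non-empty list of UpdateOne operations as in the script above:

# Unordered batch: one failed update does not abort the rest.
result = collection.bulk_write(requests, ordered=False)
print(result.matched_count, result.modified_count, result.upserted_count)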

 

posted @ 2019-08-22 16:37  NachoLau