python elasticsearch 深度分页——scroll的使用与清除(clear_scroll)
网上的大部分教程都讲到了 Elasticsearch 使用 scroll 游标的方法,但使用后往往没有清除游标。这会导致打开的 scroll 上下文数量超过最大限制而报错,因此应该在任务结束时手动清理 scroll(否则只能等到设定的保留时间过期后,游标才会自动清理)。
from elasticsearch import Elasticsearch
def main():
es = Elasticsearch([***], http_auth = ('***', '****'), port = *** )
query = ***
page = es.search(
index= ** *,
scroll = '2m',
size = 1000,
body = {"query": query})
sid = page['_scroll_id']
sid_list = [sid]
scroll_size_max = page['hits']['total']['value']
cnt = 0
while cnt < scroll_size_max:
for info in page['hits']['hits']:
# do something
cnt += 1
page = es.scroll(scroll_id=sid, scroll='2m')
sid = page['_scroll_id']
sid_list.append(sid)
for sid_del in sid_list:
es.clear_scroll(scroll_id=sid_del)
# Run the scroll job only when this file is executed as a script.
if __name__ == "__main__":
    main()

浙公网安备 33010602011771号