并发查询ElasticSearch, 根据分片来实现

并发查询ES,根据分片的个数来设置并发

  1. 获取所有的分片
  2. 按分片数设置并发线程数(每个分片对应一个线程)
  3. 每个线程通过 preference("_shards:N") 只访问自己负责的分片, 并使用scroll全量查询该分片的数据.

直连分片的这种方式有可能会导致ES集群压力增加,只能适用于低频、需要快速导出数据的场景,不能过度依赖.

所用到的依赖:

    <dependencies>
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>elasticsearch-rest-high-level-client</artifactId>
            <version>7.6.2</version>
        </dependency>
    </dependencies>

代码:


import java.io.IOException;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest;
import org.elasticsearch.action.admin.indices.settings.get.GetSettingsResponse;
import org.elasticsearch.action.search.ClearScrollRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchScrollRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.Scroll;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortOrder;

/**
 * Exports every document of one Elasticsearch index by scrolling each primary
 * shard on its own thread.
 *
 * <p>The number of worker threads equals {@code index.number_of_shards}. Each
 * worker restricts its search to a single shard with the {@code _shards:N}
 * search preference and pages through it with the scroll API.
 *
 * <p>This is a demo entry point: connection settings and the index name are
 * placeholders that must be edited before running.
 */
public class ShardQuery {

    /** Elasticsearch host name or IP. Placeholder - replace before running. */
    private static final String ES_HOST = "ip";
    /** Elasticsearch HTTP port. 9200 is the stock default - adjust to the cluster. */
    private static final int ES_PORT = 9200;

    /** Name of the index to export. Placeholder - replace before running. */
    private static final String index = "index_name";
    /** Number of documents read so far (test code, remove in production). */
    private static final AtomicLong count = new AtomicLong(0);
    /** Handle of the periodic progress printer (test code, remove in production). */
    private static ScheduledFuture<?> scheduledFuture;
    /** Single-threaded scheduler that runs the progress printer. */
    private static final ScheduledExecutorService service = new ScheduledThreadPoolExecutor(1);

    /**
     * Creates a client for a cluster without authentication.
     *
     * @return a new client; the caller is responsible for closing it
     */
    private static RestHighLevelClient newEsClient() {
        HttpHost host = new HttpHost(ES_HOST, ES_PORT);
        RestClientBuilder restClientBuilder = RestClient.builder(host);
        return new RestHighLevelClient(restClientBuilder);
    }

    /**
     * Creates a client for a cluster protected by HTTP basic authentication.
     * Not used by the demo; call this instead of {@link #newEsClient()} when
     * the cluster requires credentials.
     *
     * @param username user name sent with every request
     * @param password password sent with every request
     * @return a new client; the caller is responsible for closing it
     */
    private static RestHighLevelClient newEsClient(String username, String password) {
        HttpHost host = new HttpHost(ES_HOST, ES_PORT);
        RestClientBuilder restClientBuilder = RestClient.builder(host);
        CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        credentialsProvider.setCredentials(AuthScope.ANY,
            new UsernamePasswordCredentials(username, password));
        restClientBuilder.setHttpClientConfigCallback(httpClientBuilder -> {
            httpClientBuilder.disableAuthCaching();
            return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
        });
        return new RestHighLevelClient(restClientBuilder);
    }

    /**
     * Starts the progress printer, launches one scroll worker per shard, waits
     * for all of them and prints the total number of documents read.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        countPrinter();

        ExecutorService exec = null;
        try {
            final int shards = getShardsNum(index);
            exec = Executors.newFixedThreadPool(shards);
            final CountDownLatch countDownLatch = new CountDownLatch(shards);

            for (int i = 0; i < shards; i++) {
                final int shardNo = i;
                exec.execute(() -> {
                    try {
                        scrollShard(shardNo, shards);
                    } catch (IOException | RuntimeException e) {
                        System.err.println("shard " + shardNo + " export failed");
                        e.printStackTrace();
                    } finally {
                        // Always release the latch, otherwise a single failed
                        // worker would leave main() blocked in await() forever.
                        countDownLatch.countDown();
                    }
                });
            }

            countDownLatch.await();
            scheduledFuture.cancel(true);
            System.out.println("最终计数器, 一共读取记录数: " + count.toString());
        } catch (InterruptedException e) {
            // Restore the flag for callers and stop workers that are still running.
            Thread.currentThread().interrupt();
            e.printStackTrace();
            if (exec != null) {
                exec.shutdownNow();
            }
        } finally {
            scheduledFuture.cancel(true);
            // Cancelling the future alone leaves the scheduler's non-daemon
            // thread alive, which would keep the JVM from exiting.
            service.shutdownNow();
            if (exec != null) {
                exec.shutdown();
            }
        }
    }

    /**
     * Reads every document of one shard with the scroll API and counts them.
     *
     * @param shardNo zero-based number of the shard this call is pinned to
     * @param shards  total number of shards of the index; the shard preference
     *                is only applied when this is greater than 1
     * @throws IOException if a search, scroll or close request fails
     */
    private static void scrollShard(int shardNo, int shards) throws IOException {
        // try-with-resources closes the client on success and on failure alike.
        try (RestHighLevelClient client = newEsClient()) {
            Scroll scroll = new Scroll(TimeValue.timeValueSeconds(30));
            SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
            searchSourceBuilder.query(QueryBuilders.matchAllQuery());
            searchSourceBuilder.sort("_doc", SortOrder.ASC);
            searchSourceBuilder.size(5); // test value; 1000 ~ 10000 is suggested for production

            SearchRequest searchRequest = new SearchRequest(index);
            searchRequest.scroll(scroll);
            searchRequest.source(searchSourceBuilder);
            if (shards > 1) {
                // Shard preference: restrict this search to a single shard.
                searchRequest.preference("_shards:" + shardNo);
            }

            System.out.println(String.format("启动线程%s, 编号:%d", Thread.currentThread().getName(), shardNo));

            SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
            String scrollId = response.getScrollId();
            try {
                SearchHit[] hits = response.getHits().getHits();
                while (hits.length != 0) {
                    for (SearchHit hit : hits) {
                        String data = hit.getSourceAsString();
                        System.out.println(shardNo + " 查询数据: " + data);
                        // TODO: forward the document to the target sink / data source here
                        count.getAndIncrement();
                    }

                    SearchScrollRequest searchScrollRequest = new SearchScrollRequest(scrollId);
                    searchScrollRequest.scroll(scroll);
                    response = client.scroll(searchScrollRequest, RequestOptions.DEFAULT);
                    scrollId = response.getScrollId();
                    hits = response.getHits().getHits();
                }
            } finally {
                clearScroll(client, scrollId);
            }
        }
    }

    /**
     * Releases a scroll context on the server. Failures are only reported,
     * never propagated, so they cannot mask an earlier exception of the scroll
     * loop.
     *
     * @param client   client that opened the scroll
     * @param scrollId id of the scroll context to release
     */
    private static void clearScroll(RestHighLevelClient client, String scrollId) {
        ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
        clearScrollRequest.addScrollId(scrollId);
        try {
            client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT);
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
        }
    }

    /**
     * Looks up the number of primary shards of an index.
     *
     * @param indexName name of the index to inspect
     * @return the value of {@code index.number_of_shards}, or 1 when the
     *         request fails or the setting is not present in the response
     */
    private static int getShardsNum(String indexName) {
        GetSettingsRequest settingsRequest = new GetSettingsRequest().indices(indexName);
        settingsRequest.names("index.number_of_shards");
        int shards = 1;
        try (RestHighLevelClient client = newEsClient()) {
            GetSettingsResponse settingsResponse = client
                .indices()
                .getSettings(settingsRequest, RequestOptions.DEFAULT);
            String numberOfShards = settingsResponse
                .getSetting(indexName, "index.number_of_shards");
            if (numberOfShards != null) {
                shards = Integer.parseInt(numberOfShards);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return shards;
    }

    /**
     * Schedules a task that prints the current document count once per second,
     * starting immediately, and stores its handle in {@code scheduledFuture}.
     */
    private static void countPrinter() {
        scheduledFuture = service.scheduleAtFixedRate(
            () -> System.out.println(System.currentTimeMillis() + "\t当前count: " + count.get()),
            0, 1, TimeUnit.SECONDS);
    }
}

posted @ 2021-03-31 11:19  须小弥  阅读(1272)  评论(0编辑  收藏  举报