【ElasticSearch】聚合Aggregation

【ElasticSearch】聚合Aggregation

text 类型字段默认禁止聚合/排序操作,可以通过在 mapping 中设置 fielddata=true 开启(fielddata 会占用较多堆内存,如无分词需求,建议优先使用 keyword 类型或 keyword 子字段进行聚合)

PUT cms_search_inside_0d1a60ff-654d-4c1d-9d92-795ff0f9/_mapping
{
  "properties": {
    "mc_0_pubOrg": { 
      "type":     "text",
      "fielddata": true
    }
  }
}

1、分组聚合,无子聚合

GET myindex/_search
{
  "from": 0,
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "terms": {
            "siteId": [
              "1298113079338340354"
            ],
            "boost": 1
          }
        },
        {
          "range": {
            "resourcePublicationDate": {
              "from": "2023-01-01 00:00:00",
              "to": "2023-12-31 00:00:00",
              "include_lower": true,
              "include_upper": true,
              "boost": 1
            }
          }
        }
      ],
      "adjust_pure_negative": true,
      "boost": 1
    }
  },
  "aggregations": {
    "groupByFieldId": {
      "terms": {
        "field": "catalogId",
        "size": 100,
        "min_doc_count": 1,
        "shard_min_doc_count": 0,
        "show_term_doc_count_error": false,
        "order": [
          {
            "_count": "asc"
          },
          {
            "_key": "asc"
          }
        ]
      }
    }
  }
}

结果

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 17,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "groupByFieldId" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "1461259099665141761",
          "doc_count" : 2
        },
        {
          "key" : "1460860333350993921",
          "doc_count" : 3
        },
        {
          "key" : "1460860104652374017",
          "doc_count" : 12
        }
      ]
    }
  }
}

Java实现

// Terms aggregation without sub-aggregations: counts documents per "catalogId" bucket
// and appends one StatisticResultVo per bucket to `list`.
// siteIdList, startTime, endTime, sort, number, list and restHighLevelClient are
// expected to be supplied by the surrounding code.
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.from(0);
// size(0): no hits are returned, only the aggregation result.
searchSourceBuilder.size(0);

// Filter: site ids + publication-date range + three fixed flag values.
BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
boolQueryBuilder.must(QueryBuilders.termsQuery(ElasticsearchConstants.ES_QUERY_SITE_ID, siteIdList));
RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery(ElasticsearchConstants.ES_QUERY_RESOURCE_PUBLICATION_DATE);
rangeQueryBuilder.gte(startTime);
rangeQueryBuilder.lte(endTime);
boolQueryBuilder.must(rangeQueryBuilder);
// NOTE(review): 3 / 1 / 30 are project-specific codes; their meaning is not visible in this snippet.
boolQueryBuilder.must(QueryBuilders.termQuery(ElasticsearchConstants.ES_QUERY_STATUS, 3));
boolQueryBuilder.must(QueryBuilders.termQuery(ElasticsearchConstants.ES_QUERY_ENABLE, 1));
boolQueryBuilder.must(QueryBuilders.termQuery(ElasticsearchConstants.ES_QUERY_TEMPLATE_STATUS, 30));
searchSourceBuilder.query(boolQueryBuilder);

// Order buckets by doc_count: ascending only when sort is "asc", descending otherwise.
BucketOrder order = BucketOrder.count("asc".equals(sort));
TermsAggregationBuilder termsAggregationBuilder = AggregationBuilders.terms("groupByFieldId")
        .field("catalogId")
        .size(number)
        .order(order);
searchSourceBuilder.aggregation(termsAggregationBuilder);

String[] indices = new String[]{"myindex"};
SearchRequest request = Requests.searchRequest(indices).source(searchSourceBuilder);
SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
if (response.status() == RestStatus.OK) {
    // Option 1: cast directly to ParsedStringTerms. The cast throws ClassCastException
    // when the aggregation was not parsed as string terms.
    ParsedStringTerms groupByFieldId = (ParsedStringTerms) response.getAggregations().asMap().get("groupByFieldId");
    // Iterating the typed bucket list avoids raw types and per-element casts;
    // an empty bucket list simply skips the loop.
    for (Terms.Bucket bucket : groupByFieldId.getBuckets()) {
        Long count = bucket.getDocCount();
        StatisticResultVo vo = new StatisticResultVo();
        vo.setId(bucket.getKeyAsString());
        vo.setNumber(count);
        list.add(vo);
    }

    // Option 2 (alternative to option 1): guard every cast with instanceof.
    // Use ONE of the two options. Both append to `list`, so running them together
    // would add every bucket twice.
    //
    // Map<String, Aggregation> aggregationMap = response.getAggregations().asMap();
    // Aggregation aggregation = aggregationMap.get("groupByFieldId");
    // if (aggregation instanceof ParsedStringTerms) {
    //     ParsedStringTerms parsedStringTerms = (ParsedStringTerms) aggregation;
    //     for (Terms.Bucket groupBucket : parsedStringTerms.getBuckets()) {
    //         if (groupBucket instanceof ParsedStringTerms.ParsedBucket) {
    //             Long count = groupBucket.getDocCount();
    //             StatisticResultVo vo = new StatisticResultVo();
    //             vo.setId(groupBucket.getKeyAsString());
    //             vo.setNumber(count);
    //             list.add(vo);
    //         }
    //     }
    // }
}

 

2、分组聚合,有子聚合

GET myindex/_search
{
  "from": 0,
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "terms": {
            "siteId": [
              "1298113079338340354"
            ],
            "boost": 1
          }
        },
        {
          "range": {
            "accessTime": {
              "from": "2023-01-01 00:00:00",
              "to": "2023-12-31 00:00:00",
              "include_lower": true,
              "include_upper": true,
              "boost": 1
            }
          }
        }
      ],
      "adjust_pure_negative": true,
      "boost": 1
    }
  },
  "aggregations": {
    "groupByFieldId": {
      "terms": {
        "field": "catId",
        "size": 100,
        "min_doc_count": 1,
        "shard_min_doc_count": 0,
        "show_term_doc_count_error": false,
        "order": [
          {
            "_count": "asc"
          },
          {
            "_key": "asc"
          }
        ]
      },
      "aggregations": {
        "uvCount": {
          "cardinality": {
            "field": "accessIp"
          }
        }
      }
    }
  }
}

结果

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 6,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "groupByFieldId" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "1460860104652374017",
          "doc_count" : 3,
          "uvCount" : {
            "value" : 1
          }
        },
        {
          "key" : "1460860333350993921",
          "doc_count" : 3,
          "uvCount" : {
            "value" : 1
          }
        }
      ]
    }
  }
}

Java实现

// Terms aggregation with a cardinality sub-aggregation: for every "catId" bucket,
// doc_count is stored as the first number and the distinct "accessIp" count ("uvCount")
// as the second number of a StatisticResultVo appended to `list`.
// siteIdList, startTime, endTime, sort, number, list, log and restHighLevelClient are
// expected to be supplied by the surrounding code.
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.from(0);
// size(0): no hits are returned, only the aggregation result.
searchSourceBuilder.size(0);

BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
boolQueryBuilder.must(QueryBuilders.termsQuery("siteId", siteIdList));
RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery("accessTime");
rangeQueryBuilder.gte(startTime);
rangeQueryBuilder.lte(endTime);
boolQueryBuilder.must(rangeQueryBuilder);
searchSourceBuilder.query(boolQueryBuilder);

// Order buckets by doc_count: ascending only when sort is "asc", descending otherwise.
BucketOrder order = BucketOrder.count("asc".equals(sort));
searchSourceBuilder.aggregation(
        AggregationBuilders.terms("groupByFieldId")
                .field("catId")
                .size(number)
                .order(order)
                .subAggregation(AggregationBuilders.cardinality("uvCount").field("accessIp")));

String[] indices = new String[]{"myindex"};
log.info("DSL:" + searchSourceBuilder.toString());
SearchRequest request = Requests.searchRequest(indices).source(searchSourceBuilder);
SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
if (response.status() == RestStatus.OK) {
    ParsedStringTerms groupByFieldId = (ParsedStringTerms) response.getAggregations().asMap().get("groupByFieldId");
    // List<?> instead of a raw List; an empty bucket list simply skips the loop.
    List<?> buckets = groupByFieldId.getBuckets();
    for (Object bucket : buckets) {
        ParsedStringTerms.ParsedBucket parsedBucket = (ParsedStringTerms.ParsedBucket) bucket;
        ParsedCardinality cardinalityValue = (ParsedCardinality) parsedBucket.getAggregations().asMap().get("uvCount");
        // Primitive narrowing cast replaces the deprecated `new Long(x).intValue()`;
        // the resulting int is the same.
        int pvCount = (int) parsedBucket.getDocCount();
        int uvCount = (int) cardinalityValue.getValue();
        StatisticResultVo vo = new StatisticResultVo();
        vo.setId(parsedBucket.getKeyAsString());
        vo.setNumber(pvCount);
        vo.setNumber2(uvCount);
        list.add(vo);
    }
}

 

3、多重分组聚合

GET myindex/_search
{
  "from": 0,
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "siteId": {
              "value": "1298113079338340354",
              "boost": 1
            }
          }
        },
        {
          "range": {
            "accessTime": {
              "from": "2023-01-01 00:00:00",
              "to": "2023-12-31 23:59:59",
              "include_lower": true,
              "include_upper": true,
              "boost": 1
            }
          }
        }
      ],
      "adjust_pure_negative": true,
      "boost": 1
    }
  },
  "aggregations": {
    "groupByTime": {
      "terms": {
        "field": "timeym",
        "size": 9999999,
        "min_doc_count": 1,
        "shard_min_doc_count": 0,
        "show_term_doc_count_error": false,
        "order": {
          "_key": "asc"
        }
      },
      "aggregations": {
        "groupByIp": {
          "terms": {
            "field": "accessIp",
            "size": 9999999,
            "min_doc_count": 1,
            "shard_min_doc_count": 0,
            "show_term_doc_count_error": false,
            "order": [
              {
                "_count": "desc"
              },
              {
                "_key": "asc"
              }
            ]
          },
          "aggregations": {
            "uvCount": {
              "cardinality": {
                "field": "accessIp"
              }
            }
          }
        }
      }
    }
  }
}

结果

#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.13/security-minimal-setup.html to enable security.
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "groupByTime" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "202309",
          "doc_count" : 1,
          "groupByIp" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : "0:0:0:0:0:0:0:1",
                "doc_count" : 1,
                "uvCount" : {
                  "value" : 1
                }
              }
            ]
          }
        },
        {
          "key" : "202310",
          "doc_count" : 7,
          "groupByIp" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : "0:0:0:0:0:0:0:1",
                "doc_count" : 4,
                "uvCount" : {
                  "value" : 1
                }
              },
              {
                "key" : "192.168.100.21",
                "doc_count" : 2,
                "uvCount" : {
                  "value" : 1
                }
              },
              {
                "key" : "10.25.62.4",
                "doc_count" : 1,
                "uvCount" : {
                  "value" : 1
                }
              }
            ]
          }
        }
      ]
    }
  }
}

Java实现

// Nested terms aggregation: buckets by a time field first, then by "accessIp" inside
// each time bucket, with a cardinality sub-aggregation ("uvCount") per IP bucket.
// One TemplateAccessVo per time bucket is appended to `list`; its per-IP children are
// attached only when the time bucket contains at least one IP bucket.
// statisticsVo, tableName, list, log and restHighLevelClient are expected to be
// supplied by the surrounding code.
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.from(0);
// size(0): no hits are returned, only the aggregation result.
searchSourceBuilder.size(0);

BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
// Site filter is applied only when a site id was given.
if (StrUtil.isNotEmpty(statisticsVo.getSiteId())) {
    boolQueryBuilder.must(QueryBuilders.termQuery("siteId", statisticsVo.getSiteId()));
}
// Access-time window, inclusive on both ends.
RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery("accessTime");
rangeQueryBuilder.gte(statisticsVo.getStartTime());
rangeQueryBuilder.lte(statisticsVo.getEndTime());
boolQueryBuilder.must(rangeQueryBuilder);
searchSourceBuilder.query(boolQueryBuilder);

// Date type 3 groups on "timeym", 2 on "timeymd", anything else on "timeymdh".
// Evaluated once here instead of once per bucket inside the loop below.
final boolean byMonth = statisticsVo.getDateType().equals(3);
final boolean byDay = statisticsVo.getDateType().equals(2);
String field;
if (byMonth) {
    field = "timeym";
} else if (byDay) {
    field = "timeymd";
} else {
    field = "timeymdh";
}

// NOTE(review): "uvCount" is a cardinality of accessIp inside a bucket that is already
// keyed by accessIp, so it looks like it can only ever be 1 — confirm whether it was
// meant to sit directly under "groupByTime".
TermsAggregationBuilder groupByIpAggregation = AggregationBuilders.terms("groupByIp")
        .field("accessIp")
        .size(9999999)
        .order(BucketOrder.count(false))
        .subAggregation(AggregationBuilders.cardinality("uvCount").field("accessIp"));
searchSourceBuilder.aggregation(
        AggregationBuilders.terms("groupByTime")
                .field(field)
                .size(9999999)
                .order(BucketOrder.key(true))
                .subAggregation(groupByIpAggregation));
log.info("大屏访问量 DSL:" + searchSourceBuilder.toString());

SearchRequest request = Requests.searchRequest(tableName).source(searchSourceBuilder);
SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
if (response.status() == RestStatus.OK) {
    ParsedStringTerms groupByTime = (ParsedStringTerms) response.getAggregations().asMap().get("groupByTime");
    // List<?> instead of a raw List; an empty bucket list simply skips the loop.
    List<?> timeList = groupByTime.getBuckets();
    for (Object timeObject : timeList) {
        ParsedStringTerms.ParsedBucket timeBucket = (ParsedStringTerms.ParsedBucket) timeObject;
        String timeKey = timeBucket.getKeyAsString();

        TemplateAccessVo item = new TemplateAccessVo();
        // Re-format the compact bucket key by slicing fixed character positions:
        // [0,4) "-" [4,6) for months, additionally "-" [6,8) for days,
        // and [8,10) + ":00" for the remaining (hourly) case.
        if (byMonth) {
            item.setCountTime(timeKey.substring(0, 4) + "-" + timeKey.substring(4, 6));
        } else if (byDay) {
            item.setCountTime(timeKey.substring(0, 4) + "-" + timeKey.substring(4, 6) + "-" + timeKey.substring(6, 8));
        } else {
            item.setCountTime(timeKey.substring(8, 10) + ":00");
        }
        // Primitive narrowing cast replaces the deprecated `new Long(x).intValue()`.
        item.setPvCount((int) timeBucket.getDocCount());

        ParsedStringTerms groupByIp = (ParsedStringTerms) timeBucket.getAggregations().asMap().get("groupByIp");
        List<?> ipList = groupByIp.getBuckets();
        // The guard stays: ipStatistics is attached only when there are IP buckets.
        if (CollectionUtil.isNotEmpty(ipList)) {
            List<TemplateAccessVo> ipStatistics = new ArrayList<>();
            item.setIpStatistics(ipStatistics);
            for (Object ipObject : ipList) {
                ParsedStringTerms.ParsedBucket ipBucket = (ParsedStringTerms.ParsedBucket) ipObject;
                ParsedCardinality cardinalityValue = (ParsedCardinality) ipBucket.getAggregations().asMap().get("uvCount");

                TemplateAccessVo sub = new TemplateAccessVo();
                sub.setCountIP(ipBucket.getKeyAsString());
                sub.setPvCount((int) ipBucket.getDocCount());
                sub.setUvCount((int) cardinalityValue.getValue());
                ipStatistics.add(sub);
            }
        }
        list.add(item);
    }
}

 

 

4、日期聚合

按天聚合
"aggregations": {
  "dateHistogram": {
    "date_histogram": {
      "field": "myDate",
      "format": "yyyy-MM-dd",
      "calendar_interval": "day",
      "offset": 0,
      "order": {
        "_key": "asc"
      },
      "keyed": false,
      "min_doc_count": 0
    }
  }
}
按月聚合
"aggregations": {
  "dateHistogram": {
    "date_histogram": {
      "field": "myDate",
      "format": "yyyy-MM",
      "calendar_interval": "month",
      "offset": 0,
      "order": {
        "_key": "asc"
      },
      "keyed": false,
      "min_doc_count": 0
    }
  }
}
按年聚合
"aggregations": {
  "dateHistogram": {
    "date_histogram": {
      "field": "myDate",
      "format": "yyyy",
      "calendar_interval": "year",
      "offset": 0,
      "order": {
        "_key": "asc"
      },
      "keyed": false,
      "min_doc_count": 0
    }
  }
}
// Builds a date_histogram aggregation on "myDate" whose interval, key format and
// extended bounds depend on `type`: 1 = by day, 2 = by month, anything else = by year.
int type = 1;
DateHistogramInterval interval;
String format;
String start;
String end;
// `type` is a primitive int, so it is compared by value (int has no equals()).
switch (type) {
    case 1:
        interval = DateHistogramInterval.DAY;
        format = "yyyy-MM-dd";
        start = "2023-10-01";
        end = "2023-10-31";
        break;
    case 2:
        interval = DateHistogramInterval.MONTH;
        format = "yyyy-MM";
        start = "2022-09";
        end = "2023-10";
        break;
    default:
        interval = DateHistogramInterval.YEAR;
        format = "yyyy";
        start = "2022";
        end = "2023";
        break;
}
// calendarInterval(...) replaces the deprecated dateHistogramInterval(...).
// minDocCount(0) is combined with extendedBounds(start, end); the bounds strings
// are written in the same pattern as `format`.
DateHistogramAggregationBuilder aggregationBuilder = AggregationBuilders.dateHistogram("myDate")
        .field("myDate")
        .calendarInterval(interval)
        .format(format)
        .minDocCount(0)
        .order(BucketOrder.key(true))
        .extendedBounds(new LongBounds(start, end));

 

 

GET cms_search_inside_0d1a60ff-654d-4c1d-9d92-795ff0f9/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "siteId": {
              "value": "1298113079338340354",
              "boost": 1
            }
          }
        },
        {
          "wildcard": {
            "searchType": {
              "wildcard": "*wenjian*",
              "boost": 1
            }
          }
        },
        {
          "multi_match": {
            "query": "西安市未央区",
            "fields": [
              "resourceSummary^1.0",
              "title^1.0"
            ],
            "type": "best_fields",
            "operator": "OR",
            "analyzer": "ik_smart",
            "slop": 0,
            "prefix_length": 0,
            "max_expansions": 50,
            "minimum_should_match": "75%",
            "zero_terms_query": "NONE",
            "auto_generate_synonyms_phrase_query": true,
            "fuzzy_transpositions": true,
            "boost": 1
          }
        }
      ],
      "must_not": [
        {
          "terms": {
            "cmsCatalogId": [
              "1531451844967424002",
              "1460863730129059841",
              "1531451955084681217",
              "1484063351423234049"
            ],
            "boost": 1
          }
        }
      ],
      "adjust_pure_negative": true,
      "boost": 1
    }
  },
  "aggregations": {
    "group": {
      "terms": {
        "field": "mc_0_pubOrg",
        "size": 2000,
        "min_doc_count": 1,
        "shard_min_doc_count": 0,
        "show_term_doc_count_error": false,
        "order": [
          {
            "_count": "desc"
          },
          {
            "_key": "asc"
          }
        ]
      }
    }
  },
  "highlight": {
    "fields": {
      "title": {
        "fragment_size": 800000,
        "number_of_fragments": 0
      },
      "resourceSummary": {
        "fragment_size": 800000,
        "number_of_fragments": 0
      }
    }
  },
  "collapse": {
    "field": "templateId",
    "inner_hits": {
      "name": "collapse",
      "ignore_unmapped": true,
      "from": 0,
      "size": 0,
      "version": false,
      "seq_no_primary_term": false,
      "explain": false,
      "track_scores": true,
      "sort": [
        {
          "_score": {
            "order": "desc"
          }
        }
      ]
    }
  }
}

 

posted @ 2023-01-06 09:55  翠微  阅读(52)  评论(0)  编辑  收藏  举报