DSL:聚合
当前文档内容
PUT devicelog_01
{
"mappings" : {
"log" : {
"properties" : {
"Items" : {
"properties" : {
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"value" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"OperationDateTime" : {
"type" : "date"
},
"systemId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 5,
"max_score" : 1.0,
"hits" : [
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0001",
"_score" : 1.0,
"_source" : {
"systemId" : "000001",
"OperationDateTime" : 1583321643000,
"Items" : [
{
"name" : "kk",
"value" : "12"
},
{
"name" : "gg",
"value" : "24"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0004",
"_score" : 1.0,
"_source" : {
"systemId" : "000004",
"OperationDateTime" : 1583494443000,
"Items" : [
{
"name" : "kk",
"value" : "44"
},
{
"name" : "gg",
"value" : "44"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0002",
"_score" : 1.0,
"_source" : {
"systemId" : "000002",
"OperationDateTime" : 1583148843000,
"Items" : [
{
"name" : "kk",
"value" : "11"
},
{
"name" : "gg",
"value" : "11"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0003",
"_score" : 1.0,
"_source" : {
"systemId" : "000003",
"OperationDateTime" : 1583408043000,
"Items" : [
{
"name" : "kk",
"value" : "55"
},
{
"name" : "gg",
"value" : "55"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0005",
"_score" : 1.0,
"_source" : {
"systemId" : "000005",
"OperationDateTime" : 1583580843000,
"Items" : [
{
"name" : "kk",
"value" : "66"
},
{
"name" : "gg",
"value" : "66"
}
]
}
}
]
}
}
GET libary/_mapping
GET libary/_search
PUT libary/book/003
{
"name": "book3",
"sales": 40.23,
"number": "000003",
"have": 50000,
"nohave": 500
}
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 1.0,
"hits" : [
{
"_index" : "libary",
"_type" : "book",
"_id" : "003",
"_score" : 1.0,
"_source" : {
"name" : "book3",
"sales" : 40.23,
"number" : "000003",
"have" : 50000,
"nohave" : 500
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "001",
"_score" : 1.0,
"_source" : {
"name" : "book1",
"sales" : 10.23,
"number" : "000001",
"have" : 20000,
"nohave" : 100
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "002",
"_score" : 1.0,
"_source" : {
"name" : "book2",
"sales" : 30.23,
"number" : "000002",
"have" : 30000,
"nohave" : 300
}
}
]
}
}
度量聚合
度量聚合接收一个输入文档集并生成至少一个统计值
min、max、sum、avg聚合
min、max、sum和avg聚合的使用很相似。它们对于给定字段分别返回最小值、最大值、总和和平均值。任何数值型字段都可以作为这些值的源。
max
GET libary/_search
{
"aggs": {
"max_have": {
"max": {
"field": "have"
}
}
}
}
结果
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 1.0,
"hits" : [
{
"_index" : "libary",
"_type" : "book",
"_id" : "003",
"_score" : 1.0,
"_source" : {
"name" : "book3",
"sales" : 40.23,
"number" : "000003",
"have" : 50000,
"nohave" : 500
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "001",
"_score" : 1.0,
"_source" : {
"name" : "book1",
"sales" : 10.23,
"number" : "000001",
"have" : 20000,
"nohave" : 100
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "002",
"_score" : 1.0,
"_source" : {
"name" : "book2",
"sales" : 30.23,
"number" : "000002",
"have" : 30000,
"nohave" : 300
}
}
]
},
"aggregations" : {
"max_have" : {
"value" : 50000.0
}
}
}
使用脚本
聚合使用的值是在原始have值的基础上减去100
GET libary/_search
{
"aggs": {
"max_have": {
"max": {
"script": "doc['have'].value-100"
}
}
}
}
{
"took" : 61,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 1.0,
"hits" : [
{
"_index" : "libary",
"_type" : "book",
"_id" : "003",
"_score" : 1.0,
"_source" : {
"name" : "book3",
"sales" : 40.23,
"number" : "000003",
"have" : 50000,
"nohave" : 500
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "001",
"_score" : 1.0,
"_source" : {
"name" : "book1",
"sales" : 10.23,
"number" : "000001",
"have" : 20000,
"nohave" : 100
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "002",
"_score" : 1.0,
"_source" : {
"name" : "book2",
"sales" : 30.23,
"number" : "000002",
"have" : 30000,
"nohave" : 300
}
}
]
},
"aggregations" : {
"max_have" : {
"value" : 49900.0
}
}
}
还可以如下写
GET libary/_search
{
"aggs": {
"max_have": {
"max": {
"field": "have",
"script": "_value-100"
}
}
}
}
value_count聚合
value_count聚合跟前面描述的聚合类似,只是输入字段不一定要是数值型的。
计算出某个字段出现的总的次数
GET libary/_search
{
"aggs": {
"number_of_items": {
"value_count": {
"field": "number"
}
}
}
}
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 1.0,
"hits" : [
{
"_index" : "libary",
"_type" : "book",
"_id" : "003",
"_score" : 1.0,
"_source" : {
"name" : "book3",
"sales" : 40.23,
"number" : "000003",
"have" : 50000,
"nohave" : 500
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "001",
"_score" : 1.0,
"_source" : {
"name" : "book1",
"sales" : 10.23,
"number" : "000001",
"have" : 20000,
"nohave" : 100
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "002",
"_score" : 1.0,
"_source" : {
"name" : "book2",
"sales" : 30.23,
"number" : "000002",
"have" : 30000,
"nohave" : 300
}
}
]
},
"aggregations" : {
"number_of_items" : {
"value" : 3
}
}
}
stats和extended_stats聚合
stats和extended_stats聚合可以看成是在单一聚合对象中返回所有前面描述聚合的一种聚合。
把各个统计都统计一遍
可以看到,除了已知的值,我们还得到平方和、方差和标准差统计。
GET libary/_search
{
"aggs": {
"stats_have": {
"extended_stats": {
"field": "have"
}
}
}
}
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 1.0,
"hits" : [
{
"_index" : "libary",
"_type" : "book",
"_id" : "003",
"_score" : 1.0,
"_source" : {
"name" : "book3",
"sales" : 40.23,
"number" : "000003",
"have" : 50000,
"nohave" : 500
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "001",
"_score" : 1.0,
"_source" : {
"name" : "book1",
"sales" : 10.23,
"number" : "000001",
"have" : 20000,
"nohave" : 100
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "002",
"_score" : 1.0,
"_source" : {
"name" : "book2",
"sales" : 30.23,
"number" : "000002",
"have" : 30000,
"nohave" : 300
}
}
]
},
"aggregations" : {
"stats_have" : {
"count" : 3,
"min" : 20000.0,
"max" : 50000.0,
"avg" : 33333.333333333336,
"sum" : 100000.0,
"sum_of_squares" : 3.8E9,
"variance" : 1.555555555555555E8,
"std_deviation" : 12472.191289246468,
"std_deviation_bounds" : {
"upper" : 58277.71591182627,
"lower" : 8388.9507548404
}
}
}
}
桶聚合
桶聚合返回很多子集,并限定输入数据到一个特殊的叫做桶的子集中。
terms聚合
terms聚合为字段中每个词条返回一个桶。这允许你生成字段每个值的统计。
计算出have的每个值有多少个
GET libary/_search
{
"aggs": {
"availability": {
"terms": {
"field": "have"
}
}
}
}
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 1.0,
"hits" : [
{
"_index" : "libary",
"_type" : "book",
"_id" : "003",
"_score" : 1.0,
"_source" : {
"name" : "book3",
"sales" : 40.23,
"number" : "000003",
"have" : 50000,
"nohave" : 500
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "001",
"_score" : 1.0,
"_source" : {
"name" : "book1",
"sales" : 10.23,
"number" : "000001",
"have" : 20000,
"nohave" : 100
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "002",
"_score" : 1.0,
"_source" : {
"name" : "book2",
"sales" : 30.23,
"number" : "000002",
"have" : 30000,
"nohave" : 300
}
}
]
},
"aggregations" : {
"availability" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 20000,
"doc_count" : 1
},
{
"key" : 30000,
"doc_count" : 1
},
{
"key" : 50000,
"doc_count" : 1
}
]
}
}
}
为了使用key属性值对聚合排序,可以发送以下查询
可以按升序排(asc),也可以按降序排(desc)。在我们的例子中,使用key属性(_term)排序(从Elasticsearch 6.0起_term已被弃用,推荐改用_key)。另一个选择是_count,告诉Elasticsearch使用doc_count属性来排序。
GET libary/_search
{
"aggs": {
"availability": {
"terms": {
"field": "have",
"size": 2,
"order": {
"_term": "asc"
}
}
}
}
}
range聚合
range聚合使用定义的范围来创建桶。
GET libary/_search
{
"aggs": {
"range1": {
"range": {
"field": "have",
"ranges": [
{
"from": 1000,
"to": 40000
},
{
"from": 40000,
"to": 50001
}
]
}
}
}
}
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 1.0,
"hits" : [
{
"_index" : "libary",
"_type" : "book",
"_id" : "003",
"_score" : 1.0,
"_source" : {
"name" : "book3",
"sales" : 40.23,
"number" : "000003",
"have" : 50000,
"nohave" : 500
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "001",
"_score" : 1.0,
"_source" : {
"name" : "book1",
"sales" : 10.23,
"number" : "000001",
"have" : 20000,
"nohave" : 100
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "002",
"_score" : 1.0,
"_source" : {
"name" : "book2",
"sales" : 30.23,
"number" : "000002",
"have" : 30000,
"nohave" : 300
}
}
]
},
"aggregations" : {
"range1" : {
"buckets" : [
{
"key" : "1000.0-40000.0",
"from" : 1000.0,
"to" : 40000.0,
"doc_count" : 2
},
{
"key" : "40000.0-50001.0",
"from" : 40000.0,
"to" : 50001.0,
"doc_count" : 1
}
]
}
}
}
创建用户界面时,可以为每个桶自动生成一个标签。打开此功能很简单:只需要添加keyed属性并将其设置为true
GET libary/_search
{
"aggs": {
"range1": {
"range": {
"field": "have",
"keyed": true,
"ranges": [
{
"from": 1000,
"to": 40000
},
{
"from": 40000,
"to": 50001
}
]
}
}
}
}
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 1.0,
"hits" : [
{
"_index" : "libary",
"_type" : "book",
"_id" : "003",
"_score" : 1.0,
"_source" : {
"name" : "book3",
"sales" : 40.23,
"number" : "000003",
"have" : 50000,
"nohave" : 500
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "001",
"_score" : 1.0,
"_source" : {
"name" : "book1",
"sales" : 10.23,
"number" : "000001",
"have" : 20000,
"nohave" : 100
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "002",
"_score" : 1.0,
"_source" : {
"name" : "book2",
"sales" : 30.23,
"number" : "000002",
"have" : 30000,
"nohave" : 300
}
}
]
},
"aggregations" : {
"range1" : {
"buckets" : {
"1000.0-40000.0" : {
"from" : 1000.0,
"to" : 40000.0,
"doc_count" : 2
},
"40000.0-50001.0" : {
"from" : 40000.0,
"to" : 50001.0,
"doc_count" : 1
}
}
}
}
}
你可能已经注意到,结构略有变化,buckets字段不再是数组,而是一个映射(对象),其键是根据范围生成的。
这行得通,但不太漂亮。我们的例子中,给每个桶一个名称会更有用。这是可能的,我们可以为
每个范围添加key属性并把它的值设置为所需的名称
GET libary/_search
{
"aggs": {
"range1": {
"range": {
"field": "have",
"keyed": true,
"ranges": [
{
"key": "key1",
"from": 1000,
"to": 40000
},
{
"key": "key2",
"from": 40000,
"to": 50001
}
]
}
}
}
}
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 1.0,
"hits" : [
{
"_index" : "libary",
"_type" : "book",
"_id" : "003",
"_score" : 1.0,
"_source" : {
"name" : "book3",
"sales" : 40.23,
"number" : "000003",
"have" : 50000,
"nohave" : 500
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "001",
"_score" : 1.0,
"_source" : {
"name" : "book1",
"sales" : 10.23,
"number" : "000001",
"have" : 20000,
"nohave" : 100
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "002",
"_score" : 1.0,
"_source" : {
"name" : "book2",
"sales" : 30.23,
"number" : "000002",
"have" : 30000,
"nohave" : 300
}
}
]
},
"aggregations" : {
"range1" : {
"buckets" : {
"key1" : {
"from" : 1000.0,
"to" : 40000.0,
"doc_count" : 2
},
"key2" : {
"from" : 40000.0,
"to" : 50001.0,
"doc_count" : 1
}
}
}
}
}
date_range聚合
date_range聚合类似于前面讨论的range聚合,但它专用在使用日期类型的字段。
GET devicelog_01/_search
{
"aggs": {
"date1": {
"date_range": {
"field": "OperationDateTime",
"ranges": [
{
"from": "2020-03-04",
"to": "now"
}
]
}
}
}
}
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 5,
"max_score" : 1.0,
"hits" : [
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0001",
"_score" : 1.0,
"_source" : {
"systemId" : "000001",
"OperationDateTime" : 1583321643000,
"Items" : [
{
"name" : "kk",
"value" : "12"
},
{
"name" : "gg",
"value" : "24"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0004",
"_score" : 1.0,
"_source" : {
"systemId" : "000004",
"OperationDateTime" : 1583494443000,
"Items" : [
{
"name" : "kk",
"value" : "44"
},
{
"name" : "gg",
"value" : "44"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0002",
"_score" : 1.0,
"_source" : {
"systemId" : "000002",
"OperationDateTime" : 1583148843000,
"Items" : [
{
"name" : "kk",
"value" : "11"
},
{
"name" : "gg",
"value" : "11"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0003",
"_score" : 1.0,
"_source" : {
"systemId" : "000003",
"OperationDateTime" : 1583408043000,
"Items" : [
{
"name" : "kk",
"value" : "55"
},
{
"name" : "gg",
"value" : "55"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0005",
"_score" : 1.0,
"_source" : {
"systemId" : "000005",
"OperationDateTime" : 1583580843000,
"Items" : [
{
"name" : "kk",
"value" : "66"
},
{
"name" : "gg",
"value" : "66"
}
]
}
}
]
},
"aggregations" : {
"date1" : {
"buckets" : [
{
"key" : "2020-03-04T00:00:00.000Z-2020-04-04T13:35:24.625Z",
"from" : 1.58328E12,
"from_as_string" : "2020-03-04T00:00:00.000Z",
"to" : 1.586007324625E12,
"to_as_string" : "2020-04-04T13:35:24.625Z",
"doc_count" : 4
}
]
}
}
}
与普通的range聚合比较,唯一改变的是聚合类型(date_range)。可以用Elasticsearch认可的字符串格式传递日期(更多信息请参阅第2章),
或者用数值:自1970-01-01以来的毫秒数
Elasticsearch也允许使用format属性来定义日期格式。
GET devicelog_01/_search
{
"aggs": {
"date1": {
"date_range": {
"field": "OperationDateTime",
"format": "YYYY-MM-dd",
"ranges": [
{
"from": "2020-03-05",
"to": "now"
}
]
}
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 5,
"max_score" : 1.0,
"hits" : [
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0001",
"_score" : 1.0,
"_source" : {
"systemId" : "000001",
"OperationDateTime" : 1583321643000,
"Items" : [
{
"name" : "kk",
"value" : "12"
},
{
"name" : "gg",
"value" : "24"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0004",
"_score" : 1.0,
"_source" : {
"systemId" : "000004",
"OperationDateTime" : 1583494443000,
"Items" : [
{
"name" : "kk",
"value" : "44"
},
{
"name" : "gg",
"value" : "44"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0002",
"_score" : 1.0,
"_source" : {
"systemId" : "000002",
"OperationDateTime" : 1583148843000,
"Items" : [
{
"name" : "kk",
"value" : "11"
},
{
"name" : "gg",
"value" : "11"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0003",
"_score" : 1.0,
"_source" : {
"systemId" : "000003",
"OperationDateTime" : 1583408043000,
"Items" : [
{
"name" : "kk",
"value" : "55"
},
{
"name" : "gg",
"value" : "55"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0005",
"_score" : 1.0,
"_source" : {
"systemId" : "000005",
"OperationDateTime" : 1583580843000,
"Items" : [
{
"name" : "kk",
"value" : "66"
},
{
"name" : "gg",
"value" : "66"
}
]
}
}
]
},
"aggregations" : {
"date1" : {
"buckets" : [
{
"key" : "2020-03-05-2020-04-04",
"from" : 1.5833664E12,
"from_as_string" : "2020-03-05",
"to" : 1.586007818589E12,
"to_as_string" : "2020-04-04",
"doc_count" : 3
}
]
}
}
}
关于date_range聚合,还有一点。有时,我们会想要建立一个能随时间变化的聚合,例如使用相对于当前时间(now)的范围:
GET devicelog_01/_search
{
"aggs": {
"date1": {
"date_range": {
"field": "OperationDateTime",
"format": "YYYY-MM-dd",
"ranges": [
{
"to": "now-9M/M"
},
{
"from": "now-4M/M",
"to": "now-1M/M"
}
]
}
}
}
}
{
"took" : 11,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 5,
"max_score" : 1.0,
"hits" : [
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0001",
"_score" : 1.0,
"_source" : {
"systemId" : "000001",
"OperationDateTime" : 1583321643000,
"Items" : [
{
"name" : "kk",
"value" : "12"
},
{
"name" : "gg",
"value" : "24"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0004",
"_score" : 1.0,
"_source" : {
"systemId" : "000004",
"OperationDateTime" : 1583494443000,
"Items" : [
{
"name" : "kk",
"value" : "44"
},
{
"name" : "gg",
"value" : "44"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0002",
"_score" : 1.0,
"_source" : {
"systemId" : "000002",
"OperationDateTime" : 1583148843000,
"Items" : [
{
"name" : "kk",
"value" : "11"
},
{
"name" : "gg",
"value" : "11"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0003",
"_score" : 1.0,
"_source" : {
"systemId" : "000003",
"OperationDateTime" : 1583408043000,
"Items" : [
{
"name" : "kk",
"value" : "55"
},
{
"name" : "gg",
"value" : "55"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0005",
"_score" : 1.0,
"_source" : {
"systemId" : "000005",
"OperationDateTime" : 1583580843000,
"Items" : [
{
"name" : "kk",
"value" : "66"
},
{
"name" : "gg",
"value" : "66"
}
]
}
}
]
},
"aggregations" : {
"date1" : {
"buckets" : [
{
"key" : "*-2019-07-01",
"to" : 1.5619392E12,
"to_as_string" : "2019-07-01",
"doc_count" : 0
},
{
"key" : "2019-12-01-2020-03-01",
"from" : 1.5751584E12,
"from_as_string" : "2019-12-01",
"to" : 1.5830208E12,
"to_as_string" : "2020-03-01",
"doc_count" : 0
}
]
}
}
}
这里的关键是如now-9M的表达式。Elasticsearch使用数学生成相应的值。你可以使用y(年)、M(月)、w(周)、d(日)、h(小时)、m(分钟)和s(秒)。
例如,表达式now+3d表示现在起的3天后。在我们的示例中,/M表示把日期向下取整到月初。由于这种表示法,我们可以只计算完整月。
第二个优点是计算的日期对缓存更友好,如果不做取整,日期每一毫秒都更改,导致基于range的每个缓存都没有意义。
missing聚合
查看没有某个字段的条目是多少。
为此,我们使用missing聚合,在这种情况下它是个好东西。
GET libary/_search
{
"aggs": {
"missing1": {
"missing": {
"field": "KK"
}
}
}
}
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 4,
"max_score" : 1.0,
"hits" : [
{
"_index" : "libary",
"_type" : "book",
"_id" : "003",
"_score" : 1.0,
"_source" : {
"name" : "book3",
"sales" : 40.23,
"number" : "000003",
"have" : 50000,
"nohave" : 500
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "006",
"_score" : 1.0,
"_source" : {
"name" : "book8",
"sales" : 40.23,
"number" : "000003",
"have" : 50000,
"nohave" : 500,
"KK" : 44
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "001",
"_score" : 1.0,
"_source" : {
"name" : "book1",
"sales" : 10.23,
"number" : "000001",
"have" : 20000,
"nohave" : 100
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "002",
"_score" : 1.0,
"_source" : {
"name" : "book2",
"sales" : 30.23,
"number" : "000002",
"have" : 30000,
"nohave" : 300
}
}
]
},
"aggregations" : {
"missing1" : {
"doc_count" : 3
}
}
}
nested聚合
4.3节介绍了嵌套文档。使用这个数据来看看下一种聚合类型:nested聚合。
IPv4 range聚合
range聚合的最后一个形式是基于互联网地址的聚合。它工作在定义成ip类型的字段上,
允许 以 CIDR的格式来定义IP范围( CIDR: http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)。
histogram聚合
histogram聚合按固定的间隔(interval)来定义桶。
这里,新的信息片段是interval,它定义了将用于创建桶的每个范围的长度。
与range聚合一样,histogram聚合同样允许我们使用keyed属性。其他可用的选项是min_doc_count,使我们能够控制为创建一个桶需要的最小文档数目。
如果把min_doc_count属性设置为零,Elasticsearch还将包括文档数目为0的桶。
GET libary/_search
{
"aggs": {
"yy": {
"histogram": {
"field": "sales",
"interval": 50
}
}
}
}
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 4,
"max_score" : 1.0,
"hits" : [
{
"_index" : "libary",
"_type" : "book",
"_id" : "003",
"_score" : 1.0,
"_source" : {
"name" : "book3",
"sales" : 40.23,
"number" : "000003",
"have" : 50000,
"nohave" : 500
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "006",
"_score" : 1.0,
"_source" : {
"name" : "book8",
"sales" : 40.23,
"number" : "000003",
"have" : 50000,
"nohave" : 500,
"KK" : 44
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "001",
"_score" : 1.0,
"_source" : {
"name" : "book1",
"sales" : 10.23,
"number" : "000001",
"have" : 20000,
"nohave" : 100
}
},
{
"_index" : "libary",
"_type" : "book",
"_id" : "002",
"_score" : 1.0,
"_source" : {
"name" : "book2",
"sales" : 30.23,
"number" : "000002",
"have" : 30000,
"nohave" : 300
}
}
]
},
"aggregations" : {
"yy" : {
"buckets" : [
{
"key" : 0.0,
"doc_count" : 4
}
]
}
}
}
与range聚合一样,histogram聚合同样允许我们使用keyed属性。其他可用的选项是min_doc_count,使我们能够控制为创建一个桶需要的最小文档数目。
如果把min_doc_count属性设置为零,Elasticsearch还将包括文档数目为0的桶。
date_histogram聚合
正如date_range聚合是range聚合的一种特殊形式,date_histogram聚合也是histogram聚合的一种扩展,专用在日期上。
GET devicelog_01/_search
{
"aggs": {
"y1": {
"date_histogram": {
"field": "OperationDateTime",
"format": "yyyy-MM-dd",
"interval": "1d"
}
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 5,
"max_score" : 1.0,
"hits" : [
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0001",
"_score" : 1.0,
"_source" : {
"systemId" : "000001",
"OperationDateTime" : 1583321643000,
"Items" : [
{
"name" : "kk",
"value" : "12"
},
{
"name" : "gg",
"value" : "24"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0004",
"_score" : 1.0,
"_source" : {
"systemId" : "000004",
"OperationDateTime" : 1583494443000,
"Items" : [
{
"name" : "kk",
"value" : "44"
},
{
"name" : "gg",
"value" : "44"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0002",
"_score" : 1.0,
"_source" : {
"systemId" : "000002",
"OperationDateTime" : 1583148843000,
"Items" : [
{
"name" : "kk",
"value" : "11"
},
{
"name" : "gg",
"value" : "11"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0003",
"_score" : 1.0,
"_source" : {
"systemId" : "000003",
"OperationDateTime" : 1583408043000,
"Items" : [
{
"name" : "kk",
"value" : "55"
},
{
"name" : "gg",
"value" : "55"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0005",
"_score" : 1.0,
"_source" : {
"systemId" : "000005",
"OperationDateTime" : 1583580843000,
"Items" : [
{
"name" : "kk",
"value" : "66"
},
{
"name" : "gg",
"value" : "66"
}
]
}
}
]
},
"aggregations" : {
"y1" : {
"buckets" : [
{
"key_as_string" : "2020-03-02",
"key" : 1583107200000,
"doc_count" : 1
},
{
"key_as_string" : "2020-03-03",
"key" : 1583193600000,
"doc_count" : 0
},
{
"key_as_string" : "2020-03-04",
"key" : 1583280000000,
"doc_count" : 1
},
{
"key_as_string" : "2020-03-05",
"key" : 1583366400000,
"doc_count" : 1
},
{
"key_as_string" : "2020-03-06",
"key" : 1583452800000,
"doc_count" : 1
},
{
"key_as_string" : "2020-03-07",
"key" : 1583539200000,
"doc_count" : 1
}
]
}
}
}
可以看到interval属性上一个重要的区别。它现在用一个字符串来描述时间间隔,在我们的例子中是1天(1d)。
当然,可以将它设置为任何值,使用与在date_range聚合中讨论过的格式相同的后缀。
值得一提的是,数字可以是一个浮点值,例如1.5m,即每1.5分钟。format属性跟
date_range聚合中一样,归功于此,Elasticsearch可以根据定义的格式添加一个人类可读的日期。
当然,format属性不是必需的,但它是有用的。除此以外,类似于range聚合,keyed和min_doc_count属性仍然有效。
时区
Elasticsearch将所有日期存储成UTC时区。你可以定义用于显示的时区。日期转换有两种方法,
可以在把元素分配给桶之前转换日期,也可以在分配之后转换。因此,取决于所选方法和桶的定
义,一个元素可能分配给不同的桶。有两个属性定义此行为:pre_zone和post_zone。此外,
还有一个time_zone,基本上用来设置pre_zone属性的值。有如下三种符号来设置这些属性。
可以设置小时偏移,例如:pre_zone:-4或time_zone:5;
可以使用时间格式,例如:pre_zone:"-4:30";
可以使用时区的名字,例如:time_zone:"Europe/Warsaw"。
查看时区:http://joda-time.sourceforge.net/timezones.html
GET devicelog_01/_search
{
"aggs": {
"y1": {
"date_histogram": {
"field": "OperationDateTime",
"format": "yyyy-MM-dd",
"interval": "1d",
"time_zone": "Asia/Shanghai"
}
}
}
}
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 5,
"max_score" : 1.0,
"hits" : [
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0001",
"_score" : 1.0,
"_source" : {
"systemId" : "000001",
"OperationDateTime" : 1583321643000,
"Items" : [
{
"name" : "kk",
"value" : "12"
},
{
"name" : "gg",
"value" : "24"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0004",
"_score" : 1.0,
"_source" : {
"systemId" : "000004",
"OperationDateTime" : 1583494443000,
"Items" : [
{
"name" : "kk",
"value" : "44"
},
{
"name" : "gg",
"value" : "44"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0002",
"_score" : 1.0,
"_source" : {
"systemId" : "000002",
"OperationDateTime" : 1583148843000,
"Items" : [
{
"name" : "kk",
"value" : "11"
},
{
"name" : "gg",
"value" : "11"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0003",
"_score" : 1.0,
"_source" : {
"systemId" : "000003",
"OperationDateTime" : 1583408043000,
"Items" : [
{
"name" : "kk",
"value" : "55"
},
{
"name" : "gg",
"value" : "55"
}
]
}
},
{
"_index" : "devicelog_01",
"_type" : "log",
"_id" : "0005",
"_score" : 1.0,
"_source" : {
"systemId" : "000005",
"OperationDateTime" : 1583580843000,
"Items" : [
{
"name" : "kk",
"value" : "66"
},
{
"name" : "gg",
"value" : "66"
}
]
}
}
]
},
"aggregations" : {
"y1" : {
"buckets" : [
{
"key_as_string" : "2020-03-02",
"key" : 1583078400000,
"doc_count" : 1
},
{
"key_as_string" : "2020-03-03",
"key" : 1583164800000,
"doc_count" : 0
},
{
"key_as_string" : "2020-03-04",
"key" : 1583251200000,
"doc_count" : 1
},
{
"key_as_string" : "2020-03-05",
"key" : 1583337600000,
"doc_count" : 1
},
{
"key_as_string" : "2020-03-06",
"key" : 1583424000000,
"doc_count" : 1
},
{
"key_as_string" : "2020-03-07",
"key" : 1583510400000,
"doc_count" : 1
}
]
}
}
}
查询示例
PUT devicelog_02
{
"mappings" : {
"log" : {
"properties" : {
"Items" : {
"properties" : {
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"value" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"OperationDateTime" : {
"type" : "date"
},
"systemId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
PUT /devicelog_02/log/1583224443000
{
"systemId":"2020-03-03, 16:34:03",
"OperationDateTime": 1583224443000,
"Items": [
{"name":"kk","value":"2020-03-06, 22:34:03"},
{"name":"gg","value":"11"},
{"dd":"111" ,"value": "22"}
]
}
GET devicelog_02/_search
GET devicelog_02/_search
{
"size": 0,
"_source": false,
"aggs": {
"group_by_time": {
"date_histogram": {
"field": "OperationDateTime",
"interval": "1d",
"time_zone": "Asia/Shanghai"
},
"aggs": {
"details": {
"top_hits": {
"size": 1,
"_source": ["systemId","Items"],
"sort": [{
"OperationDateTime": {
"order": "desc"
}
}]
}
}
}
}
}
}
https://blog.csdn.net/Z446136354/article/details/97245114
GET devicelog_01/_search
GET devicelog_01/_search
{
"query": {
"bool": {
"must": [
{"term": {
"Items.name": {
"value": "yan"
}
}}
]
}
}
}
聚合测试
设置映射
View Code
批量插入数据
View Code
查询
View Code
java代码查询
View Code
参考资料
https://www.cnblogs.com/jatpeo/p/11767473.html
https://www.kgraph.cn/220.html
https://my.oschina.net/muziH?tab=newest&catalogId=523874
nested
https://blog.csdn.net/u012332735/article/details/62222953/
es列表对象统计
https://blog.csdn.net/muzizongheng/article/details/85228987


浙公网安备 33010602011771号