DSL:聚合

当前文档内容

PUT devicelog_01
{
    "mappings" : {
      "log" : {
        "properties" : {
          "Items" : {
            "properties" : {
              "name" : {
                "type" : "text",
                "fields" : {
                  "keyword" : {
                    "type" : "keyword",
                    "ignore_above" : 256
                  }
                }
              },
              "value" : {
                "type" : "text",
                "fields" : {
                  "keyword" : {
                    "type" : "keyword",
                    "ignore_above" : 256
                  }
                }
              }
            }
          },
          "OperationDateTime" : {
            "type" : "date"
          },
          "systemId" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
                "type" : "keyword",
                "ignore_above" : 256
              }
            }
          }
        }
      }
    }
}
View Code
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 5,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0001",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000001",
          "OperationDateTime" : 1583321643000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "12"
            },
            {
              "name" : "gg",
              "value" : "24"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0004",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000004",
          "OperationDateTime" : 1583494443000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "44"
            },
            {
              "name" : "gg",
              "value" : "44"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0002",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000002",
          "OperationDateTime" : 1583148843000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "11"
            },
            {
              "name" : "gg",
              "value" : "11"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0003",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000003",
          "OperationDateTime" : 1583408043000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "55"
            },
            {
              "name" : "gg",
              "value" : "55"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0005",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000005",
          "OperationDateTime" : 1583580843000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "66"
            },
            {
              "name" : "gg",
              "value" : "66"
            }
          ]
        }
      }
    ]
  }
}
View Code
GET libary/_mapping
GET libary/_search
PUT libary/book/003
{
  "name": "book3",
  "sales": 40.23,
  "number": "000003",
  "have": 50000,
  "nohave": 500
}
View Code
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 3,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "003",
        "_score" : 1.0,
        "_source" : {
          "name" : "book3",
          "sales" : 40.23,
          "number" : "000003",
          "have" : 50000,
          "nohave" : 500
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "001",
        "_score" : 1.0,
        "_source" : {
          "name" : "book1",
          "sales" : 10.23,
          "number" : "000001",
          "have" : 20000,
          "nohave" : 100
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "002",
        "_score" : 1.0,
        "_source" : {
          "name" : "book2",
          "sales" : 30.23,
          "number" : "000002",
          "have" : 30000,
          "nohave" : 300
        }
      }
    ]
  }
}
View Code

度量聚合

度量聚合接收一个输入文档集并生成至少一个统计值

min、max、sum、avg聚合

min、max、sum和avg聚合的使用很相似。它们对于给定字段分别返回最小值、最大值、总和和平均值。任何数值型字段都可以作为这些值的源。

max

GET libary/_search
{
  "aggs": {
    "max_have": {
      "max": {
        "field": "have"
      }
    }
  }
} 
View Code

结果

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 3,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "003",
        "_score" : 1.0,
        "_source" : {
          "name" : "book3",
          "sales" : 40.23,
          "number" : "000003",
          "have" : 50000,
          "nohave" : 500
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "001",
        "_score" : 1.0,
        "_source" : {
          "name" : "book1",
          "sales" : 10.23,
          "number" : "000001",
          "have" : 20000,
          "nohave" : 100
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "002",
        "_score" : 1.0,
        "_source" : {
          "name" : "book2",
          "sales" : 30.23,
          "number" : "000002",
          "have" : 30000,
          "nohave" : 300
        }
      }
    ]
  },
  "aggregations" : {
    "max_have" : {
      "value" : 50000.0
    }
  }
}
View Code

使用脚本

聚合使用的值是在原始的have值基础上减去100之后的结果

GET libary/_search
{
  "aggs": {
    "max_have": {
      "max": {
        "script": "doc['have'].value-100"
      }
    }
  }
}
View Code
{
  "took" : 61,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 3,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "003",
        "_score" : 1.0,
        "_source" : {
          "name" : "book3",
          "sales" : 40.23,
          "number" : "000003",
          "have" : 50000,
          "nohave" : 500
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "001",
        "_score" : 1.0,
        "_source" : {
          "name" : "book1",
          "sales" : 10.23,
          "number" : "000001",
          "have" : 20000,
          "nohave" : 100
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "002",
        "_score" : 1.0,
        "_source" : {
          "name" : "book2",
          "sales" : 30.23,
          "number" : "000002",
          "have" : 30000,
          "nohave" : 300
        }
      }
    ]
  },
  "aggregations" : {
    "max_have" : {
      "value" : 49900.0
    }
  }
}
View Code

还可以如下写

GET libary/_search
{
  "aggs": {
    "max_have": {
      "max": {
        "field": "have", 
        "script": "_value-100"
      }
    }
  }
}
View Code

value_count聚合

value_count聚合跟前面描述的聚合类似,只是输入字段不一定要是数值型的。

计算出某个字段出现的总的次数

GET libary/_search
{
  "aggs": {
    "number_of_items": {
      "value_count": {
        "field": "number"
      }
    }
  }
} 
View Code
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 3,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "003",
        "_score" : 1.0,
        "_source" : {
          "name" : "book3",
          "sales" : 40.23,
          "number" : "000003",
          "have" : 50000,
          "nohave" : 500
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "001",
        "_score" : 1.0,
        "_source" : {
          "name" : "book1",
          "sales" : 10.23,
          "number" : "000001",
          "have" : 20000,
          "nohave" : 100
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "002",
        "_score" : 1.0,
        "_source" : {
          "name" : "book2",
          "sales" : 30.23,
          "number" : "000002",
          "have" : 30000,
          "nohave" : 300
        }
      }
    ]
  },
  "aggregations" : {
    "number_of_items" : {
      "value" : 3
    }
  }
}
View Code

stats和extended_stats聚合

stats和extended_stats聚合可以看成是在单一聚合对象中返回所有前面描述聚合的一种聚合。

把各个统计都统计一遍

可以看到,除了已知的值,我们还得到平方和、方差和标准差统计。

GET libary/_search
{
  "aggs": {
    "stats_have": {
      "extended_stats": {
        "field": "have"
      }
    }
  }
} 
View Code
{
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 3,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "003",
        "_score" : 1.0,
        "_source" : {
          "name" : "book3",
          "sales" : 40.23,
          "number" : "000003",
          "have" : 50000,
          "nohave" : 500
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "001",
        "_score" : 1.0,
        "_source" : {
          "name" : "book1",
          "sales" : 10.23,
          "number" : "000001",
          "have" : 20000,
          "nohave" : 100
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "002",
        "_score" : 1.0,
        "_source" : {
          "name" : "book2",
          "sales" : 30.23,
          "number" : "000002",
          "have" : 30000,
          "nohave" : 300
        }
      }
    ]
  },
  "aggregations" : {
    "stats_have" : {
      "count" : 3,
      "min" : 20000.0,
      "max" : 50000.0,
      "avg" : 33333.333333333336,
      "sum" : 100000.0,
      "sum_of_squares" : 3.8E9,
      "variance" : 1.555555555555555E8,
      "std_deviation" : 12472.191289246468,
      "std_deviation_bounds" : {
        "upper" : 58277.71591182627,
        "lower" : 8388.9507548404
      }
    }
  }
}
View Code

桶聚合

桶聚合返回很多子集,并限定输入数据到一个特殊的叫做桶的子集中。

terms聚合

terms聚合为字段中每个词条返回一个桶。这允许你生成字段每个值的统计。

计算出have的每个值有多少个

GET libary/_search
{
  "aggs": {
    "availability": {
      "terms": {
        "field": "have"
      }
    }
  }
} 
View Code
{
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 3,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "003",
        "_score" : 1.0,
        "_source" : {
          "name" : "book3",
          "sales" : 40.23,
          "number" : "000003",
          "have" : 50000,
          "nohave" : 500
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "001",
        "_score" : 1.0,
        "_source" : {
          "name" : "book1",
          "sales" : 10.23,
          "number" : "000001",
          "have" : 20000,
          "nohave" : 100
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "002",
        "_score" : 1.0,
        "_source" : {
          "name" : "book2",
          "sales" : 30.23,
          "number" : "000002",
          "have" : 30000,
          "nohave" : 300
        }
      }
    ]
  },
  "aggregations" : {
    "availability" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : 20000,
          "doc_count" : 1
        },
        {
          "key" : 30000,
          "doc_count" : 1
        },
        {
          "key" : 50000,
          "doc_count" : 1
        }
      ]
    }
  }
}
View Code

为了使用key属性值对聚合排序,可以发送以下查询

可以按升序排(asc),也可以按降序排(desc)。在我们的例子中,使用key属性(_term)排序。另一个选择是_count,告诉Elasticsearch使用doc_count属性来排序。

GET libary/_search
{
  "aggs": {
    "availability": {
      "terms": {
        "field": "have",
        "size": 2,
        "order": {
          "_term": "asc"
        }
      }
    }
  }
} 
View Code

range聚合

range聚合使用定义的范围来创建桶。

GET libary/_search
{
  "aggs": {
    "range1": {
      "range": {
        "field": "have",
        "ranges": [
          {
            "from": 1000,
            "to": 40000
          },
          {
            "from": 40000,
            "to": 50001
          }
        ]
      }
    }
  }
}
View Code
{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 3,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "003",
        "_score" : 1.0,
        "_source" : {
          "name" : "book3",
          "sales" : 40.23,
          "number" : "000003",
          "have" : 50000,
          "nohave" : 500
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "001",
        "_score" : 1.0,
        "_source" : {
          "name" : "book1",
          "sales" : 10.23,
          "number" : "000001",
          "have" : 20000,
          "nohave" : 100
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "002",
        "_score" : 1.0,
        "_source" : {
          "name" : "book2",
          "sales" : 30.23,
          "number" : "000002",
          "have" : 30000,
          "nohave" : 300
        }
      }
    ]
  },
  "aggregations" : {
    "range1" : {
      "buckets" : [
        {
          "key" : "1000.0-40000.0",
          "from" : 1000.0,
          "to" : 40000.0,
          "doc_count" : 2
        },
        {
          "key" : "40000.0-50001.0",
          "from" : 40000.0,
          "to" : 50001.0,
          "doc_count" : 1
        }
      ]
    }
  }
}
View Code

创建用户界面时,可以为每个桶自动生成一个标签。打开此功能很简单:只需要添加keyed属性并将其设置为true

GET libary/_search
{
  "aggs": {
    "range1": {
      "range": {
        "field": "have",
        "keyed": true, 
        "ranges": [
          {
            "from": 1000,
            "to": 40000
          },
          {
            "from": 40000,
            "to": 50001
          }
        ]
      }
    }
  }
}
View Code
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 3,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "003",
        "_score" : 1.0,
        "_source" : {
          "name" : "book3",
          "sales" : 40.23,
          "number" : "000003",
          "have" : 50000,
          "nohave" : 500
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "001",
        "_score" : 1.0,
        "_source" : {
          "name" : "book1",
          "sales" : 10.23,
          "number" : "000001",
          "have" : 20000,
          "nohave" : 100
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "002",
        "_score" : 1.0,
        "_source" : {
          "name" : "book2",
          "sales" : 30.23,
          "number" : "000002",
          "have" : 30000,
          "nohave" : 300
        }
      }
    ]
  },
  "aggregations" : {
    "range1" : {
      "buckets" : {
        "1000.0-40000.0" : {
          "from" : 1000.0,
          "to" : 40000.0,
          "doc_count" : 2
        },
        "40000.0-50001.0" : {
          "from" : 40000.0,
          "to" : 50001.0,
          "doc_count" : 1
        }
      }
    }
  }
}
View Code

你可能已经注意到,结构略有变化:buckets字段不再是数组,而是一个以键名索引的对象(映射),其键名是根据范围自动生成的。

这行得通,但不太漂亮。在我们的例子中,给每个桶一个名称会更有用。这是可以做到的:我们可以为每个范围添加key属性,并把它的值设置为所需的名称

GET libary/_search
{
  "aggs": {
    "range1": {
      "range": {
        "field": "have",
        "keyed": true, 
        "ranges": [
          {
            "key": "key1", 
            "from": 1000,
            "to": 40000
          },
          {
            "key": "key2", 
            "from": 40000,
            "to": 50001
          }
        ]
      }
    }
  }
} 
View Code
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 3,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "003",
        "_score" : 1.0,
        "_source" : {
          "name" : "book3",
          "sales" : 40.23,
          "number" : "000003",
          "have" : 50000,
          "nohave" : 500
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "001",
        "_score" : 1.0,
        "_source" : {
          "name" : "book1",
          "sales" : 10.23,
          "number" : "000001",
          "have" : 20000,
          "nohave" : 100
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "002",
        "_score" : 1.0,
        "_source" : {
          "name" : "book2",
          "sales" : 30.23,
          "number" : "000002",
          "have" : 30000,
          "nohave" : 300
        }
      }
    ]
  },
  "aggregations" : {
    "range1" : {
      "buckets" : {
        "key1" : {
          "from" : 1000.0,
          "to" : 40000.0,
          "doc_count" : 2
        },
        "key2" : {
          "from" : 40000.0,
          "to" : 50001.0,
          "doc_count" : 1
        }
      }
    }
  }
}
View Code

date_range聚合

date_range聚合类似于前面讨论的range聚合,但它专用于日期类型的字段。

GET devicelog_01/_search
{
  "aggs": {
    "date1": {
      "date_range": {
        "field": "OperationDateTime",
        "ranges": [
          {
            "from": "2020-03-04",
            "to": "now"
          }
        ]
      }
    }
  }
}
View Code
{
  "took" : 4,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 5,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0001",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000001",
          "OperationDateTime" : 1583321643000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "12"
            },
            {
              "name" : "gg",
              "value" : "24"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0004",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000004",
          "OperationDateTime" : 1583494443000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "44"
            },
            {
              "name" : "gg",
              "value" : "44"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0002",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000002",
          "OperationDateTime" : 1583148843000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "11"
            },
            {
              "name" : "gg",
              "value" : "11"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0003",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000003",
          "OperationDateTime" : 1583408043000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "55"
            },
            {
              "name" : "gg",
              "value" : "55"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0005",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000005",
          "OperationDateTime" : 1583580843000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "66"
            },
            {
              "name" : "gg",
              "value" : "66"
            }
          ]
        }
      }
    ]
  },
  "aggregations" : {
    "date1" : {
      "buckets" : [
        {
          "key" : "2020-03-04T00:00:00.000Z-2020-04-04T13:35:24.625Z",
          "from" : 1.58328E12,
          "from_as_string" : "2020-03-04T00:00:00.000Z",
          "to" : 1.586007324625E12,
          "to_as_string" : "2020-04-04T13:35:24.625Z",
          "doc_count" : 4
        }
      ]
    }
  }
}
View Code

与普通的range聚合比较,唯一改变的是聚合类型(date_range)。可以用Elasticsearch认可的字符串格式传递日期(更多信息请参阅第2章),

或者用数值:自1970-01-01以来的毫秒数

Elasticsearch也允许使用format属性来定义日期格式。

GET devicelog_01/_search
{
  "aggs": {
    "date1": {
      "date_range": {
        "field": "OperationDateTime",
        "format": "YYYY-MM-dd", 
        "ranges": [
          {
            "from": "2020-03-05",
            "to": "now"
          }
        ]
      }
    }
  }
}
View Code
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 5,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0001",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000001",
          "OperationDateTime" : 1583321643000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "12"
            },
            {
              "name" : "gg",
              "value" : "24"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0004",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000004",
          "OperationDateTime" : 1583494443000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "44"
            },
            {
              "name" : "gg",
              "value" : "44"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0002",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000002",
          "OperationDateTime" : 1583148843000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "11"
            },
            {
              "name" : "gg",
              "value" : "11"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0003",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000003",
          "OperationDateTime" : 1583408043000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "55"
            },
            {
              "name" : "gg",
              "value" : "55"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0005",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000005",
          "OperationDateTime" : 1583580843000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "66"
            },
            {
              "name" : "gg",
              "value" : "66"
            }
          ]
        }
      }
    ]
  },
  "aggregations" : {
    "date1" : {
      "buckets" : [
        {
          "key" : "2020-03-05-2020-04-04",
          "from" : 1.5833664E12,
          "from_as_string" : "2020-03-05",
          "to" : 1.586007818589E12,
          "to_as_string" : "2020-04-04",
          "doc_count" : 3
        }
      ]
    }
  }
}
View Code

关于date_range聚合,还有一点。有时,我们会想要建立一个能随时间变化的聚合,例如下面这个使用相对日期表达式的查询:

GET devicelog_01/_search
{
  "aggs": {
    "date1": {
      "date_range": {
        "field": "OperationDateTime",
        "format": "YYYY-MM-dd", 
        "ranges": [
          {
            "to": "now-9M/M"
          },
          {
            "from": "now-4M/M",
            "to": "now-1M/M"
          }
        ]
      }
    }
  }
}
View Code
{
  "took" : 11,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 5,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0001",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000001",
          "OperationDateTime" : 1583321643000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "12"
            },
            {
              "name" : "gg",
              "value" : "24"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0004",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000004",
          "OperationDateTime" : 1583494443000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "44"
            },
            {
              "name" : "gg",
              "value" : "44"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0002",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000002",
          "OperationDateTime" : 1583148843000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "11"
            },
            {
              "name" : "gg",
              "value" : "11"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0003",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000003",
          "OperationDateTime" : 1583408043000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "55"
            },
            {
              "name" : "gg",
              "value" : "55"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0005",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000005",
          "OperationDateTime" : 1583580843000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "66"
            },
            {
              "name" : "gg",
              "value" : "66"
            }
          ]
        }
      }
    ]
  },
  "aggregations" : {
    "date1" : {
      "buckets" : [
        {
          "key" : "*-2019-07-01",
          "to" : 1.5619392E12,
          "to_as_string" : "2019-07-01",
          "doc_count" : 0
        },
        {
          "key" : "2019-12-01-2020-03-01",
          "from" : 1.5751584E12,
          "from_as_string" : "2019-12-01",
          "to" : 1.5830208E12,
          "to_as_string" : "2020-03-01",
          "doc_count" : 0
        }
      ]
    }
  }
}
View Code

这里的关键是形如now-9M的表达式。Elasticsearch使用日期数学(date math)计算出相应的值。你可以使用y(年)、M(月)、w(周)、d(日)、h(小时)、m(分钟)和s(秒)。

例如,表达式now+3d表示现在起的3天后。在我们的示例中,/M表示将日期向下取整到所在月份的起始时刻(例如上面结果中的2019-07-01)。借助这种表示法,我们可以只统计完整的月份。

第二个优点是计算出的日期对缓存更友好:如果不做取整,日期每一毫秒都会变化,导致基于range的每个缓存都没有意义。

missing聚合

查看没有某个字段的条目是多少。

为此,我们使用missing聚合,在这种情况下它是个好东西。

GET libary/_search
{
  "aggs": {
    "missing1": {
      "missing": {
        "field": "KK"
      }
    }
  }
}
View Code
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 4,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "003",
        "_score" : 1.0,
        "_source" : {
          "name" : "book3",
          "sales" : 40.23,
          "number" : "000003",
          "have" : 50000,
          "nohave" : 500
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "006",
        "_score" : 1.0,
        "_source" : {
          "name" : "book8",
          "sales" : 40.23,
          "number" : "000003",
          "have" : 50000,
          "nohave" : 500,
          "KK" : 44
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "001",
        "_score" : 1.0,
        "_source" : {
          "name" : "book1",
          "sales" : 10.23,
          "number" : "000001",
          "have" : 20000,
          "nohave" : 100
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "002",
        "_score" : 1.0,
        "_source" : {
          "name" : "book2",
          "sales" : 30.23,
          "number" : "000002",
          "have" : 30000,
          "nohave" : 300
        }
      }
    ]
  },
  "aggregations" : {
    "missing1" : {
      "doc_count" : 3
    }
  }
}
View Code

nested聚合

4.3节介绍了嵌套文档。使用这个数据来看看下一种聚合类型:nested聚合。

IPv4 range聚合

range聚合的最后一个形式是基于互联网地址的聚合。它工作在定义成ip类型的字段上,

允许以CIDR的格式来定义IP范围(CIDR: http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing)。

histogram聚合

histogram聚合按固定的间隔自动定义桶,而不需要像range聚合那样手工列出每个范围。

在下面的查询中,新出现的参数是interval,它定义了用于创建桶的每个范围的长度。

与range聚合一样,histogram聚合同样允许我们使用keyed属性。其他可用的选项是min_doc_count,使我们能够控制为创建一个桶需要的最小文档数目。

如果把min_doc_count属性设置为零,Elasticsearch还将包括文档数目为0的桶。

GET libary/_search
{
  "aggs": {
    "yy": {
      "histogram": {
        "field": "sales",
        "interval": 50
      }
    }
  }
}
View Code
{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 4,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "003",
        "_score" : 1.0,
        "_source" : {
          "name" : "book3",
          "sales" : 40.23,
          "number" : "000003",
          "have" : 50000,
          "nohave" : 500
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "006",
        "_score" : 1.0,
        "_source" : {
          "name" : "book8",
          "sales" : 40.23,
          "number" : "000003",
          "have" : 50000,
          "nohave" : 500,
          "KK" : 44
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "001",
        "_score" : 1.0,
        "_source" : {
          "name" : "book1",
          "sales" : 10.23,
          "number" : "000001",
          "have" : 20000,
          "nohave" : 100
        }
      },
      {
        "_index" : "libary",
        "_type" : "book",
        "_id" : "002",
        "_score" : 1.0,
        "_source" : {
          "name" : "book2",
          "sales" : 30.23,
          "number" : "000002",
          "have" : 30000,
          "nohave" : 300
        }
      }
    ]
  },
  "aggregations" : {
    "yy" : {
      "buckets" : [
        {
          "key" : 0.0,
          "doc_count" : 4
        }
      ]
    }
  }
}
View Code

与range聚合一样,histogram聚合同样允许我们使用keyed属性。其他可用的选项是min_doc_count,使我们能够控制为创建一个桶需要的最小文档数目。

如果把min_doc_count属性设置为零,Elasticsearch还将包括文档数目为0的桶。

date_histogram聚合

正如date_range聚合是range聚合的一种特殊形式,date_histogram聚合也是histogram聚合的一种扩展,专用在日期上。

GET devicelog_01/_search
{
  "aggs": {
    "y1": {
      "date_histogram": {
        "field": "OperationDateTime",
        "format": "yyyy-MM-dd", 
        "interval": "1d"
      }
    }
  }
}
View Code
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 5,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0001",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000001",
          "OperationDateTime" : 1583321643000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "12"
            },
            {
              "name" : "gg",
              "value" : "24"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0004",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000004",
          "OperationDateTime" : 1583494443000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "44"
            },
            {
              "name" : "gg",
              "value" : "44"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0002",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000002",
          "OperationDateTime" : 1583148843000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "11"
            },
            {
              "name" : "gg",
              "value" : "11"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0003",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000003",
          "OperationDateTime" : 1583408043000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "55"
            },
            {
              "name" : "gg",
              "value" : "55"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0005",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000005",
          "OperationDateTime" : 1583580843000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "66"
            },
            {
              "name" : "gg",
              "value" : "66"
            }
          ]
        }
      }
    ]
  },
  "aggregations" : {
    "y1" : {
      "buckets" : [
        {
          "key_as_string" : "2020-03-02",
          "key" : 1583107200000,
          "doc_count" : 1
        },
        {
          "key_as_string" : "2020-03-03",
          "key" : 1583193600000,
          "doc_count" : 0
        },
        {
          "key_as_string" : "2020-03-04",
          "key" : 1583280000000,
          "doc_count" : 1
        },
        {
          "key_as_string" : "2020-03-05",
          "key" : 1583366400000,
          "doc_count" : 1
        },
        {
          "key_as_string" : "2020-03-06",
          "key" : 1583452800000,
          "doc_count" : 1
        },
        {
          "key_as_string" : "2020-03-07",
          "key" : 1583539200000,
          "doc_count" : 1
        }
      ]
    }
  }
}
View Code

可以看到interval属性上一个重要的区别。它现在用一个字符串来描述时间间隔,在我们的例子中是1天(1d)。

当然,可以将它设置为任何值,使用与在date_range聚合中讨论过的格式相同的后缀。

值得一提的是,数字可以是一个浮点值,例如1.5m,即每1.5分钟(注意:较新版本的Elasticsearch不再支持小数形式的时间值,可以换用更小的时间单位来表达,例如用90s代替1.5m)。format属性跟date_range聚合中一样,归功于此,Elasticsearch可以根据定义的格式添加一个人类可读的日期。

当然,format属性不是必需的,但它是有用的。除此以外,类似于range聚合,keyed和min_doc_count属性仍然有效。

时区

Elasticsearch将所有日期存储成UTC时区。你可以定义用于显示的时区。日期转换有两种方法,可以在把元素分配给桶之前转换日期,也可以在分配之后转换。因此,取决于所选方法和桶的定义,一个元素可能分配给不同的桶。有两个属性定义此行为:pre_zone和post_zone。此外,还有一个time_zone,基本上用来设置pre_zone属性的值。(注意:pre_zone和post_zone属于早期版本的用法,自Elasticsearch 2.0起已被移除,只保留time_zone,下面的示例使用的也是time_zone。)有如下三种符号来设置这些属性。

- 可以设置小时偏移,例如:pre_zone:-4或time_zone:5;

- 可以使用时间格式,例如:pre_zone:"-4:30";

- 可以使用时区的名字,例如:time_zone:"Europe/Warsaw"。

查看时区:http://joda-time.sourceforge.net/timezones.html

GET devicelog_01/_search
{
  "aggs": {
    "y1": {
      "date_histogram": {
        "field": "OperationDateTime",
        "format": "yyyy-MM-dd", 
        "interval": "1d",
        "time_zone": "Asia/Shanghai"
      }
    }
  }
}
View Code
{
  "took" : 5,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 5,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0001",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000001",
          "OperationDateTime" : 1583321643000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "12"
            },
            {
              "name" : "gg",
              "value" : "24"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0004",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000004",
          "OperationDateTime" : 1583494443000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "44"
            },
            {
              "name" : "gg",
              "value" : "44"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0002",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000002",
          "OperationDateTime" : 1583148843000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "11"
            },
            {
              "name" : "gg",
              "value" : "11"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0003",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000003",
          "OperationDateTime" : 1583408043000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "55"
            },
            {
              "name" : "gg",
              "value" : "55"
            }
          ]
        }
      },
      {
        "_index" : "devicelog_01",
        "_type" : "log",
        "_id" : "0005",
        "_score" : 1.0,
        "_source" : {
          "systemId" : "000005",
          "OperationDateTime" : 1583580843000,
          "Items" : [
            {
              "name" : "kk",
              "value" : "66"
            },
            {
              "name" : "gg",
              "value" : "66"
            }
          ]
        }
      }
    ]
  },
  "aggregations" : {
    "y1" : {
      "buckets" : [
        {
          "key_as_string" : "2020-03-02",
          "key" : 1583078400000,
          "doc_count" : 1
        },
        {
          "key_as_string" : "2020-03-03",
          "key" : 1583164800000,
          "doc_count" : 0
        },
        {
          "key_as_string" : "2020-03-04",
          "key" : 1583251200000,
          "doc_count" : 1
        },
        {
          "key_as_string" : "2020-03-05",
          "key" : 1583337600000,
          "doc_count" : 1
        },
        {
          "key_as_string" : "2020-03-06",
          "key" : 1583424000000,
          "doc_count" : 1
        },
        {
          "key_as_string" : "2020-03-07",
          "key" : 1583510400000,
          "doc_count" : 1
        }
      ]
    }
  }
}
View Code

查询示例

PUT devicelog_02
{
    "mappings" : {
      "log" : {
        "properties" : {
          "Items" : {
            "properties" : {
              "name" : {
                "type" : "text",
                "fields" : {
                  "keyword" : {
                    "type" : "keyword",
                    "ignore_above" : 256
                  }
                }
              },
              "value" : {
                "type" : "text",
                "fields" : {
                  "keyword" : {
                    "type" : "keyword",
                    "ignore_above" : 256
                  }
                }
              }
            }
          },
          "OperationDateTime" : {
            "type" : "date"
          },
          "systemId" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
                "type" : "keyword",
                "ignore_above" : 256
              }
            }
          }
        }
      }
    }
}
View Code
PUT /devicelog_02/log/1583224443000
{
  "systemId":"2020-03-03, 16:34:03",
  "OperationDateTime": 1583224443000,
  "Items": [
    {"name":"kk","value":"2020-03-06, 22:34:03"},
    {"name":"gg","value":"11"},
    {"dd":"111" ,"value": "22"}
    ]
}
View Code
GET devicelog_02/_search
GET devicelog_02/_search
{
  "size": 0, 
  "_source": false, 
  "aggs": {
    "group_by_time": {
      "date_histogram": {
        "field": "OperationDateTime",
        "interval": "1d",
        "time_zone": "Asia/Shanghai"
      },
      "aggs": {
        "details": {
          "top_hits": {
            "size": 1,
            "_source": ["systemId","Items"],
            "sort": [{
              "OperationDateTime": {
                "order": "desc"
              }
            }]
          }
        }
      }
    }
  }
}
View Code

https://blog.csdn.net/Z446136354/article/details/97245114

 

GET devicelog_01/_search
GET devicelog_01/_search
{
  "query": {
    "bool": {
      "must": [
        {"term": {
          "Items.name": {
            "value": "yan"
          }
        }}
      ]
    }
  }
}
View Code

聚合测试

设置映射

View Code

批量插入数据

View Code

查询

View Code

java代码查询

View Code

参考资料

https://www.cnblogs.com/jatpeo/p/11767473.html

https://www.kgraph.cn/220.html

https://my.oschina.net/muziH?tab=newest&catalogId=523874

nested

https://blog.csdn.net/u012332735/article/details/62222953/

es列表对象统计

https://blog.csdn.net/muzizongheng/article/details/85228987

elasticsearch大量数据聚合优化思考

elasticsearch聚合原理分析

ElasticSearch中composite聚合的使用

 

 

posted @ 2020-04-04 20:47  弱水三千12138  阅读(295)  评论(0)    收藏  举报