ES基础
一、概念
1.NRT 近实时,基本上是实时的 2.Cluster 集群 3.Node 节点 4.document&field 5.Index 6.Type 7.shard 8.replica

二、简单的集群操作和crud
查看集群健康状态
GET /_cat/health?v
查看索引
GET /_cat/indices?v
删除索引 DELETE /test_index 添加索引和记录 PUT /test_index/test_type/1 { "name":"zhangsan", "age":12, "gender":"man" } 获取记录 GET /test_index/test_type/1 #替换必须带上所有字段 PUT /test_index/test_type/1 { "name":"lisi", "age":12, "gender":"man" }
三、搜索
PUT /ecommerce/product/1
{
"name" : "gaolujie yagao",
"desc" : "gaoxiao meibai",
"price" : 30,
"producer" : "gaolujie producer",
"tags": [ "meibai", "fangzhu" ]
}
PUT /ecommerce/product/2
{
"name" : "jiajieshi yagao",
"desc" : "youxiao fangzhu",
"price" : 25,
"producer" : "jiajieshi producer",
"tags": [ "fangzhu" ]
}
PUT /ecommerce/product/3
{
"name" : "zhonghua yagao",
"desc" : "caoben zhiwu",
"price" : 40,
"producer" : "zhonghua producer",
"tags": [ "qingxin" ]
}
GET /ecommerce/product/_search
{
"query": {"match_all": {}}
}
#page from 0
GET ecommerce/product/_search
{
"query": {"match": {
"name": "yagao"
}},
"_source": ["name","price","producer"],
"sort": [
{
"price": {
"order": "desc"
}
}
],"from": 0,"size": 2
}
#搜索商品名称包含yagao,而且售价大于25元的商品
GET /ecommerce/product/_search
{
"query" : {
"bool" : {
"must" : {
"match" : {
"name" : "yagao"
}
},
"filter" : {
"range" : {
"price" : { "gt" : 25 }
}
}
}
}
}
GET /ecommerce/product/_search
{
"query": {
"bool": {
"must": {
"match": {
"name": "yagao"
}
},
"filter":{
"range":{
"price":{"gt":25}
}
}
}
}
}
GET /ecommerce/product/_search
{
"query": {
"match": {
"producer": "zhonghua producer"
}
}
}
GET /ecommerce/product/_search
{
"query": {
"match_phrase": {
"producer": "gaolujie producer"
}
}
}
GET /ecommerce/product/_search
{
"query": {
"match": {
"producer": "zhonghua producer"
}
},
"highlight": {"fields": {"producer": {}}}
}
四、聚合搜索
GET /ecommerce/product/_search
PUT /ecommerce/_mapping/product
{
"properties": {"tags":{"type":"text","fielddata": true}}
}
GET /ecommerce/product/_search
{
"aggs": {
"group_by_tags": {
"terms": {
"field": "tags"
}
}
}
}
GET /ecommerce/product/_search
{
"query": {"match_phrase": {
"name": "yagao"
}},
"aggs": {
"groupbyprice":{
"range": {
"field": "price",
"ranges": [
{
"from": 0,
"to": 20
},{
"from": 20,
"to": 40
},{
"from": 40,
"to": 50
}
]
}
},
"groupbytags": {
"terms": {
"field": "tags","order": {
"avg_price": "asc"
}
},"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0.25811607,
"hits": [
{
"_index": "ecommerce",
"_type": "product",
"_id": "2",
"_score": 0.25811607,
"_source": {
"name": "jiajieshi yagao",
"desc": "youxiao fangzhu",
"price": 25,
"producer": "jiajieshi producer",
"tags": [
"fangzhu"
]
}
},
{
"_index": "ecommerce",
"_type": "product",
"_id": "1",
"_score": 0.25811607,
"_source": {
"name": "gaolujie yagao",
"desc": "gaoxiao meibai",
"price": 30,
"producer": "gaolujie producer",
"tags": [
"meibai",
"fangzhu"
]
}
},
{
"_index": "ecommerce",
"_type": "product",
"_id": "3",
"_score": 0.25811607,
"_source": {
"name": "zhonghua yagao",
"desc": "caoben zhiwu",
"price": 40,
"producer": "zhonghua producer",
"tags": [
"qingxin"
]
}
}
]
},
"aggregations": {
"groupbyprice": {
"buckets": [
{
"key": "0.0-20.0",
"from": 0,
"to": 20,
"doc_count": 0
},
{
"key": "20.0-40.0",
"from": 20,
"to": 40,
"doc_count": 2
},
{
"key": "40.0-50.0",
"from": 40,
"to": 50,
"doc_count": 1
}
]
},
"groupbytags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "fangzhu",
"doc_count": 2,
"avg_price": {
"value": 27.5
}
},
{
"key": "meibai",
"doc_count": 1,
"avg_price": {
"value": 30
}
},
{
"key": "qingxin",
"doc_count": 1,
"avg_price": {
"value": 40
}
}
]
}
}
}
五、字段和乐观锁
_id可以指定,不指定的话自动生成。
_source,可以指定显示字段
_type,_index索引和type
_version控制锁
#返回结果只包含name,price字段
GET /ecommerce/product/1?_source=name,price #创建索引 PUT /my_index/my_type/1 { "test_field":"abc" }
#es,_version=1,?version=1,才能更新成功,两个版本号必须相同才能更新成功
#es,_version=1,?version>1&version_type=external,才能成功,比如说?version=2&version_type=external,版本号必须大于当前版本号
PUT /my_index/my_type/1?version=7&version_type=external
{ "test_field":"dad"}
六、脚本更新
GET /test_index/test_type/10
PUT /test_index/test_type/11/
{
"num":0,"tags":[],"tag":""
}
#新建test-add-tags.groovy文件,里面内容为 ctx._source.tag+=new_tag
#上面不加引号的为参数,
POST /test_index/test_type/11/_update
{
"script": {
"lang": "groovy",
"file": "test-add-tags",
"params": {
"new_tag":"tag1"
}
}
}
#如果存在10的数据,操作脚本,不存在插入。
POST /test_index/test_type/10/_update
{
"script": "ctx._source.num+=1"
,"upsert": {"num":0,"tag":[]}
}
七、批量查找和删除
#mget
GET /test_index/_mget
{
"docs":[
{ "_id":11},{ "_id":10}
]
}
PUT /_bulk
{"delete":{"_index":"test_index","_type":"test_type","_id":11}}
{"index":{"_index":"test_index","_type":"test_type","_id":"15"}}
{"num":10,"tag":[]}
PUT /test_index/test_type/_bulk
{"index":{"_id":15}}
{"num":15,"tag":[1,2]}
{"update":{"_id":15,"_retry_on_conflict":3}}
{"doc":{"num":16,"tag":[]}}
八、index的shard个数一经指定,不能改变
一般情况是hash(id)%shardNum=position,当shardNum改变时,按新的shardNum计算出的position会变化,就找不到document原来所在的位置。
九、es的数据写一致性如何保证的
在写请求时可以指定其活跃数来保证。put /index/type/id?consistency=quorum
consistency的枚举:one ,all(all shard),quorum=int( (primary + number_of_replicas) / 2 ) + 1,默认为quorum
当设置为quorum时,活跃数小于quorum时,就会等待 活跃数恢复到quorum,可以指定超时时间。put /index/type/id?consistency=quorum&timeout=30
十、mapping
es search?q=单词,es search?q=字段:单词
GET /test_index/test_type/_search?q=单词 ,当单词包含特殊符号(如 - 等)时,也会对单词进行分词,所以是full text检索,可以理解为部分匹配。
原理是在保存document时,会建立一个_all 字段,去匹配_all字段即可。
{
"name": "jack",
"age": 26,
"email": "jack@sina.com",
"address": "guangzhou"
}
_all :"jack 26 jack@sina.com guangzhou",
有些字段类型,比如日期,在保存时,已经指定了为日期类型,不会分词,在es search?q=字段:单词时必须精确匹配。
PUT /website/article/1
{
"post_date": "2017-01-01",
"title": "my first article",
"content": "this is my first article in this website",
"author_id": 11400
}
PUT /website/article/2
{
"post_date": "2017-01-02",
"title": "my second article",
"content": "this is my second article in this website",
"author_id": 11400
}
PUT /website/article/3
{
"post_date": "2017-01-03",
"title": "my third article",
"content": "this is my third article in this website",
"author_id": 11400
}
GET /website/article/_search?q=2017-01-01 3个结果 (部分匹配,因为对2017-01-01进行拆分了)
GET /website/article/_search?q=post_date:2017-01-01 1 (时间为特殊类型,必须全量匹配)
GET /website/_mapping/article
{ "website": { "mappings": { "article": { "properties": { "author_id": { "type": "long" }, "content": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "post_date": { "type": "date" }, "title": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } } } } } } }
mapping,就是index的type的元数据,每个type都有一个自己的mapping,决定了数据类型,建立倒排索引的行为,还有进行搜索的行为
PUT /website { "mappings": { "article":{ "properties": { "author_id":{"type":"long"}, "title":{"type":"text","analyzer": "english"}, "content":{"type":"text","analyzer": "standard"}, "post_date":{"type":"date"}, "publisher_id":{"type":"text","index": "not_analyzed"} } } } }
只能新建mapping,和添加字段指定mapping,不能更新原有字段mapping。
object类型创建
{ "company": { "mappings": { "employee": { "properties": { "address": { "properties": { "city": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "country": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "province": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } } } }, "age": { "type": "long" }, "join_date": { "type": "date" }, "name": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } } } } } } }
本小节主要通过查询例子,然后引出mapping中的字段类型和倒排索引,full text和精确查询,来加深mapping的感性的认识,最后说明mapping如何创建。
十一、Query DSL
GET /website/article/_search { "query": { "bool": { "must": [ { "match": { "content": "this" } },{ "match": { "author_id": 11400 } } ] } } } GET /website/article/_search {"_source": "title", "query": { "bool": { "must": [ { "match": { "author_id": 11400 } } ], "should": [ { "match": { "content": "this" } } ], "must_not": [ {"match": { "_id": 3 }} ] } },"sort": [ { "post_date": { "order": "asc" } } ] }
GET /test_index/test_type/_search { "query": { "term": { "field1": "test2" } } } GET /test_index/test_type/_search { "query": { "terms": { "field": ["test2","test"] } } }
bool
must,must_not,should,filter
每个子查询都会计算一个document针对它的相关度分数,然后bool综合所有分数,合并为一个分数,当然filter是不会计算分数的
{ "bool": { "must": { "match": { "title": "how to make millions" }}, "must_not": { "match": { "tag": "spam" }}, "should": [ { "match": { "tag": "starred" }} ], "filter": { "bool": { "must": [ { "range": { "date": { "gte": "2014-01-01" }}}, { "range": { "price": { "lte": 29.99 }}} ], "must_not": [ { "term": { "category": "ebooks" }} ] } } } }
十二、scroll批量查询和使用场景(reindex)
批量查询指定页数,类似分段查找。
GET /my_index/_search/?scroll=1m { "query":{ "match_all": {} }, "sort":["_doc"], "size":1 } GET /_search/scroll { "scroll": "1m", "scroll_id" : "DnF1ZXJ5VGhlbkZldGNoBQAAAAAAAAr4Fi1qYWV6ZUtxVC1tX1B2U0J5YTNEa1EAAAAAAAAK9BYtamFlemVLcVQtbV9QdlNCeWEzRGtRAAAAAAAACvYWLWphZXplS3FULW1fUHZTQnlhM0RrUQAAAAAAAAr3Fi1qYWV6ZUtxVC1tX1B2U0J5YTNEa1EAAAAAAAAK9RYtamFlemVLcVQtbV9QdlNCeWEzRGtR" }
如果想全部index重新建立新索引。可以根据上面的查询,然后批量插入到一个新的索引中,最终通过批量操作,把原来索引的别名指向新索引即可。
PUT my_index_new/my_type/1 { "type":"2017-01-02" } PUT my_index_new/my_type/2 { "type":"2017-01-03" } POST /_bulk { {"index":{ "_index": "my_index_new", "_type": "my_type", "_id": "2" }} {"title":"2017-01-02"} } POST /_aliases { "actions": [ { "remove": { "index": "my_index", "alias": "good_index" }}, { "add": { "index": "my_index_new", "alias": "good_index" }} ] }
es流程

上图中,当机器故障时,os cache中的数据会丢失,可以从translog回放,恢复到os cache中。disk保存的是上一个commit点的数据。目前的数据=disk数据+os cache数据。

浙公网安备 33010602011771号