ES基础

一、概念

1.NRT 近实时，基本上是实时的 2.Cluster 集群 3.Node 节点 4.document&field 5.Index 6.Type 7.shard 8.replica

二、简单的集群操作和crud

查看集群健康状态
GET /_cat/health?v
查看索引 
GET /_cat/indices?v
 
删除索引
DELETE /test_index
添加索引和记录
PUT /test_index/test_type/1
{
  "name":"zhangsan",
  "age":12,
  "gender":"man"
}
获取记录
GET /test_index/test_type/1
#替换必须带上所有字段 
PUT /test_index/test_type/1
{
  "name":"lisi",
  "age":12,
  "gender":"man"
}

三、搜索

PUT /ecommerce/product/1
{
    "name" : "gaolujie yagao",
    "desc" :  "gaoxiao meibai",
    "price" :  30,
    "producer" :      "gaolujie producer",
    "tags": [ "meibai", "fangzhu" ]
}

PUT /ecommerce/product/2
{
    "name" : "jiajieshi yagao",
    "desc" :  "youxiao fangzhu",
    "price" :  25,
    "producer" :      "jiajieshi producer",
    "tags": [ "fangzhu" ]
}

PUT /ecommerce/product/3
{
    "name" : "zhonghua yagao",
    "desc" :  "caoben zhiwu",
    "price" :  40,
    "producer" :      "zhonghua producer",
    "tags": [ "qingxin" ]
}

GET /ecommerce/product/_search
{
  "query": {"match_all": {}}
}
#page from 0 
GET ecommerce/product/_search
{
  "query": {"match": {
    "name": "yagao"
  }},
  "_source": ["name","price","producer"], 
  "sort": [
    {
      "price": {
        "order": "desc"
      }
    }
  ],"from": 0,"size": 2
}
#搜索商品名称包含yagao，而且售价大于25元的商品
GET /ecommerce/product/_search
{
    "query" : {
        "bool" : {
            "must" : {
                "match" : {
                    "name" : "yagao" 
                }
            },
            "filter" : {
                "range" : {
                    "price" : { "gt" : 25 } 
                }
            }
        }
    }
}

GET /ecommerce/product/_search
{
  "query": {
    "bool": {
      "must": {
        "match": {
          "name": "yagao"
        }
      },
      
      "filter":{
        "range":{
          "price":{"gt":25}
        }
      }
      
    }
  }
}


GET /ecommerce/product/_search
{
  "query": {
    "match": {
      "producer": "zhonghua producer"
    }
  }
}
GET /ecommerce/product/_search
{
  "query": {
    "match_phrase": {
      "producer": "gaolujie producer"
    }
  }
}


GET /ecommerce/product/_search
{
  "query": {
    "match": {
      "producer": "zhonghua producer"
    }
  },
  "highlight": {"fields": {"producer": {}}}
}

四、聚合搜索

GET /ecommerce/product/_search

PUT /ecommerce/_mapping/product
{
  "properties": {"tags":{"type":"text","fielddata": true}}
}


GET /ecommerce/product/_search
{
  "aggs": {
    "group_by_tags": {
      "terms": {
        "field": "tags"
       
      }
    }
  }
}


GET /ecommerce/product/_search
{
  "query": {"match_phrase": {
    "name": "yagao"
  }}, 
  "aggs": {
    "groupbyprice":{
      "range": {
        "field": "price",
        "ranges": [
          {
            "from": 0,
            "to": 20
          },{
            "from": 20,
            "to": 40
          },{
            "from": 40,
            "to": 50
          }
        ]
      }
    },
    
    "groupbytags": {
      "terms": {
        "field": "tags","order": {
          "avg_price": "asc"
        }
      },"aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 3,
    "max_score": 0.25811607,
    "hits": [
      {
        "_index": "ecommerce",
        "_type": "product",
        "_id": "2",
        "_score": 0.25811607,
        "_source": {
          "name": "jiajieshi yagao",
          "desc": "youxiao fangzhu",
          "price": 25,
          "producer": "jiajieshi producer",
          "tags": [
            "fangzhu"
          ]
        }
      },
      {
        "_index": "ecommerce",
        "_type": "product",
        "_id": "1",
        "_score": 0.25811607,
        "_source": {
          "name": "gaolujie yagao",
          "desc": "gaoxiao meibai",
          "price": 30,
          "producer": "gaolujie producer",
          "tags": [
            "meibai",
            "fangzhu"
          ]
        }
      },
      {
        "_index": "ecommerce",
        "_type": "product",
        "_id": "3",
        "_score": 0.25811607,
        "_source": {
          "name": "zhonghua yagao",
          "desc": "caoben zhiwu",
          "price": 40,
          "producer": "zhonghua producer",
          "tags": [
            "qingxin"
          ]
        }
      }
    ]
  },
  "aggregations": {
    "groupbyprice": {
      "buckets": [
        {
          "key": "0.0-20.0",
          "from": 0,
          "to": 20,
          "doc_count": 0
        },
        {
          "key": "20.0-40.0",
          "from": 20,
          "to": 40,
          "doc_count": 2
        },
        {
          "key": "40.0-50.0",
          "from": 40,
          "to": 50,
          "doc_count": 1
        }
      ]
    },
    "groupbytags": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "fangzhu",
          "doc_count": 2,
          "avg_price": {
            "value": 27.5
          }
        },
        {
          "key": "meibai",
          "doc_count": 1,
          "avg_price": {
            "value": 30
          }
        },
        {
          "key": "qingxin",
          "doc_count": 1,
          "avg_price": {
            "value": 40
          }
        }
      ]
    }
  }
}

　五、字段和乐观锁

　　_id可以指定，不指定的话自动生成。

　　_source，可以指定显示字段

　　_type,_index索引和type

　　_version控制锁

#返回结果只包含name,price字段
GET /ecommerce/product/1?_source=name,price

#创建索引
PUT /my_index/my_type/1
{
  "test_field":"abc"
}

#es，_version=1，?version=1，才能更新成功,两个版本号必须相同才能更新成功
#es，_version=1，?version>1&version_type=external，才能成功，比如说?version=2&version_type=external，版本号必须大于当前版本号

PUT /my_index/my_type/1?version=7&version_type=external
{ "test_field":"dad"}

　六、脚本更新

GET /test_index/test_type/10

PUT /test_index/test_type/11/
{
  "num":0,"tags":[],"tag":""
}
#新建test-add-tags.groovy文件，里面内容为 ctx._source.tag+=new_tag
#上面不加引号的为参数，
POST /test_index/test_type/11/_update
{
  "script": {
    "lang": "groovy",
    "file": "test-add-tags",
    "params": {
      "new_tag":"tag1"
    }
  }
}
#如果存在10的数据，操作脚本，不存在插入。
POST /test_index/test_type/10/_update
{
  "script":  "ctx._source.num+=1"
  ,"upsert": {"num":0,"tag":[]}
}

　　七、批量查找和删除

#mget   
GET /test_index/_mget
{
  "docs":[
    { "_id":11},{ "_id":10}
    
    ]
}

PUT /_bulk
{"delete":{"_index":"test_index","_type":"test_type","_id":11}}
{"index":{"_index":"test_index","_type":"test_type","_id":"15"}}
{"num":10,"tag":[]}


PUT /test_index/test_type/_bulk
{"index":{"_id":15}}
{"num":15,"tag":[1,2]}
{"update":{"_id":15,"_retry_on_conflict":3}}
{"doc":{"num":16,"tag":[]}}

　　八、index的shard个数一经指定，不能改变

　　一般情况下路由公式是 position = hash(id) % shardNum（shardNum为primary shard的个数）。当shardNum改变时，同一个id算出来的position也会改变，就找不到document原来所在的位置了。

　九、es的数据写一致性如何保证的

　　在写请求时可以指定其活跃数来保证。put /index/type/id?consistency=quorum

　　 consistency的枚举：one ,all（all shard），quorum=int( (primary + number_of_replicas) / 2 ) + 1，默认为quorum

　　当设置为quorum且活跃的shard数小于quorum时，写请求会等待活跃数恢复到quorum，可以指定等待的超时时间。put /index/type/id?consistency=quorum&timeout=30

　十、mapping

es search?q=单词，es search?q=字段：单词

GET /test_index/test_type/_search?q=单词：当单词中包含特殊符号（如 - 等）时，也会对单词进行分词，所以这是full text检索，可以理解为部分匹配。

原理是在保存document时，会建立一个_all 字段，去匹配_all字段即可。

{

"name": "jack",
"age": 26,
"email": "jack@sina.com",
"address": "guangzhou"
}

_all :"jack 26 jack@sina.com guangzhou"，

有些字段类型，比如日期，在保存时，已经指定了为日期类型，不会分词，在es  search?q=字段：单词时必须精确匹配。

PUT /website/article/1
{
"post_date": "2017-01-01",
"title": "my first article",
"content": "this is my first article in this website",
"author_id": 11400
}

PUT /website/article/2
{
"post_date": "2017-01-02",
"title": "my second article",
"content": "this is my second article in this website",
"author_id": 11400
}

PUT /website/article/3
{
"post_date": "2017-01-03",
"title": "my third article",
"content": "this is my third article in this website",
"author_id": 11400
}

GET /website/article/_search?q=2017-01-01 3个结果 （部分匹配，因为对2017-01-01进行拆分了）
GET /website/article/_search?q=post_date:2017-01-01 1 （时间为特殊类型，必须全量匹配）

GET /website/_mapping/article
{
  "website": {
    "mappings": {
      "article": {
        "properties": {
          "author_id": {
            "type": "long"
          },
          "content": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "post_date": {
            "type": "date"
          },
          "title": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      }
    }
  }
}

mapping，就是index的type的元数据，每个type都有一个自己的mapping，决定了数据类型，建立倒排索引的行为，还有进行搜索的行为

PUT /website
{
  "mappings": {
    "article":{
      "properties": {
        "author_id":{"type":"long"},
        "title":{"type":"text","analyzer": "english"},
        "content":{"type":"text","analyzer": "standard"},
        "post_date":{"type":"date"},
        "publisher_id":{"type":"text","index": "not_analyzed"}
      }
    }
  }
}

只能在创建index时新建mapping，或者为新增的字段指定mapping，不能修改已有字段的mapping。

object类型创建

{
  "company": {
    "mappings": {
      "employee": {
        "properties": {
          "address": {
            "properties": {
              "city": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "country": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "province": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              }
            }
          },
          "age": {
            "type": "long"
          },
          "join_date": {
            "type": "date"
          },
          "name": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      }
    }
  }
}

本小节主要通过查询例子，然后引出mapping中的字段类型和倒排索引，full text和精确查询，来加深mapping的感性的认识，最后说明mapping如何创建。

十一、Query DSL

GET /website/article/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "content": "this"
          }
        },{
          "match": {
            "author_id": 11400
          }
        }
      ] 
    }
  }
}

GET /website/article/_search
{"_source": "title", 
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "author_id": 11400
          }
        }
      ],
      "should": [
        {
          "match": {
            "content": "this"
          }
        }
      ],
      "must_not": [
        {"match": {
          "_id": 3
        }}
      ]
    }
  },"sort": [
    {
      "post_date": {
        "order": "asc"
      }
    }
  ]
}

GET /test_index/test_type/_search
{
  "query": {
    "term": {
      "field1": "test2"
    }
  }
}

GET /test_index/test_type/_search
{
  "query": {
    "terms": 
      {
        "field": ["test2","test"]
      } 
  }
}

bool
must，must_not，should，filter

每个子查询都会计算一个document针对它的相关度分数，然后bool综合所有分数，合并为一个分数，当然filter是不会计算分数的

{
    "bool": {
        "must":     { "match": { "title": "how to make millions" }},
        "must_not": { "match": { "tag":   "spam" }},
        "should": [
            { "match": { "tag": "starred" }}
        ],
        "filter": {
          "bool": { 
              "must": [
                  { "range": { "date": { "gte": "2014-01-01" }}},
                  { "range": { "price": { "lte": 29.99 }}}
              ],
              "must_not": [
                  { "term": { "category": "ebooks" }}
              ]
          }
        }
    }
}

十二、scroll批量查询和使用场景（reindex）

scroll用于分批查询大量数据：第一次查询时指定每批的条数（size）和scroll的有效时间，并返回一个scroll_id；之后带上scroll_id一批一批地取数据，类似分段查找。

GET /my_index/_search/?scroll=1m
{
"query":{
"match_all": {}
},
"sort":["_doc"],
"size":1
}
GET /_search/scroll
{
"scroll": "1m", 
"scroll_id" : "DnF1ZXJ5VGhlbkZldGNoBQAAAAAAAAr4Fi1qYWV6ZUtxVC1tX1B2U0J5YTNEa1EAAAAAAAAK9BYtamFlemVLcVQtbV9QdlNCeWEzRGtRAAAAAAAACvYWLWphZXplS3FULW1fUHZTQnlhM0RrUQAAAAAAAAr3Fi1qYWV6ZUtxVC1tX1B2U0J5YTNEa1EAAAAAAAAK9RYtamFlemVLcVQtbV9QdlNCeWEzRGtR"
}

如果想全部index重新建立新索引。可以根据上面的查询，然后批量插入到一个新的索引中，最终通过批量操作，把原来索引的别名指向新索引即可。

PUT my_index_new/my_type/1
{
"type":"2017-01-02"
}
PUT my_index_new/my_type/2
{
"type":"2017-01-03"
}


POST /_bulk
{"index":{ "_index": "my_index_new", "_type": "my_type", "_id": "2" }}
{"title":"2017-01-02"}

 


POST /_aliases
{
"actions": [
{ "remove": { "index": "my_index", "alias": "good_index" }},
{ "add": { "index": "my_index_new", "alias": "good_index" }}
]
}

es流程

上图中，当机器故障时，os cache中的数据会丢失，可以从translog回放恢复到os cache中。disk保存的是上一个commit点的数据。目前的数据 = disk数据 + os cache数据。

posted @ 2020-05-30 21:26 javabeginer 阅读(251) 评论(0) 收藏举报

刷新页面返回顶部

javabeginer

ES基础

公告