查询建议

自动补全

针对自动补全场景而设计的建议器。此场景下用户每输入一个字符，就需要即时发送一次查询请求到后端查找匹配项，在用户输入速度较高的情况下对后端响应速度要求比较苛刻。因此在实现上它和Term、Phrase这两个Suggester采用了不同的数据结构：索引并非通过倒排来完成，而是将analyze过的数据编码成FST，和索引一起存放。对于一个open状态的索引，FST会被ES整个装载到内存里，因此进行前缀查找速度极快。但是FST只能用于前缀查找，这也是Completion Suggester的局限所在。

建立索引

PUT /music
{
    "mappings": {
        "properties" : {
            "suggest" : { 
                 "type" : "completion"
             },
              "title" : {
                  "type": "keyword"
              }
          }
      }
}

　　插入数据

POST /music/_doc/1
{
  "suggest":"test my book"
}

//为每个input指定不同的权重（weight），权重越高的建议排序越靠前：
PUT /music/_doc/2?refresh
{
  "suggest":[
    {
      "input":"test",
      "weight":10
    },
    {
      "input":"good",
      "weight":3
    }
  ]
}

　　根据前缀查询建议

POST /music/_search?pretty
{
  "suggest":{
    "song-suggest":{
      "prefix":"te",
      "completion":{
        "field":"suggest"
      }
    }
  }
}

{
  "took" : 23,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 0,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "suggest" : {
    "song-suggest" : [
      {
        "text" : "te",
        "offset" : 0,
        "length" : 2,
        "options" : [
          {
            "text" : "test",
            "_index" : "music",
            "_type" : "_doc",
            "_id" : "2",
            "_score" : 10.0,
            "_source" : {
              "suggest" : [
                {
                  "input" : "test",
                  "weight" : 10
                },
                {
                  "input" : "good",
                  "weight" : 3
                }
              ]
            }
          },
          {
            "text" : "test my book",
            "_index" : "music",
            "_type" : "_doc",
            "_id" : "1",
            "_score" : 1.0,
            "_source" : {
              "suggest" : "test my book"
            }
          }
        ]
      }
    ]
  }
}

View Code

　　对建议查询结果去重

{
    "suggest": {
        "song-suggest" : {
            "prefix" : "te", 
            "completion" : { 
                "field" : "suggest" ,
                 "skip_duplicates": true 
            }
        }
    }
}

填充数据

POST _bulk
{ "index" : { "_index" : "book"} }
{ "passage": "Lucene is cool"}
{ "index" : { "_index" : "book" } }
{ "passage": "Elasticsearch builds on top of lucene"}
{ "index" : { "_index" : "book" } }
{ "passage": "Elasticsearch rocks"}
{ "index" : { "_index" : "book" } }
{ "passage": "Elastic is the company behind ELK stack"}
{ "index" : { "_index" : "book"} }
{ "passage": "elk rocks"}
{ "index" : { "_index" : "book" } }
{  "passage": "elasticsearch is rock solid"}

Term suggester(根据编辑距离(edit distance) 给出提示、建议，给出的结果是单个词)

POST /book/_search
{
  "size": 1,
  "query": {
    "match": {
      "passage": "lucen rock"
    }
  },
  "suggest": {
    "term-suggestion": {
      "text": "lucen rock",
      "term": {
        "suggest_mode": "missing",
        "field": "passage"
      }
    }
  }
}

POST /book/_search
{
  "size": 1,
  "query": {
    "match": {
      "passage": "lucen rock"
    }
  },
  "suggest": {
    "term-suggestion": {
      "text": "lucen rock",
      "term": {
        "suggest_mode": "popular",
        "field": "passage"
      }
    }
  }
}

POST /book/_search
{
  "suggest": {
    "term-suggestion": {
      "text": "lucen rock",
      "term": {
        "suggest_mode": "always",
        "field": "passage",
        "prefix_length":0,
        "sort":"frequency"
      }
    }
  }
}


POST /book/_search
{
  "suggest": {
    "YOUR_SUGGESTION": {
      "text": "lucne and elasticsear rock hello world ",
      "phrase": {
        "field": "passage",
        "max_errors":2,
        "confidence":0,
        "direct_generator":[
          {
            "field": "passage",
            "suggest_mode": "always"
          }
        ],
        "highlight": {
          "pre_tag": "<em>",
          "post_tag": "</em>"
        }
      }
    }
  }
}

phrase suggester(自动补全短语，输入一个单词补全整个短语)

　在Term suggester的基础上，会考量多个term之间的关系，比如是否同时出现在索引的原文里，相邻程度，以及词频等等。给出的结果是一个句子

　测试用例（即上文 text 为 "lucne and elasticsear rock hello world " 的 phrase 请求）

completion suggester(补全单词，输入前半部分，补全整个单词)

定义特殊字段，用于提示补全。字段type需要定义为completion，数据会编码成FST，和索引一起存放，FST会全部加载到内存中。只能用于前缀补全。

　　 Completion Suggester提供自动完成/边输入边搜索（search-as-you-type）的功能。这是一种导航特性，可以在用户键入时引导他们找到相关结果，提高搜索精度。

PUT articles
{
  "mappings": {
    "properties": {
      "title_completion":{
        "type":"completion"
      }
    }
  }
}

POST articles/_bulk
{ "index" : { } }
{ "title_completion": "lucene is very cool"}
{ "index" : { } }
{ "title_completion": "Elasticsearch builds on top of lucene"}
{ "index" : { } }
{ "title_completion": "Elasticsearch rocks"}
{ "index" : { } }
{ "title_completion": "elastic is the company behind ELK stack"}
{ "index" : { } }
{ "title_completion": "Elk stack rocks"}
{ "index" : {} }

POST articles/_search
{
  "size": 0,
  "suggest":{
    "article-suggester":{
      "prefix":"Elasticsearch ",
      "completion":{
        "field":"title_completion"
      }
    }
  }
}

响应

{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 0,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "suggest" : {
    "article-suggester" : [
      {
        "text" : "Elasticsearch ",
        "offset" : 0,
        "length" : 14,
        "options" : [
          {
            "text" : "Elasticsearch builds on top of lucene",
            "_index" : "articles",
            "_type" : "_doc",
            "_id" : "VnJYMYYBOroKrQQ-tMZd",
            "_score" : 1.0,
            "_source" : {
              "title_completion" : "Elasticsearch builds on top of lucene"
            }
          },
          {
            "text" : "Elasticsearch rocks",
            "_index" : "articles",
            "_type" : "_doc",
            "_id" : "V3JYMYYBOroKrQQ-tMZd",
            "_score" : 1.0,
            "_source" : {
              "title_completion" : "Elasticsearch rocks"
            }
          }
        ]
      }
    ]
  }
}

View Code

基于上下文的 suggest completion

PUT comments
PUT comments/_mappings
{
  "properties":{
    "comment_autocomplete":{
      "type":"completion",
      "contexts":[{
         "type":"category",
         "name":"comment_category"
      }]
    }
  }
}

POST comments/_doc
{
  "comment":"I love the star war movies",
  "comment_autocomplete":{
    "input":["star wars"],
    "contexts":{
      "comment_category":"movies"
    }
  }
}

POST comments/_doc
{
  "comment":"Where can I find a Starbucks",
  "comment_autocomplete":{
    "input":["starbucks"],
    "contexts":{
      "comment_category":"coffee"
    }
  }
}

GET comments/_search
{
  "suggest": {
    "YOUR_SUGGESTION": {
      "prefix": "sta",
      "completion": {
        "field": "comment_autocomplete",
        "contexts":{
          "comment_category":"coffee"
        }
      }
    }
  }
}

模糊搜索(Fuzzy queries)

POST news/_search
{
  "suggest": {
    "YOUR_SUGGESTION": {
      "text": "鄱a",
      "completion": {
        "field": "title.kw",
        "fuzzy": {
          "fuzziness": 1
        }
      }
    }
  }
}

----结果----
{
  "took" : 7,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 0,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "suggest" : {
    "YOUR_SUGGESTION" : [
      {
        "text" : "鄱a",
        "offset" : 0,
        "length" : 2,
        "options" : [
          {
            "text" : "鄱阳湖",
            "_index" : "news",
            "_type" : "_doc",
            "_id" : "6",
            "_score" : 3.0,
            "_source" : {
              "title" : "鄱阳湖",
              "body" : "鄱阳湖，古称彭蠡、彭蠡泽、彭泽，位于江西省北部，地处九江、南昌、上饶三市，是中国第一大淡水湖，也是中国第二大湖，仅次于青海湖"
            }
          }
        ]
      }
    ]
  }
}

正则查询（Regex queries）

　　Completion Suggester还支持正则表达式查询，这意味着可以将前缀表示为正则表达式。

POST news/_search
{
  "suggest": {
    "YOUR_SUGGESTION": {
      "regex": "[p|w]y",
      "completion": {
        "field": "title.py"
      }
    }
  }
}

posted on 2020-06-13 18:14 溪水静幽阅读(149) 评论(0) 收藏举报