10 基于dis_max实现best fields策略进行多字段搜索

      TF (Term Frequency): 词频,基于词项(term),表示一个词项在某个文档中出现的次数。词频越高,文档得分越高。

   IDF (Inverse Document Frequency): 逆文档频率,基于词项(term)。逆文档频率越高,词项就越罕见;评分公式利用该因子为包含罕见词项的文档加权。

   构造数据

DELETE /forum

PUT /forum
{"settings":{"number_of_shards": 1}}

POST /forum/_bulk
{ "index": { "_id": 1 }}
{ "articleID" : "XHDK-A-1293-#fJ3", "userID" : 1, "hidden": false, "postDate": "2017-01-01" }
{ "index": { "_id": 2 }}
{ "articleID" : "KDKE-B-9947-#kL5", "userID" : 1, "hidden": false, "postDate": "2017-01-02" }
{ "index": { "_id": 3 }}
{ "articleID" : "JODL-X-1937-#pV7", "userID" : 2, "hidden": false, "postDate": "2017-01-01" }
{ "index": { "_id": 4 }}
{ "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden": true, "postDate": "2017-01-02" }

POST /forum/_bulk
{"update":{"_id":"1"}}
{"doc":{"tag":["java","hadoop"]}}
{"update":{"_id":"2"}}
{"doc":{"tag":["java"]}}
{"update":{"_id":"3"}}
{"doc":{"tag":["hadoop"]}}
{"update":{"_id":"4"}}
{"doc":{"tag":["java","elasticsearch"]}}

POST /forum/_bulk
{"update":{"_id":"1"}}
{"doc":{"view_cnt":30}}
{"update":{"_id":"2"}}
{"doc":{"view_cnt":50}}
{"update":{"_id":"3"}}
{"doc":{"view_cnt":100}}
{"update":{"_id":"4"}}
{"doc":{"view_cnt":80}}

POST /forum/_bulk
{"index":{"_id":5}}
{"articleID":"DHJK-B-1395-#Ky5","userID":3,"hidden":false,"postDate":"2019-06-01","tag":["elasticsearch"],"tag_cnt":1,"view_cnt":10}

POST /forum/_bulk
{"update":{"_id":"5"}}
{"doc":{"postDate":"2019-05-01"}}


POST /forum/_bulk
{"update":{"_id":"1"}}
{"doc":{"title":"this is java and elasticsearch blog"}}
{"update":{"_id":"2"}}
{"doc":{"title":"this is java blog"}}
{"update":{"_id":"3"}}
{"doc":{"title":"this is elasticsearch blog"}}
{"update":{"_id":"4"}}
{"doc":{"title":"this is java, elasticsearch, hadoop blog"}}
{"update":{"_id":"5"}}
{"doc":{"title":"this is spark blog"}}


POST /forum/_bulk
{"update":{"_id":"1"}}
{"doc":{"content":"i like to write best elasticsearch article"}}
{"update":{"_id":"2"}}
{"doc":{"content":"i think java is the best programming language"}}
{"update":{"_id":"3"}}
{"doc":{"content":"i am only an elasticsearch beginner"}}
{"update":{"_id":"4"}}
{"doc":{"content":"elasticsearch and hadoop are all very good solution, i am a beginner"}}
{"update":{"_id":"5"}}
{"doc":{"content":"spark is best big data solution based on scala ,an programming language similar to java"}}

  普通查询

GET /forum/_search
{
  "query":{
    "bool":{
      "should":[
        {
          "match":{
            "title":"java solution"
          }
        },
        {
          "match":{
            "content":"java solution"
          }
        }
      ],
      "minimum_should_match": 1
    }
  }
}

  结果:

{
  "took" : 11,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 4,
      "relation" : "eq"
    },
    "max_score" : 1.5041151,
    "hits" : [
      {
        "_index" : "forum",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 1.5041151,
        "_source" : {
          "articleID" : "KDKE-B-9947-#kL5",
          "userID" : 1,
          "hidden" : false,
          "postDate" : "2017-01-02",
          "tag" : [
            "java"
          ],
          "view_cnt" : 50,
          "title" : "this is java blog",
          "content" : "i think java is the best programming language"
        }
      },
      {
        "_index" : "forum",
        "_type" : "_doc",
        "_id" : "5",
        "_score" : 1.4233949,
        "_source" : {
          "articleID" : "DHJK-B-1395-#Ky5",
          "userID" : 3,
          "hidden" : false,
          "postDate" : "2019-05-01",
          "tag" : [
            "elasticsearch"
          ],
          "tag_cnt" : 1,
          "view_cnt" : 10,
          "title" : "this is spark blog",
          "content" : "spark is best big data solution based on scala ,an programming language similar to java"
        }
      },
      {
        "_index" : "forum",
        "_type" : "_doc",
        "_id" : "4",
        "_score" : 1.2715201,
        "_source" : {
          "articleID" : "QQPX-R-3956-#aD8",
          "userID" : 2,
          "hidden" : true,
          "postDate" : "2017-01-02",
          "tag" : [
            "java",
            "elasticsearch"
          ],
          "view_cnt" : 80,
          "title" : "this is java, elasticsearch, hadoop blog",
          "content" : "elasticsearch and hadoop are all very good solution, i am a beginner"
        }
      },
      {
        "_index" : "forum",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 0.47728038,
        "_source" : {
          "articleID" : "XHDK-A-1293-#fJ3",
          "userID" : 1,
          "hidden" : false,
          "postDate" : "2017-01-01",
          "tag" : [
            "java",
            "hadoop"
          ],
          "view_cnt" : 30,
          "title" : "this is java and elasticsearch blog",
          "content" : "i like to write best elasticsearch article"
        }
      }
    ]
  }
}

  id=2的数据排在了前面:bool 的 should 会把所有匹配子查询的得分相加,id=2 的 title 和 content 字段都匹配了 java,两个分数累加后反而最高。其实我们希望id=5的排在前面,毕竟id=5的数据 content字段既有java又有solution。那看下dis_max吧

GET /forum/_search
{
  "query":{
    "dis_max":{
      "queries":[
        {
          "match":{
            "title":"java solution"
          }
        },
        {
          "match":{
            "content":"java solution"
          }
        }
      ]
    }
  }
}

  结果:

{
  "took" : 22,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 4,
      "relation" : "eq"
    },
    "max_score" : 1.4233949,
    "hits" : [
      {
        "_index" : "forum",
        "_type" : "_doc",
        "_id" : "5",
        "_score" : 1.4233949,
        "_source" : {
          "articleID" : "DHJK-B-1395-#Ky5",
          "userID" : 3,
          "hidden" : false,
          "postDate" : "2019-05-01",
          "tag" : [
            "elasticsearch"
          ],
          "tag_cnt" : 1,
          "view_cnt" : 10,
          "title" : "this is spark blog",
          "content" : "spark is best big data solution based on scala ,an programming language similar to java"
        }
      },
      {
        "_index" : "forum",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 0.9395274,
        "_source" : {
          "articleID" : "KDKE-B-9947-#kL5",
          "userID" : 1,
          "hidden" : false,
          "postDate" : "2017-01-02",
          "tag" : [
            "java"
          ],
          "view_cnt" : 50,
          "title" : "this is java blog",
          "content" : "i think java is the best programming language"
        }
      },
      {
        "_index" : "forum",
        "_type" : "_doc",
        "_id" : "4",
        "_score" : 0.7942397,
        "_source" : {
          "articleID" : "QQPX-R-3956-#aD8",
          "userID" : 2,
          "hidden" : true,
          "postDate" : "2017-01-02",
          "tag" : [
            "java",
            "elasticsearch"
          ],
          "view_cnt" : 80,
          "title" : "this is java, elasticsearch, hadoop blog",
          "content" : "elasticsearch and hadoop are all very good solution, i am a beginner"
        }
      },
      {
        "_index" : "forum",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 0.48898652,
        "_source" : {
          "articleID" : "XHDK-A-1293-#fJ3",
          "userID" : 1,
          "hidden" : false,
          "postDate" : "2017-01-01",
          "tag" : [
            "java",
            "hadoop"
          ],
          "view_cnt" : 30,
          "title" : "this is java and elasticsearch blog",
          "content" : "i like to write best elasticsearch article"
        }
      }
    ]
  }
}

  best fields策略 : 搜索到的结果,应该是某一个field中匹配到了尽可能多的关键词,被排在前面;而不是尽可能多的field匹配到了少数的关键词,排在了前面.

       dis_max语法:直接取多个query中分数最高的那一个query的分数,作为文档的最终得分(未设置 tie_breaker 时,其他query的分数不参与计算)


 

posted on 2020-07-21 08:14  溪水静幽  阅读(117)  评论(0)    收藏  举报