ELK七：match

PUT t1/doc/1
{
  "title": "中国是世界上人口最多的国家"
}
PUT t1/doc/2
{
  "title": "美国是世界上军事实力最强大的国家"
}
PUT t1/doc/3
{
  "title": "北京是中国的首部"
}

GET t1/doc/_search
{
  "query": {
    "match": {
      "title": "中国"
    }
  }
}

{
  "took" : 7,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 3,
      "relation" : "eq"
    },
    "max_score" : 0.74627537,
    "hits" : [
      {
        "_index" : "t1",
        "_type" : "doc",
        "_id" : "3",
        "_score" : 0.74627537,
        "_source" : {
          "title" : "北京是中国的首部"
        }
      },
      {
        "_index" : "t1",
        "_type" : "doc",
        "_id" : "1",
        "_score" : 0.6625109,
        "_source" : {
          "title" : "中国是世界上人口最多的国家"
        }
      },
      {
        "_index" : "t1",
        "_type" : "doc",
        "_id" : "2",
        "_score" : 0.11290484,
        "_source" : {
          "title" : "美国是世界上军事实力最强大的国家"
        }
      }
    ]
  }
}

以上查询结果，怎么会把第2条记录也查出来？

这是分词器造成的：默认的standard分词器会把“中国”拆成“中”和“国”两个单字，而match查询只要命中其中任意一个字就算匹配。第2条记录中包含“国”字，所以也被查了出来。

查看分析器：

GET _analyze
{
  "analyzer": "standard",
  "text": "中国"
}

{
  "tokens" : [
    {
      "token" : "中",
      "start_offset" : 0,
      "end_offset" : 1,
      "type" : "<IDEOGRAPHIC>",
      "position" : 0
    },
    {
      "token" : "国",
      "start_offset" : 1,
      "end_offset" : 2,
      "type" : "<IDEOGRAPHIC>",
      "position" : 1
    }
  ]
}

怎么才能将"中国"两个字作为一个短语查询呢？使用match_phrase

1.短语匹配：match_phrase

GET t1/doc/_search
{
  "query": {
    "match_phrase": {
      "title": "中国"
    }
  }
}

{
  "took" : 67,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : 0.7462754,
    "hits" : [
      {
        "_index" : "t1",
        "_type" : "doc",
        "_id" : "3",
        "_score" : 0.7462754,
        "_source" : {
          "title" : "北京是中国的首部"
        }
      },
      {
        "_index" : "t1",
        "_type" : "doc",
        "_id" : "1",
        "_score" : 0.6299123,
        "_source" : {
          "title" : "中国是世界上人口最多的国家"
        }
      }
    ]
  }
}

GET t1/doc/_search
{
  "query": {
    "match_phrase": {
      "title": "中国是世界上"
    }
  }
}

{
  "took" : 5,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 2.100626,
    "hits" : [
      {
        "_index" : "t1",
        "_type" : "doc",
        "_id" : "1",
        "_score" : 2.100626,
        "_source" : {
          "title" : "中国是世界上人口最多的国家"
        }
      }
    ]
  }
}

假设需要查询同时包含"中国"和"世界"这四个字的记录呢？直接把"中国世界"作为短语查询，结果为空，因为文档中"中国"与"世界"并不相邻。

GET t1/doc/_search
{
  "query": {
    "match_phrase": {
      "title": "中国世界"
    }
  }
}

{
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 0,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  }
}

怎么办？仍然将"中国世界"作为短语查询，但把查询内容写在query中，并加上slop参数。slop表示允许词项之间相隔的最大位置数；"中国"与"世界"之间只隔了一个"是"字，所以将slop设为1即可匹配到第1条记录。

GET t1/doc/_search
{
  "query": {
    "match_phrase": {
      "title" : {
        "query": "中国世界",
        "slop": 1
      }
    }
  }
}

2.最左前缀查询：match_phrase_prefix

GET t1/doc/_search
{
  "query": {
    "match_phrase_prefix": {
      "title" :  "中国"
    }
  }
}

{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : 0.70484585,
    "hits" : [
      {
        "_index" : "t1",
        "_type" : "doc",
        "_id" : "3",
        "_score" : 0.70484585,
        "_source" : {
          "title" : "北京是中国的首部"
        }
      },
      {
        "_index" : "t1",
        "_type" : "doc",
        "_id" : "1",
        "_score" : 0.59047776,
        "_source" : {
          "title" : "中国是世界上人口最多的国家"
        }
      }
    ]
  }
}

GET t1/doc/_search
{
  "query": {
    "match_phrase_prefix": {
      "title" :  "中国是世界上"
    }
  }
}

{
  "took" : 163,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 2.100626,
    "hits" : [
      {
        "_index" : "t1",
        "_type" : "doc",
        "_id" : "1",
        "_score" : 2.100626,
        "_source" : {
          "title" : "中国是世界上人口最多的国家"
        }
      }
    ]
  }
}

3.同时匹配多个字段：multi_match

PUT t3/doc/1
{
  "t1": "beautiful china",
  "t2": "beautiful bj"
}
GET t3/doc/_search
{
  "query": {
    "multi_match": {
      "query" :  "beautiful",
      "fields": ["t1", "t2"]
    }
  }
}

在使用multi_match时，可以通过type参数指定查询类型，例如phrase（短语查询）或phrase_prefix（前缀查询）。

GET t3/doc/_search
{
  "query": {
    "multi_match": {
      "query" :  "beautiful",
      "fields": ["t1", "t2"],
      "type": "phrase_prefix"
      }
    }
  }
}

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 0.2876821,
    "hits" : [
      {
        "_index" : "t3",
        "_type" : "doc",
        "_id" : "1",
        "_score" : 0.2876821,
        "_source" : {
          "t1" : "beautiful china",
          "t2" : "beautiful bj"
        }
      }
    ]
  }
}

posted on 2018-04-07 13:33 myworldworld 阅读(289) 评论(0) 收藏举报

刷新页面返回顶部

myworldworld

ELK七：match

1.短语匹配：match_phrase

2.最左前缀查询：match_phrase_prefix

3.同时匹配多个字段：multi_match

导航

公告