Elasticsearch拼音和ik分词器的结合应用

一、创建索引时，自定义拼音分词和ik分词

PUT /my_index
{
    "index": {
        "analysis": {
            "analyzer": {
                "ik_pinyin_analyzer": {  自定义分词name
                    "type": "custom",
                    "tokenizer": "ik_smart",
                    "filter": ["my_pinyin", "word_delimiter"]
                },
                "pinyin_analyzer": {
                    "type": "custom",
                    "tokenizer": "ik_max_word",
                    "filter": ["my_pinyin", "word_delimiter"]
                }
            },
            "filter": {
                "my_pinyin": {
                    "type" : "pinyin",
                    "keep_separate_first_letter" : false, 启用该选项时，每个字的首字母将作为单独的词项保留，例如：刘德华 > l，d，h，默认值：false。注意：由于这些词项出现频率过高，查询结果可能过于模糊
                    "keep_full_pinyin" : true,  启用该选项时，将保留每个字的完整拼音，例如：刘德华 > [liu，de，hua]，默认值：true
                    "keep_original" : true, 启用此选项时，也将保留原始输入，默认值：false
                    "limit_first_letter_length" : 16, 设置first_letter结果的最大长度，默认值：16
                    "lowercase" : true,  小写非中文字母，默认值：true
                    "remove_duplicated_term" : true  启用此选项后，将删除重复的词项以节省索引空间，例如：de的 > de，默认值：false。注意：与位置相关的查询可能会受到影响
} 
} 
} 
} 
}

二、创建mapping时，设置字段的分词器（注：在同一索引下创建不同的type时，同名字段的属性必须设置一致）

POST /my_index/user/_mapping
{
    "user": {
        "properties": {
          "id":{
            "type":"integer"
          },
            "userName": {
              "type": "text",
              "store": "no",
              "term_vector": "with_positions_offsets",
              "analyzer": "ik_pinyin_analyzer",   自定义分词器name
              "boost": 10,
              "fielddata" : true,
              "fields": {
                    "raw": {
                        "type": "keyword"    设置keyword时，对该字段不进行分析
                    }
                }
            },
            "reason":{
              "type": "text",
              "store": "no",  当字段的store为true时，该field的数据会被单独存储。此时如果请求返回field1（store：yes），es会识别出field1已被单独存储，因此不会从_source中加载，而是从field1的存储块中加载；此处设为no，即不单独存储，取值时从_source中加载。
              "term_vector": "with_positions_offsets",
              "analyzer": "ik_pinyin_analyzer",
              "boost": 10
            }
        }
    }
}

测试

PUT /my_index/user/1
{
  "id":1,
  "userName":"刘德华",
  "reason":"大帅哥"
}

PUT /my_index/user/2
{
  "id":2,
  "userName":"刘德华",
  "reason":"中华人民"
}

不分词查询

GET /my_index/user/_search
{
  "query": {
    "match": {
      "userName.raw": "刘德华"
    }
  }
}


{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.2876821,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 0.2876821,
        "_source": {
          "id": 2,
          "userName": "刘德华",
          "reason": "中华人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 0.2876821,
        "_source": {
          "id": 1,
          "userName": "刘德华",
          "reason": "大帅哥"
        }
      }
    ]
  }
}

分词查询

GET /my_index/user/_search
{
  "query": {
    "match": {
      "userName": "刘"
    }
  }
}

{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.31331712,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 0.31331712,
        "_source": {
          "id": 2,
          "userName": "刘德华",
          "reason": "中华人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 0.31331712,
        "_source": {
          "id": 1,
          "userName": "刘德华",
          "reason": "大帅哥"
        }
      }
    ]
  }
}

拼音分词

GET /my_index/user/_search
{
  "query": {
    "match": {
      "reason": "shuai"
    }
  }
}


{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 3.4884284,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 3.4884284,
        "_source": {
          "id": 1,
          "userName": "刘德华",
          "reason": "大帅哥"
        }
      }
    ]
  }
}

分组聚合

GET /my_index/user/_search
{ 
  "size":2,
  "query": {
    "match": {
      "userName": "liu"
    }
  },
  "aggs": {
    "group_by_meetingType": {
      "terms": {
        "field": "userName.raw"
      }
    }
  }
}

{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 3.133171,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 3.133171,
        "_source": {
          "id": 2,
          "userName": "刘德华",
          "reason": "中华人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 3.133171,
        "_source": {
          "id": 1,
          "userName": "刘德华",
          "reason": "大帅哥"
        }
      }
    ]
  },
  "aggregations": {
    "group_by_meetingType": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "刘德华",
          "doc_count": 2
        }
      ]
    }
  }
}

各位大神，以上都是个人理解，如有不同的想法或建议，欢迎评论！

posted @ 2018-10-31 16:35 Be_Your_Sun 阅读(3029) 评论(1) 收藏举报

刷新页面返回顶部

远方的风啊

Elasticsearch拼音和ik分词器的结合应用

各位大神，以上都是个人理解，如有不同的想法或建议，欢迎评论！

公告