Elasticsearch拼音分词器使用指南

Elasticsearch拼音分词器使用指南

Elasticsearch 的拼音分词器会在数据写入时先将字段内容转换为拼音,然后再进行分词。

如何使用

**索引结构**

{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "my_pinyin_analyzer": {
            "tokenizer": "my_pinyin_tokenizer"
          }
        },
        "tokenizer": {
          "my_pinyin_tokenizer": {
            "lowercase": "true",
            "keep_original": "false",
            "keep_first_letter": "false",
            "keep_separate_first_letter": "false",
            "type": "pinyin",
            "limit_first_letter_length": "16",
            "keep_full_pinyin": "true"
          }
        }
      },
      "number_of_shards": "4"
    }
  },
  "mappings": {
    "_source": {
      "enabled": true
    },
    "properties": {
      "create_time": {
        "format": "yyyy-MM-dd HH:mm:ss Z||yyyy-MM-dd HH:mm:ss||yyyy-MM-dd HH:mm:ss.SSS Z||yyyy-MM-dd HH:mm:ss.SSS||yyyy-MM-dd HH:mm:ss,SSS||yyyy/MM/dd HH:mm:ss||yyyy-MM-dd HH:mm:ss,SSS Z||yyyy/MM/dd HH:mm:ss,SSS Z||epoch_millis||yyyy-MM-dd",
        "index": true,
        "type": "date"
      },
      "title_py": {
        "index": true,
        "type": "text",
        "analyzer": "my_pinyin_analyzer"
      },
      "title": {
        "index": true,
        "type": "text"
      }
    }
  },
  "aliases": {}
}

**插入数据**

POST /cn_taoym_pinyin_analyzer/_bulk?refresh
{"index":{}}
{"create_time": "2025-08-01 08:30:00", "title": "第一条数据", "title_py": "第一条数据"}
{"index":{}}
{"create_time": "2025-08-01 08:30:00", "title": "文档示例", "title_py": "文档示例"}
{"index":{}}
{"create_time": "2025-08-01 08:30:00", "title": "测试数据", "title_py": "测试数据"}
{"index":{}}
{"create_time": "2025-08-01 08:30:00", "title": " Elasticsearch教程", "title_py": "Elasticsearch教程"}
{"index":{}}
{"create_time": "2025-08-01 08:30:00", "title": "中文拼音分析器", "title_py": "中文拼音分析器"}

**查询**

GET cn_taoym_pinyin_analyzer/_search
{
  "query": {
    "match": {
      "title_py": "wendang"
    }
  }
}

# 或者直接用汉字查询(查询词“分”同样会经过拼音分词器,被转换为 fen)
GET cn_taoym_pinyin_analyzer/_search
{
  "query": {
    "match": {
      "title_py": "分"
    }
  }
}
# 上面的 match 也可以替换为 match_phrase

**结果**

{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 4,
    "successful" : 4,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 2,
    "max_score" : 2.345461,
    "hits" : [ {
      "_index" : "cn_taoym_pinyin_analyzer_2025-08",
      "_type" : "_doc",
      "_id" : "F7_up5gBCQbF-O0GoRHk",
      "_score" : 2.345461,
      "_source" : {
        "create_time" : "2025-08-01 08:30:00",
        "title_py" : "文档示例",
        "title" : "文档示例"
      }
    }, {
      "_index" : "cn_taoym_pinyin_analyzer_2025-08",
      "_type" : "_doc",
      "_id" : "Gr_up5gBCQbF-O0GoRHk",
      "_score" : 0.2876821,
      "_source" : {
        "create_time" : "2025-08-01 08:30:00",
        "title_py" : "中文拼音分析器",
        "title" : "中文拼音分析器"
      }
    } ]
  }
}

**手动分词**

GET /cn_taoym_pinyin_analyzer_2025-08/_analyze
{
  "field": "title_py",
  "text": "分"
}
posted @ 2025-08-14 18:56  实习小生  阅读(56)  评论(0)    收藏  举报