Elasticsearch拼音分词器使用指南
Elasticsearch拼音分词器使用指南
Elasticsearch(ES)的拼音分词器会在数据写入时,先将字段内容转换为拼音,然后再进行分词。
如何使用
**索引结构**
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"my_pinyin_analyzer": {
"tokenizer": "my_pinyin_tokenizer"
}
},
"tokenizer": {
"my_pinyin_tokenizer": {
"lowercase": "true",
"keep_original": "false",
"keep_first_letter": "false",
"keep_separate_first_letter": "false",
"type": "pinyin",
"limit_first_letter_length": "16",
"keep_full_pinyin": "true"
}
}
},
"number_of_shards": "4"
}
},
"mappings": {
"_source": {
"enabled": true
},
"properties": {
"create_time": {
"format": "yyyy-MM-dd HH:mm:ss Z||yyyy-MM-dd HH:mm:ss||yyyy-MM-dd HH:mm:ss.SSS Z||yyyy-MM-dd HH:mm:ss.SSS||yyyy-MM-dd HH:mm:ss,SSS||yyyy/MM/dd HH:mm:ss||yyyy-MM-dd HH:mm:ss,SSS Z||yyyy/MM/dd HH:mm:ss,SSS Z||epoch_millis||yyyy-MM-dd",
"index": true,
"type": "date"
},
"title_py": {
"index": true,
"type": "text",
"analyzer": "my_pinyin_analyzer"
},
"title": {
"index": true,
"type": "text"
}
}
},
"aliases": {}
}
**插入数据**
POST /cn_taoym_pinyin_analyzer/_bulk?refresh
{"index":{}}
{"create_time": "2025-08-01 08:30:00", "title": "第一条数据", "title_py": "第一条数据"}
{"index":{}}
{"create_time": "2025-08-01 08:30:00", "title": "文档示例", "title_py": "文档示例"}
{"index":{}}
{"create_time": "2025-08-01 08:30:00", "title": "测试数据", "title_py": "测试数据"}
{"index":{}}
{"create_time": "2025-08-01 08:30:00", "title": "Elasticsearch教程", "title_py": "Elasticsearch教程"}
{"index":{}}
{"create_time": "2025-08-01 08:30:00", "title": "中文拼音分析器", "title_py": "中文拼音分析器"}
**查询**
GET cn_taoym_pinyin_analyzer/_search
{
"query": {
"match": {
"title_py": "wendang"
}
}
}
# 或者直接用汉字查询(因为“分”会被拼音分词器转换为 fen)
GET cn_taoym_pinyin_analyzer/_search
{
"query": {
"match": {
"title_py": "分"
}
}
}
# 上述查询中的 match 也可以替换为 match_phrase(短语匹配)
**结果**(以第一个查询 "wendang" 为例)
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 4,
"successful" : 4,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 2,
"max_score" : 2.345461,
"hits" : [ {
"_index" : "cn_taoym_pinyin_analyzer_2025-08",
"_type" : "_doc",
"_id" : "F7_up5gBCQbF-O0GoRHk",
"_score" : 2.345461,
"_source" : {
"create_time" : "2025-08-01 08:30:00",
"title_py" : "文档示例",
"title" : "文档示例"
}
}, {
"_index" : "cn_taoym_pinyin_analyzer_2025-08",
"_type" : "_doc",
"_id" : "Gr_up5gBCQbF-O0GoRHk",
"_score" : 0.2876821,
"_source" : {
"create_time" : "2025-08-01 08:30:00",
"title_py" : "中文拼音分析器",
"title" : "中文拼音分析器"
}
} ]
}
}
**手动分词**
GET /cn_taoym_pinyin_analyzer_2025-08/_analyze
{
"field": "title_py",
"text": "分"
}

浙公网安备 33010602011771号