初试ElasticSearch做菜谱搜索 整理思路
1.使用 Docker 部署 Elasticsearch 集群及 Kibana 服务
借鉴https://blog.csdn.net/ctwy291314/article/details/111313419这位博主的部署方式。
2.学习es的DSL语法
推荐【慕课】ElasticSearch+Spark 构建高匹配度搜索服务+千人千面推荐系统
3.使用 Logstash 的 logstash-input-jdbc 插件完成数据初始化,构建全量索引
4.使用阿里 canal 中间件完成准实时的增量索引构建

5.业务功能开发(中文IK分词器插件安装、定制化分词、同义词扩展、相关性重塑)
6.总结一些东西
# Full-text recipe search with popularity boosting and a tag facet.
#
# Notes on the request below (comments are kept outside the body so the body
# stays strict JSON):
# - multi_match: "name^10" boosts matches on the name field by a factor of 10.
#   "most_fields" is only one of the available multi_match types.
# - A term clause placed under "must" would filter too, but it would also
#   contribute to the score. Clauses under "filter" do not take part in
#   scoring, so tags, status flags and similar conditions belong there.
# - functions: extra custom scoring. Each field_value_factor is scaled by its
#   "weight". "missing": 0 makes documents without the counter contribute 0
#   instead of failing the whole request.
# - boost_mode "sum" adds the function score to the query score; with
#   "replace" the function score would replace the query score entirely.
# - sort: sorting on _score keeps the relevance score; sorting on a regular
#   document field instead means hits are not scored.
GET cookbook/_search
{
  "explain": true,
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must": [
            {
              "multi_match": {
                "query": "国庆佳节",
                "fields": [
                  "name^10",
                  "introduction",
                  "description",
                  "materials",
                  "seasons",
                  "categories",
                  "platforms",
                  "themes",
                  "tags"
                ],
                "type": "most_fields"
              }
            }
          ],
          "filter": [
            { "term": { "verified": { "value": true } } },
            { "term": { "grounding": { "value": true } } },
            { "term": { "tags": "家常菜" } },
            { "term": { "tags": "夜宵" } }
          ]
        }
      },
      "functions": [
        {
          "field_value_factor": {
            "field": "collect_count",
            "missing": 0
          },
          "weight": 0.00002
        },
        {
          "field_value_factor": {
            "field": "view_count",
            "missing": 0
          },
          "weight": 0.00002
        }
      ],
      "score_mode": "sum",
      "boost_mode": "sum"
    }
  },
  "sort": [
    { "_score": { "order": "desc" } }
  ],
  "aggs": {
    "group_by_tags": {
      "terms": { "field": "tags" }
    }
  }
}

# For ordering inside the app, either use "sort" on a field (no scoring), or
# set boost_mode to "replace" and order by the custom function score.

# Inspect how text is tokenized at index time for the "tags" field.
GET cookbook/_analyze
{
  "field": "tags",
  "text": ["创意菜 甜 西餐 甜品 电烤箱"]
}

# Inspect how search text is tokenized by the IK analyzers.
GET _analyze?pretty
{
  "text": ["ROKI"],
  "analyzer": "ik_max_word"
}

GET _analyze?pretty
{
  "text": ["创意菜 甜 西餐 甜品 电烤箱"],
  "analyzer": "ik_smart"
}
2020-12-23 新增:name 字段支持中文拼音搜索
# Create the cookbook index with 10 primary shards and 3 replicas per shard.
PUT /cookbook/
{
  "settings": {
    "number_of_replicas": 3,
    "number_of_shards": 10
  }
}
# Close the index, then register the custom "ik_pinyin_analyzer": the
# ik_max_word tokenizer followed by the "my_pinyin" token filter.
POST cookbook/_close

PUT cookbook/_settings
{
  "settings": {
    "index": {
      "analysis": {
        "filter": {
          "my_pinyin": {
            "type": "pinyin",
            "keep_full_pinyin": true,
            "keep_original": false,
            "keep_separate_first_letter": false,
            "limit_first_letter_length": 10,
            "lowercase": true,
            "remove_duplicated_term": true
          }
        },
        "analyzer": {
          "ik_pinyin_analyzer": {
            "type": "custom",
            "tokenizer": "ik_max_word",
            "filter": [
              "my_pinyin"
            ]
          }
        }
      }
    }
  }
}
# Field mappings for the cookbook index.
# "dynamic": false means fields not declared here are not added to the mapping.
# "name" uses the custom ik_pinyin_analyzer; "tags" and "category_tags" are
# whitespace-tokenized text fields with fielddata enabled.
PUT cookbook/_mappings
{
  "dynamic": false,
  "properties": {
    "id": { "type": "integer" },
    "name": {
      "type": "text",
      "analyzer": "ik_pinyin_analyzer",
      "search_analyzer": "ik_pinyin_analyzer"
    },
    "introduction": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "description": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "collect_count": { "type": "integer" },
    "view_count": { "type": "integer" },
    "difficulty": { "type": "integer" },
    "need_time": { "type": "integer" },
    "prepare_desc": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "type": { "type": "integer" },
    "cookbook_type": { "type": "integer" },
    "recommend": { "type": "boolean" },
    "verified": { "type": "boolean" },
    "grounding": { "type": "boolean" },
    "allow_distribution": { "type": "boolean" },
    "tags": {
      "type": "text",
      "analyzer": "whitespace",
      "fielddata": true
    },
    "materials": {
      "type": "text",
      "analyzer": "ik_smart",
      "search_analyzer": "ik_smart"
    },
    "seasons": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "category_tags": {
      "type": "text",
      "analyzer": "whitespace",
      "fielddata": true
    },
    "categories": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "platforms": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "themes": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "pub_time": { "type": "date" }
  }
}
# Reopen the index so it can serve index and search requests again.
POST cookbook/_open
2020-12-24 记:elasticsearch-rest-client 下的 httpclient jar 包冲突。
先查看 elasticsearch-rest-client 7.9.3 所引用的 httpclient 版本(原文此处为截图,具体版本号未保留),
然后在项目依赖中显式声明同一版本,直接覆盖冲突的引用。

2020-12-28
IK 分词可以同时支持拼音和同义词:在自定义 analyzer 的 filter 列表中依次串联多个 token filter 即可
# Define analyzers that chain IK tokenization with synonym expansion and
# pinyin conversion (one variant per IK tokenizer: ik_max_word and ik_smart).
#
# Analysis settings are static: they can only be changed while the index is
# closed. Close the index first, apply the settings, then reopen it.
# NOTE(review): fields that already exist keep the analyzer they were mapped
# with; confirm how these new analyzers are meant to be attached to fields.
POST cookbook/_close

PUT cookbook/_settings
{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "ik_synonym_pinyin_max_word": {
            "type": "custom",
            "tokenizer": "ik_max_word",
            "filter": [
              "my_synonym_filter",
              "my_pinyin_filter"
            ]
          },
          "ik_synonym_pinyin_smart": {
            "type": "custom",
            "tokenizer": "ik_smart",
            "filter": [
              "my_synonym_filter",
              "my_pinyin_filter"
            ]
          }
        },
        "filter": {
          "my_synonym_filter": {
            "type": "synonym",
            "synonyms_path": "analysis-ik/synonyms.txt"
          },
          "my_pinyin_filter": {
            "type": "pinyin",
            "keep_separate_first_letter": false,
            "keep_full_pinyin": true,
            "keep_original": false,
            "limit_first_letter_length": 10,
            "lowercase": true,
            "remove_duplicated_term": true
          }
        }
      }
    }
  }
}

POST cookbook/_open

浙公网安备 33010602011771号