elasticsearch相关概念及常用操作汇总
背景
我本来是想把我平时总结的es的dsl发出来的,但是我发现只搞那个意义不大.干脆多写点吧.
索引的结构化和非结构化
我们经常用数据库,当然会经常用到索引.
然后从索引的维度去分析,系统分为结构化索引和非结构化索引.
结构化索引,就是可以预设索引,比如id,可以预见的. 维护方便,性能高.
非结构化索引,就是数据进来之前,我可能都不知道要进来什么. 然后数据进来后,我要进行分析. 比如,挨个扫描节点,看是否包含,比较墨迹. 或者将非结构化的数据的一部分信息提取出来,使其变得有结构,感觉很像抽象,有了结构,那就好搞很多.
elasticSearch简述
elasticSearch又称es,是一种分布式的搜索引擎工具,之前我们比较熟悉的应该是lucene,然后再往前推进,就是solr;
三者的关系
lucene是一个信息检索工具包,类似SDK,提供了全面的api,也可以做高级查询.但是作为一个搜索引擎来讲的话,并不太完整,因为缺乏了一些获取数据,解析,分词等功能.而且,用起来较为复杂.所以,我们可能更需要完整而且更简洁的搜索引擎服务. 简而言之,很全面,很基础,用起来麻烦.
solr是基于lucene,是对lucene的进一步封装,补全.提供了对应的api供我们调用,很容易就能实现检索服务,而且更为完整,可以根据配置文件解析数据等.但是因为是更高一层的封装,所以lucene很多新特性不能及时透传,也就是lucene能查,但是solr查不了.简而言之,solr是独立的,面向企业级的,搜索引擎.
es也是基于lucene,并提供了更高层次的封装的搜索引擎.特点是对索引进行分片,将一个大的索引拆分多个,分布到不同节点上,降低服务器压力,构成分布式搜索,从而大大提升搜索效率.是一款近乎实时分析的搜索引擎,非常强力.而且采用了restful风格,更易于上手.简而言之,分片机制,分布式搜索,实时,很强力.
实际开发中的dsl语句
记得装了es之后,再装个kibana,然后在kibana上跑这些程序.
#处理下hyd_test的全文导入
DELETE hyd_test
GET /hyd_test/_doc/_search
#批量更新
POST /hyd_test/_doc/_bulk
{"update":{"_id":1}}
{"doc":{"reference_law":"《中华人民共和国民事诉讼法》第二百一十条 、第二百三十三条 、第二百零七条 、第一百七十条","trial_court":"最高人民法院","assistant_judge":"齐召财","clerk":"齐召财","lawyer":"","law_firm":"","legal_basis":"","created_time":"2020-09-02 15:32:00","created_by":"admin","updated_time":"2020-09-02 15:34:12","updated_by":"","is_delete":0},"doc_as_upsert":true}
#批量写入 POST test_demo/_doc/_bulk {"create":{"_id":1}} {"case_name":"你好你好 ,嗨美女","count":18} {"create":{"_id":2}} {"case_name":"好的好的,大美女","count":19} #查询结构 GET /test_demo/_mapping #查询分词_analyze POST /test_demo/_analyze { "analyzer": "ik_max_word", "text": "你好你好 ,嗨美女" } #match查询 GET /test_demo/_doc/_search { "query": { "match": { "case_name": { "query": "是个美女" } } } } #精确匹配term GET /test_demo/_doc/_search { "query": { "term": { "case_name": { "value": "是个美女" } } } } #and 查询 GET hyd_test/_search { "from": 0, "size": 10, "query": { "bool": { "must": [ { "match": { "trial_year": "2018" } }, { "match": { "case_cause_id": "100" } } ] } } } #统计 GET hyd_test/_search { "size": 0, "query": { "match": { "case_cause_name": "相邻通行纠纷" } }, "aggs": { "document_type_name":{ "terms": { "field": "document_type_name", "size": 10 } } } } #清空所有数据 POST authoritative_case/_delete_by_query { "query":{ "match_all":{ } } } #高亮. 注意, 高亮必须match也要带上同样的字段. GET hyd_test/_search { "query": { "match": { "case_cause_name": "相邻通行纠纷" } }, "highlight": { "fields": { "case_cause_name":{ } } } } #高亮,加颜色,并且不要求 number_of_fragments 分段. 默认为5 ,就是划分为5断. GET hyd_test/_search { "query": { "bool": { "must": [ { "query_string": { "query": "一审" } }, { "match": { "id": { "query": 1 } } } ] } }, "highlight": { "pre_tags": [ "<span style='color:red'>" ], "post_tags": [ "</span>" ], "fragment_size": 1000, "number_of_fragments": 0, "require_field_match": false, "fields": { "*": {} } } } #id为1的全文高亮 GET hyd_test/_search { "query": { "bool": { "must": [ { "query_string": { "query": "民事" } }, { "query_string": { "query": "理由" } }, { "match": { "id": "1" } } ] } }, "highlight": { "require_field_match": "false", "fields": { "*": {} } } } #查询嵌套对象: path里面是对象visible_user query最里面是实际字段visible_user.visible_user_id GET gzmx_clue/_search { "from": 0, "size": 10, "query": { "bool": { "must": [ { "nested": { "path": "visible_user", "query": { "terms": { "visible_user.visible_user_id": [ 185 ] } } } } ] } } } #复杂查询 先取交集,再取并集. 
should(must,must) . GET gzmx_clue/_search { "query": { "bool": { "should": [ { "bool": { "must": { "match": { "procuratorate_code": "2" } }, "must_not": { "match": { "source": "9" } } } }, { "bool": { "must": [ { "match": { "source": "9" } }, { "nested": { "path": "visible_user", "query": { "terms": { "visible_user.visible_user_id": [ 186 ] } } } }, { "match": { "procuratorate_code": "2" } } ] } } ] } } } #must和shoule交集使用会导致should不起效. 所以, 尽量把should放在must里面. { "from": 0, "size": 10, "query": { "bool": { "must": [ { "terms": { "clue_feature": [ "0" ], "boost": 1 } }, { "bool": { "should": [ { "bool": { "must": [ { "match": { "procuratorate_code": { "query": 2, "operator": "OR", "prefix_length": 0, "max_expansions": 50, "fuzzy_transpositions": true, "lenient": false, "zero_terms_query": "NONE", "auto_generate_synonyms_phrase_query": true, "boost": 1 } } } ], "must_not": [ { "match": { "source": { "query": 9, "operator": "OR", "prefix_length": 0, "max_expansions": 50, "fuzzy_transpositions": true, "lenient": false, "zero_terms_query": "NONE", "auto_generate_synonyms_phrase_query": true, "boost": 1 } } } ], "adjust_pure_negative": true, "boost": 1 } }, { "bool": { "must": [ { "nested": { "query": { "terms": { "visible_user.visible_user_id": [ 432 ], "boost": 1 } }, "path": "visible_user", "ignore_unmapped": false, "score_mode": "none", "boost": 1 } }, { "match": { "procuratorate_code": { "query": 2, "operator": "OR", "prefix_length": 0, "max_expansions": 50, "fuzzy_transpositions": true, "lenient": false, "zero_terms_query": "NONE", "auto_generate_synonyms_phrase_query": true, "boost": 1 } } }, { "match": { "source": { "query": 9, "operator": "OR", "prefix_length": 0, "max_expansions": 50, "fuzzy_transpositions": true, "lenient": false, "zero_terms_query": "NONE", "auto_generate_synonyms_phrase_query": true, "boost": 1 } } } ], "adjust_pure_negative": true, "boost": 1 } } ], "adjust_pure_negative": true, "boost": 1 } } ], "adjust_pure_negative": true, "boost": 1 } }, 
"sort": [ { "created_time": { "order": "desc" } }, { "id": { "order": "desc" } } ], "track_total_hits": 2147483647 } #查询所有数量 GET gzmx_clue/_search { "track_total_hits":true, "query":{ "match_all":{} } }

浙公网安备 33010602011771号