关于es查询dsl的filter与must,term与match的区别
2022-06-15 15:15 假面Wilson 阅读(3238) 评论(0) 收藏 举报【数据】
【1】创建es7 索引
put localhost:9200/pdi_cust ,注意 PUB_CUST_LABEL 字段是 text 类型,会被分词。es7 已废弃 type(映射类型),所以无需指定 type。
{ "mappings" :{ "properties":{ "RCRD_ID":{ "type":"keyword" } , "BUSI_CODE":{ "type":"keyword" } , "STATE":{ "type":"keyword" } , "LOANS":{ "type":"nested" , "properties" :{ "LOAN_NUM":{ "type":"keyword" } , "PUB_CUST_LABEL":{ "type":"text" } , "DATA_SRC":{ "type":"keyword" } , "CUST_NUM": { "type":"keyword" } , "LOAN_BAL_SUM":{ "type":"double" } , "OVD_MONEY_SUM": { "type": "double" } } } } } }
【2】插入索引文档: post localhost:9200/pdi_cust/_doc/tr_rd_01
{ "RCRD_ID":"tr_rd_01" , "STATE":"PDOS" , "BUSI_CODE":"pdi" , "LOANS":[ {"CUST_NUM":"CUST_NUM_01", "DATA_SRC":"VALD", "LOAN_BAL_SUM":"1111", "LOAN_NUM":"LOAN_NUM_01", "OVD_MONEY_SUM":"1111", "PUB_CUST_LABEL":"张三 李四 王五"} ] }
【3】查询索引所有文档 :
post localhost:9200/pdi_cust/_search ,请求体为 {}
// 查询es所有文档 { "took": 4, "timed_out": false, "_shards": { "total": 1, "successful": 1, "skipped": 0, "failed": 0 }, "hits": { "total": { "value": 3, "relation": "eq" }, "max_score": 1.0, "hits": [ { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_01", "_score": 1.0, "_source": { "RCRD_ID": "tr_rd_01", "STATE": "PDOS", "BUSI_CODE": "pdi", "LOANS": [ { "CUST_NUM": "CUST_NUM_01", "DATA_SRC": "VALD", "LOAN_BAL_SUM": "1111", "LOAN_NUM": "LOAN_NUM_01", "OVD_MONEY_SUM": "1111", "PUB_CUST_LABEL": "张三 李四 王五" }, { "CUST_NUM": "CUST_NUM_01", "DATA_SRC": "VALD", "LOAN_BAL_SUM": "2222", "LOAN_NUM": "LOAN_NUM_02", "OVD_MONEY_SUM": "2222", "PUB_CUST_LABEL": "张三2 李四2 王五2" } ] } }, { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_02", "_score": 1.0, "_source": { "RCRD_ID": "tr_rd_02", "STATE": "PDOS", "BUSI_CODE": "pdi", "LOANS": [ { "CUST_NUM": "CUST_NUM_02", "DATA_SRC": "VALD", "LOAN_BAL_SUM": "1113", "LOAN_NUM": "LOAN_NUM_3", "OVD_MONEY_SUM": "1113", "PUB_CUST_LABEL": "李四" }, { "CUST_NUM": "CUST_NUM_02", "DATA_SRC": "VALD", "LOAN_BAL_SUM": "1114", "LOAN_NUM": "LOAN_NUM_4", "OVD_MONEY_SUM": "1114", "PUB_CUST_LABEL": "张三" } ] } }, { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_03", "_score": 1.0, "_source": { "RCRD_ID": "tr_rd_03", "STATE": "PDOS", "BUSI_CODE": "pdi", "LOANS": [ { "CUST_NUM": "CUST_NUM_03", "DATA_SRC": "VALD", "LOAN_BAL_SUM": "1115", "LOAN_NUM": "LOAN_NUM_5", "OVD_MONEY_SUM": "1115", "PUB_CUST_LABEL": "李四 王五" }, { "CUST_NUM": "CUST_NUM_03", "DATA_SRC": "VALD", "LOAN_BAL_SUM": "1116", "LOAN_NUM": "LOAN_NUM_6", "OVD_MONEY_SUM": "1116", "PUB_CUST_LABEL": "张三" } ] } } ] } }
【4】根据嵌套类型查询 (filter 与 must 是属于同一个级别的查询方式,都可以作为 query->bool 的属性)
4.1、filter: 不计算评分,查询效率高;结果可被缓存;(推荐)
+ term: 精确匹配(查询词不分词,直接与倒排索引中的词项比较);
+ match: 分词匹配(即常说的"模糊匹配"):查询词先经分词器分词,再到倒排索引中匹配;
4.2、must: 要计算评分,查询效率相对较低;无缓存;
+ term: 精确匹配,要评分;
+ match: 分词匹配("模糊匹配"),要评分;
【例子】
1、使用 filter+term实现精确匹配不计分查询;
// dsl { "_source":["RCRD_ID", "STATE", "BUSI_CODE"] , "query":{ "bool":{ "filter":[ { "term":{ "STATE":"PDOS" } } , { "nested":{ "path":"LOANS" , "query":{ "bool":{ "filter":[ { "term": { "LOANS.LOAN_NUM": "LOAN_NUM_01" } } ] } } , "inner_hits": { } } } ] } } } // 查询结果 { "took": 8, "timed_out": false, "_shards": { "total": 1, "successful": 1, "skipped": 0, "failed": 0 }, "hits": { "total": { "value": 1, "relation": "eq" }, "max_score": 0.0, "hits": [ { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_01", "_score": 0.0, "_source": { "RCRD_ID": "tr_rd_01", "STATE": "PDOS", "BUSI_CODE": "pdi" }, "inner_hits": { "LOANS": { "hits": { "total": { "value": 1, "relation": "eq" }, "max_score": 0.0, "hits": [ { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_01", "_nested": { "field": "LOANS", "offset": 0 }, "_score": 0.0, "_source": { "CUST_NUM": "CUST_NUM_01", "DATA_SRC": "VALD", "LOAN_BAL_SUM": "1111", "LOAN_NUM": "LOAN_NUM_01", "OVD_MONEY_SUM": "1111", "PUB_CUST_LABEL": "张三 李四 王五" } } ] } } } } ] } }
_score 为 0.0 表示没有计算得分;
2、使用 filter+match 实现不计算得分的分词(模糊)匹配;
// dsl { "_source":["RCRD_ID", "STATE", "BUSI_CODE"] , "query":{ "bool":{ "filter":[ { "match":{ "STATE":"PDOS" } } , { "nested":{ "path":"LOANS" , "query":{ "bool":{ "filter":[ { "match": { "LOANS.PUB_CUST_LABEL": "张三 李四" } } ] } } , "inner_hits": { } } } ] } } } // 结果 { "took": 35, "timed_out": false, "_shards": { "total": 1, "successful": 1, "skipped": 0, "failed": 0 }, "hits": { "total": { "value": 3, "relation": "eq" }, "max_score": 0.0, "hits": [ { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_01", "_score": 0.0, "_source": { "RCRD_ID": "tr_rd_01", "STATE": "PDOS", "BUSI_CODE": "pdi" }, "inner_hits": { "LOANS": { "hits": { "total": { "value": 2, "relation": "eq" }, "max_score": 0.0, "hits": [ { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_01", "_nested": { "field": "LOANS", "offset": 0 }, "_score": 0.0, "_source": { "CUST_NUM": "CUST_NUM_01", "DATA_SRC": "VALD", "LOAN_BAL_SUM": "1111", "LOAN_NUM": "LOAN_NUM_01", "OVD_MONEY_SUM": "1111", "PUB_CUST_LABEL": "张三 李四 王五" } }, { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_01", "_nested": { "field": "LOANS", "offset": 1 }, "_score": 0.0, "_source": { "CUST_NUM": "CUST_NUM_01", "DATA_SRC": "VALD", "LOAN_BAL_SUM": "2222", "LOAN_NUM": "LOAN_NUM_02", "OVD_MONEY_SUM": "2222", "PUB_CUST_LABEL": "张三2 李四2 王五2" } } ] } } } }, { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_02", "_score": 0.0, "_source": { "RCRD_ID": "tr_rd_02", "STATE": "PDOS", "BUSI_CODE": "pdi" }, "inner_hits": { "LOANS": { "hits": { "total": { "value": 2, "relation": "eq" }, "max_score": 0.0, "hits": [ { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_02", "_nested": { "field": "LOANS", "offset": 0 }, "_score": 0.0, "_source": { "CUST_NUM": "CUST_NUM_02", "DATA_SRC": "VALD", "LOAN_BAL_SUM": "1113", "LOAN_NUM": "LOAN_NUM_3", "OVD_MONEY_SUM": "1113", "PUB_CUST_LABEL": "李四" } }, { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_02", "_nested": { "field": "LOANS", "offset": 1 }, "_score": 0.0, "_source": { "CUST_NUM": 
"CUST_NUM_02", "DATA_SRC": "VALD", "LOAN_BAL_SUM": "1114", "LOAN_NUM": "LOAN_NUM_4", "OVD_MONEY_SUM": "1114", "PUB_CUST_LABEL": "张三" } } ] } } } }, { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_03", "_score": 0.0, "_source": { "RCRD_ID": "tr_rd_03", "STATE": "PDOS", "BUSI_CODE": "pdi" }, "inner_hits": { "LOANS": { "hits": { "total": { "value": 2, "relation": "eq" }, "max_score": 0.0, "hits": [ { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_03", "_nested": { "field": "LOANS", "offset": 0 }, "_score": 0.0, "_source": { "CUST_NUM": "CUST_NUM_03", "DATA_SRC": "VALD", "LOAN_BAL_SUM": "1115", "LOAN_NUM": "LOAN_NUM_5", "OVD_MONEY_SUM": "1115", "PUB_CUST_LABEL": "李四 王五" } }, { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_03", "_nested": { "field": "LOANS", "offset": 1 }, "_score": 0.0, "_source": { "CUST_NUM": "CUST_NUM_03", "DATA_SRC": "VALD", "LOAN_BAL_SUM": "1116", "LOAN_NUM": "LOAN_NUM_6", "OVD_MONEY_SUM": "1116", "PUB_CUST_LABEL": "张三" } } ] } } } } ] } }
以上 dsl,把 match 换为 term,则查询不到任何结果:因为 PUB_CUST_LABEL 是 text 类型,写入时已被分词,而 term 是精确匹配,不会对查询词 "张三 李四" 分词,倒排索引中不存在与整串完全相同的词项;
3、使用 must+term, 实现计算得分并精确匹配(查询效率低于 filter+term),因为 must要计算匹配得分
// dsl { "_source":["RCRD_ID", "STATE", "BUSI_CODE"] , "query":{ "bool":{ "must":[ { "term":{ "STATE":"PDOS" } } , { "nested":{ "path":"LOANS" , "query":{ "bool":{ "must":[ { "term": { "LOANS.LOAN_NUM": "LOAN_NUM_01" } } ] } } , "inner_hits": { } } } ] } } } // 结果 { "took": 13, "timed_out": false, "_shards": { "total": 1, "successful": 1, "skipped": 0, "failed": 0 }, "hits": { "total": { "value": 1, "relation": "eq" }, "max_score": 1.6739764, "hits": [ { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_01", "_score": 1.6739764, "_source": { "RCRD_ID": "tr_rd_01", "STATE": "PDOS", "BUSI_CODE": "pdi" }, "inner_hits": { "LOANS": { "hits": { "total": { "value": 1, "relation": "eq" }, "max_score": 1.5404451, "hits": [ { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_01", "_nested": { "field": "LOANS", "offset": 0 }, "_score": 1.5404451, "_source": { "CUST_NUM": "CUST_NUM_01", "DATA_SRC": "VALD", "LOAN_BAL_SUM": "1111", "LOAN_NUM": "LOAN_NUM_01", "OVD_MONEY_SUM": "1111", "PUB_CUST_LABEL": "张三 李四 王五" } } ] } } } } ] } }
4、使用 must+match 实现计算得分的分词(模糊)匹配查询(注意:本例查询的 LOANS.LOAN_NUM 是 keyword 类型,不分词,match 在该字段上等价于精确匹配,所以结果和得分与上面的 must+term 相同)
// dsl { "_source":["RCRD_ID", "STATE", "BUSI_CODE"] , "query":{ "bool":{ "must":[ { "match":{ "STATE":"PDOS" } } , { "nested":{ "path":"LOANS" , "query":{ "bool":{ "must":[ { "match": { "LOANS.LOAN_NUM": "LOAN_NUM_01" } } ] } } , "inner_hits": { } } } ] } } } //结果 { "took": 10, "timed_out": false, "_shards": { "total": 1, "successful": 1, "skipped": 0, "failed": 0 }, "hits": { "total": { "value": 1, "relation": "eq" }, "max_score": 1.6739764, "hits": [ { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_01", "_score": 1.6739764, "_source": { "RCRD_ID": "tr_rd_01", "STATE": "PDOS", "BUSI_CODE": "pdi" }, "inner_hits": { "LOANS": { "hits": { "total": { "value": 1, "relation": "eq" }, "max_score": 1.5404451, "hits": [ { "_index": "pdi_cust", "_type": "_doc", "_id": "tr_rd_01", "_nested": { "field": "LOANS", "offset": 0 }, "_score": 1.5404451, "_source": { "CUST_NUM": "CUST_NUM_01", "DATA_SRC": "VALD", "LOAN_BAL_SUM": "1111", "LOAN_NUM": "LOAN_NUM_01", "OVD_MONEY_SUM": "1111", "PUB_CUST_LABEL": "张三 李四 王五" } } ] } } } } ] } }
【小结】
1、对于 keyword 类型的字段而言,用 term 和 match 都可以查询;但对于 text 类型的分词字段而言,term 不会对查询词分词,只能命中分词后的单个词项,用整段原文去 term 查询查不到结果,因此应使用 match 查询;
2、根据嵌套类型查询 (filter 与 must 属于同一级别的查询方式,都可以作为 query->bool 的属性;区别是 filter 不评分,must 要评分,所以 filter 效率更高)
2.1、filter: 不计算评分,查询效率高;结果可被缓存;(推荐)
+ term: 精确匹配(查询词不分词,直接与倒排索引中的词项比较);
+ match: 分词匹配(即常说的"模糊匹配"):查询词先经分词器分词,再到倒排索引中匹配;
2.2、must: 要计算评分,查询效率相对较低;无缓存;
+ term: 精确匹配,要评分;
+ match: 分词匹配("模糊匹配"),要评分;
 
                    
                     
                    
                 
                    
                 
                
            
         浙公网安备 33010602011771号
浙公网安备 33010602011771号