ES使用C# NEST创建索引支持nGram+MatchPhrase查询

ES写法: 

PUT index_test
{
  "settings": {
    "index.max_ngram_diff":5,
    "analysis": {
      "analyzer": {
        "ngram_analyzer_short": {
          "filter": "lowercase",
          "tokenizer": "ngram_tokenizer_short"
        },
        "ngram_analyzer_long": {
          "filter": "lowercase",
          "tokenizer": "ngram_tokenizer_long"
        }
      },
      "tokenizer": {
        "ngram_tokenizer_short": {
          "type": "nGram",
          "min_gram": "1",
          "max_gram": "4"
        },
        "ngram_tokenizer_long": {
          "type": "nGram",
          "min_gram": "5",
          "max_gram": "5"
        }
      }
    }
  },
  "mappings": {
    "properties": {
        "vcContent": {
          "type": "keyword",
          "fields": {
            "long_text": {
              "type": "text",
              "analyzer": "ngram_analyzer_long"
            },
            "short_text": {
              "type": "text",
              "analyzer": "ngram_analyzer_short"
            }
          }
        }
      }
  }
}

C#写法:

1、创建索引

/// <summary>
/// Creates the index <paramref name="indexName"/> if it does not exist yet, registering two
/// nGram analyzers ("ngram_analyzer_short": 1-4 chars, "ngram_analyzer_long": exactly 5 chars)
/// and mapping the "name" keyword field with "short_text" / "long_text" sub-fields that use them.
/// Does nothing when the index already exists.
/// </summary>
/// <typeparam name="T">Document type whose properties are auto-mapped.</typeparam>
/// <param name="elasticClient">NEST client used to talk to the cluster.</param>
/// <param name="indexName">Name of the index to create; must not be null, empty or whitespace.</param>
/// <param name="numberOfShards">Value written to index.number_of_shards.</param>
/// <param name="numberOfReplicas">Value written to index.number_of_replicas.</param>
/// <param name="refreshInterval">Refresh interval in seconds (written as "&lt;n&gt;s").</param>
/// <exception cref="ArgumentException">Thrown when <paramref name="indexName"/> is blank.</exception>
/// <exception cref="Exception">Thrown when the create-index response is not valid.</exception>
public static async Task CreateIndexAsync<T>(this ElasticClient elasticClient, string indexName = "", int numberOfShards = 5, int numberOfReplicas = 1, int refreshInterval = 5) where T : class
        {
            if (string.IsNullOrWhiteSpace(indexName)) throw new ArgumentException("索引名称不可为空");

            // Guard clause: nothing to do when the index is already there.
            var existsResponse = await elasticClient.Indices.ExistsAsync(indexName);
            if (existsResponse.Exists)
            {
                return;
            }

            var rawSettings = new Dictionary<string, object>
            {
                ["index.number_of_shards"] = numberOfShards,
                ["index.number_of_replicas"] = numberOfReplicas,
                ["index.refresh_interval"] = refreshInterval + "s",
                // NOTE(review): extremely large result window; confirm that deep paging of this size is really intended.
                ["index.max_result_window"] = 2000000000,
                // Upper bound for (max_gram - min_gram) of the tokenizers registered below.
                ["index.max_ngram_diff"] = 5
            };

            var indexSettings = new IndexSettings(rawSettings)
            {
                Analysis = new Analysis
                {
                    Analyzers = new Analyzers(),
                    Tokenizers = new Tokenizers()
                }
            };

            // Short-content analysis: grams of 1-4 characters, for search terms shorter than 5 characters.
            indexSettings.Analysis.Tokenizers.Add("ngram_tokenizer_short", new Nest.NGramTokenizer { MinGram = 1, MaxGram = 4 });
            indexSettings.Analysis.Analyzers.Add("ngram_analyzer_short", new CustomAnalyzer
            {
                Tokenizer = "ngram_tokenizer_short",
                Filter = new List<string> { "lowercase" }
            });

            // Long-content analysis: fixed 5-character grams, for search terms of 5 characters or more.
            indexSettings.Analysis.Tokenizers.Add("ngram_tokenizer_long", new Nest.NGramTokenizer { MinGram = 5, MaxGram = 5 });
            indexSettings.Analysis.Analyzers.Add("ngram_analyzer_long", new CustomAnalyzer
            {
                Tokenizer = "ngram_tokenizer_long",
                Filter = new List<string> { "lowercase" }
            });

            var indexState = new IndexState { Settings = indexSettings };
            var response = await elasticClient.Indices.CreateAsync(indexName, p => p
                .InitializeUsing(indexState)
                .Map<T>(x => x
                    .AutoMap<T>()
                    // Override the "name" property: keyword field with two nGram-analyzed text sub-fields.
                    .Properties<T>(pp => pp.Keyword(k => k
                        .Name("name")
                        .Fields(f => f
                            .Text(longText => longText.Name("long_text").Analyzer("ngram_analyzer_long"))
                            .Text(shortText => shortText.Name("short_text").Analyzer("ngram_analyzer_short")))))));

            if (!response.IsValid)
            {
                // OriginalException may be null, so fall back to the server error / debug info
                // instead of masking the real failure with a NullReferenceException.
                var reason = response.OriginalException?.Message
                             ?? response.ServerError?.Error?.Reason
                             ?? response.DebugInformation;
                throw new Exception($"创建索引失败:{reason}", response.OriginalException);
            }
        }

 

2、搜索条件按字符长度指定搜索方式

// Query data: build the list of "must" clauses, choosing the sub-field by the length of the search text.
            var mustFilters = new List<Func<QueryContainerDescriptor<TempList>, QueryContainer>>();
            if (!string.IsNullOrEmpty(name))
            {
                if (name.Length >= 5)
                {
                    // Long input: match_phrase against the sub-field analyzed with the 5-gram analyzer.
                    // The query text is analyzed too, so no manual lower-casing is needed here.
                    mustFilters.Add(q => q.MatchPhrase(mp => mp.Field("name.long_text").Query(name)));
                }
                else
                {
                    // Short input: a term query is NOT analyzed, while the indexed grams went through the
                    // "lowercase" filter, so lower-case the input or upper-case text can never match.
                    var shortTerm = name.ToLowerInvariant();
                    mustFilters.Add(q => q.Term(tq => tq.Field("name.short_text").Value(shortTerm)));
                }
            }

            // Fetch the list.
            var idxName = "index_test";
            var result = await _esClientService.Client.SearchAsync<TempList>(s => s
                .Index(idxName)
                .Query(rq => rq.Bool(b => b.Must(mustFilters)))
                .From(0)
                .Size(10000));

            // Copy into a new list instead of down-casting the IReadOnlyCollection returned by NEST,
            // which would depend on the concrete runtime type of Documents.
            return new List<TempList>(result.Documents);

 

3、实体映射

/// <summary>
/// Document type used by the search snippet (<c>SearchAsync&lt;TempList&gt;</c>).
/// Member names are kept lower-case exactly as declared; renaming them would change the
/// type's public surface.
/// </summary>
public class TempList
    {
        /// <summary>Integer identifier of the document.</summary>
        public int id { get; set; }

        /// <summary>Name; the search snippet queries it via "name.long_text" / "name.short_text".</summary>
        public string name { get; set; }

        /// <summary>Free-text description.</summary>
        public string description { get; set; }

        /// <summary>Integer type code (meaning not defined in this snippet).</summary>
        public int type { get; set; }
    }

 

参考:https://www.elastic.co/guide/en/elasticsearch/client/net-api/7.x/multi-fields.html

NEST Nuget库版本:7.x.x

 

posted @ 2022-04-13 21:26  吉姆杨  阅读(750)  评论(0)  编辑  收藏  举报