使用 Path Hierarchy Tokenizer 对文件系统进行数据建模以及文件搜索

   模拟:文件系统数据构造

PUT /filesystem
{
  "settings": {
    "analysis": {
      "analyzer": {
        "paths":{
          "tokenizer":"path_hierarchy"
        }
      }
    }
  }
}

  测试

POST /filesystem/_analyze
{
  "tokenizer": "path_hierarchy",
  "text":"/home/elasticsearch/image"
}

-------结果-----
{
  "tokens" : [
    {
      "token" : "/home",
      "start_offset" : 0,
      "end_offset" : 5,
      "type" : "word",
      "position" : 0
    },
    {
      "token" : "/home/elasticsearch",
      "start_offset" : 0,
      "end_offset" : 19,
      "type" : "word",
      "position" : 0
    },
    {
      "token" : "/home/elasticsearch/image",
      "start_offset" : 0,
      "end_offset" : 25,
      "type" : "word",
      "position" : 0
    }
  ]
}

  path_hierarchy tokenizer: 会把路径 /a/b/c/d 按层级分词为 /a, /a/b, /a/b/c, /a/b/c/d, 即每一级父目录各生成一个 token

  需求一: 查找一份在/workspace/projects/helloworld这个目录下,内容包含ES的文件

PUT /filesystem/_mapping
{
  "properties": {
    "name":{
      "type":"keyword"
    },
    "path":{
      "type":"keyword",
      "fields": {
        "tree":{
          "type":"text",
          "analyzer":"paths"
        }
      }
    }
  }
}

  写入数据

PUT /filesystem/_doc/1
{
  "name":"readme.txt",
  "path":"/workspace/projects/helloworld",
  "contents":"简单介绍一点点ES"
}

  查询(需求一): 查找一份在/workspace/projects/helloworld这个目录下,内容包含ES的文件

GET /filesystem/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "contents": "ES"
          }
        }
      ],
      "filter": {
        "term": {
          "path": "/workspace/projects/helloworld"
        }
      }
    }
  }
}

  需求二: 搜索/workspace目录下,内容包含ES的所有的文件

       造数据

PUT /filesystem/_doc/1
{
  "name":"readme.txt",
  "path":"/workspace/projects/helloworld",
  "contents":"简单介绍一点点ES"
}

PUT /filesystem/_doc/2
{
  "name":"readme.txt",
  "path":"/workspace/projects/helloworld",
  "contents":"简单介绍一点点ES"
}


PUT /filesystem/_doc/3
{
  "name":"readme.txt",
  "path":"/workspace/projects/helloworld",
  "contents":"简单介绍一点点ES"
}

PUT /filesystem/_doc/4
{
  "name":"readme.txt",
  "path":"/workspace/projects/helloworld",
  "contents":"简单介绍一点点ES"
}

PUT /filesystem/_doc/5
{
  "name":"readme.txt",
  "path":"/workspace/projects/helloworld",
  "contents":"简单介绍一点点ES"
}

  查询: path.tree 在索引时已被 paths 分析器拆分为各级父目录, 所以用 term 精确匹配 /workspace 即可命中该目录及其所有子目录下的文件

GET /filesystem/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "contents": "ES"
          }
        }
      ],
      "filter": {
        "term": {
          "path.tree": "/workspace"
        }
      }
    }
  }
}

 

posted on 2021-09-25 08:09  溪水静幽  阅读(71)  评论(0)    收藏  举报