elasticsearch-script-painless

官网地址

https://www.elastic.co/guide/en/elasticsearch/painless/7.2/painless-walkthrough.html

Painless学习攻略

为了举例说明Painless是如何工作的,让我们加载一些曲棍球统计数据到ElasticSearch的索引中。

PUT hockey/_bulk?refresh
{"index":{"_id":1}}
{"first":"johnny","last":"gaudreau","goals":[9,27,1],"assists":[17,46,0],"gp":[26,82,1],"born":"1993/08/13"}
{"index":{"_id":2}}
{"first":"sean","last":"monohan","goals":[7,54,26],"assists":[11,26,13],"gp":[26,82,82],"born":"1994/10/12"}
{"index":{"_id":3}}
{"first":"jiri","last":"hudler","goals":[5,34,36],"assists":[11,62,42],"gp":[24,80,79],"born":"1984/01/04"}
{"index":{"_id":4}}
{"first":"micheal","last":"frolik","goals":[4,6,15],"assists":[8,23,15],"gp":[26,82,82],"born":"1988/02/17"}
{"index":{"_id":5}}
{"first":"sam","last":"bennett","goals":[5,0,0],"assists":[8,1,0],"gp":[26,1,0],"born":"1996/06/20"}
{"index":{"_id":6}}
{"first":"dennis","last":"wideman","goals":[0,26,15],"assists":[11,30,24],"gp":[26,81,82],"born":"1983/03/20"}
{"index":{"_id":7}}
{"first":"david","last":"jones","goals":[7,19,5],"assists":[3,17,4],"gp":[26,45,34],"born":"1984/08/10"}
{"index":{"_id":8}}
{"first":"tj","last":"brodie","goals":[2,14,7],"assists":[8,42,30],"gp":[26,82,82],"born":"1990/06/07"}
{"index":{"_id":9}}
{"first":"mark","last":"giordano","goals":[6,30,15],"assists":[3,30,24],"gp":[26,60,63],"born":"1983/10/03"}
{"index":{"_id":10}}
{"first":"mikael","last":"backlund","goals":[3,15,13],"assists":[6,24,18],"gp":[26,82,82],"born":"1989/03/17"}
{"index":{"_id":11}}
{"first":"joe","last":"colborne","goals":[3,18,13],"assists":[6,20,24],"gp":[26,67,82],"born":"1990/01/30"}

获取文档值

文档值可以从一个名称为doc的Map中获取

举例说明,下面的脚本计算一个球员的总进球数,这个例子使用了int强类型和for循环。

## 自定义评分计算方式,数组求和,值返回到_score上
GET hockey/_search
{
  "query": {
    "function_score": {
      "script_score": {
        "script": {
          "lang": "painless",
          "source": """
            int total = 0;
            for(int i = 0; i < doc['goals'].length; ++i){
              total += doc['goals'][i];
            }
            return total;
          """
        }
      }
    }
  }
  , "size": 2
}

结果

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 12,
      "relation" : "eq"
    },
    "max_score" : 87.0,
    "hits" : [
      {
        "_index" : "hockey",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 87.0,
        "_source" : {
          "first" : "sean",
          "last" : "monohan",
          "goals" : [
            7,
            54,
            26
          ],
          "assists" : [
            11,
            26,
            13
          ],
          "gp" : [
            26,
            82,
            82
          ],
          "born" : "1994/10/12"
        }
      },
      {
        "_index" : "hockey",
        "_type" : "_doc",
        "_id" : "3",
        "_score" : 75.0,
        "_source" : {
          "first" : "jiri",
          "last" : "hudler",
          "goals" : [
            5,
            34,
            36
          ],
          "assists" : [
            11,
            62,
            42
          ],
          "gp" : [
            24,
            80,
            79
          ],
          "born" : "1984/01/04"
        }
      }
    ]
  }
}

或者,你可以使用脚本字段完成同样的事情而不是一个function score

## 在返回结果中添加脚本字段total_goals(对goals数组求和),其值以数组形式出现在fields中
GET hockey/_search
{
  "_source": true, 
  "script_fields": {
    "total_goals": {
      "script": {
        "lang": "painless",
        "source": """
            int total = 0;
            for(int i = 0; i < doc['goals'].length; ++i){
              total += doc['goals'][i];
            }
            return total;
"""
      }
    }
  }
  , "size": 2
}

结果

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 12,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "hockey",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 1.0,
        "_source" : {
          "first" : "johnny",
          "last" : "gaudreau",
          "goals" : [
            9,
            27,
            1
          ],
          "assists" : [
            17,
            46,
            0
          ],
          "gp" : [
            26,
            82,
            1
          ],
          "born" : "1993/08/13"
        },
        "fields" : {
          "total_goals" : [
            37
          ]
        }
      },
      {
        "_index" : "hockey",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 1.0,
        "_source" : {
          "first" : "sean",
          "last" : "monohan",
          "goals" : [
            7,
            54,
            26
          ],
          "assists" : [
            11,
            26,
            13
          ],
          "gp" : [
            26,
            82,
            82
          ],
          "born" : "1994/10/12"
        },
        "fields" : {
          "total_goals" : [
            87
          ]
        }
      }
    ]
  }
}

下面这个例子使用Painless脚本把球员的名和姓拼接成全名,名和姓分别通过 doc['first.keyword'].value 和 doc['last.keyword'].value 获取

注意

如果要拼接的字段不存在,将抛出异常:A document doesn't have a value for a field! Use doc[<field>].size()==0 to check if a document is missing a field!

如果使用doc['last'].value, 将抛异常:Fielddata is disabled on text fields by default. Set fielddata=true on [last] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead.

GET hockey/_search
{
  "size": 2, 
  "_source": true, 
  "script_fields": {
    "fullname": {
      "script": {
        "lang": "painless",
        "source": """
          if(doc['last.keyword'].size()==0 || doc['first.keyword'].size()==0){
            return "";
          } else {
            return doc['first.keyword'].value +' '+ doc['last.keyword'].value;
          }
        """
      }
    }
  }
}

结果

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 12,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "hockey",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 1.0,
        "_source" : {
          "first" : "johnny",
          "last" : "gaudreau",
          "goals" : [
            9,
            27,
            1
          ],
          "assists" : [
            17,
            46,
            0
          ],
          "gp" : [
            26,
            82,
            1
          ],
          "born" : "1993/08/13"
        },
        "fields" : {
          "fullname" : [
            "johnny gaudreau"
          ]
        }
      },
      {
        "_index" : "hockey",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 1.0,
        "_source" : {
          "first" : "sean",
          "last" : "monohan",
          "goals" : [
            7,
            54,
            26
          ],
          "assists" : [
            11,
            26,
            13
          ],
          "gp" : [
            26,
            82,
            82
          ],
          "born" : "1994/10/12"
        },
        "fields" : {
          "fullname" : [
            "sean monohan"
          ]
        }
      }
    ]
  }
}

更新字段

你可以非常容易的更新字段值,你可以使用ctx._source.<field-name>获取原始字段。

把球员_id=1的last name改为hockey

POST hockey/_update/1
{
  "script": {
    "lang": "painless",
    "source": "ctx._source.last = params.last",
    "params": {
      "last": "hockey"
    }
  }
}

结果

{
  "_index" : "hockey",
  "_type" : "_doc",
  "_id" : "1",
  "_version" : 2,
  "result" : "updated",
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "failed" : 0
  },
  "_seq_no" : 14,
  "_primary_term" : 1
}

新增字段

新增球员昵称字段nick

POST hockey/_update/1
{
  "script": {
    "lang": "painless",
    "source": """
      ctx._source.last = params.last;
      ctx._source.nick = params.nick
    """,
    "params": {
      "last": "gaudreau",
      "nick": "hockey"
    }
  }
}

结果

{
  "_index" : "hockey",
  "_type" : "_doc",
  "_id" : "1",
  "_version" : 3,
  "result" : "updated",
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "failed" : 0
  },
  "_seq_no" : 15,
  "_primary_term" : 1
}

处理Date字段

date字段被解析为ZonedDateTime类型,所以它支持getYear、getDayOfWeek、getMillis等方法。在脚本中使用这些方法时,去掉get前缀并把剩余部分的首字母改为小写即可,比如下面的例子

## 从出生日期中获取年份
GET hockey/_search
{
  "script_fields": {
    "birth_year": {
      "script": {
        "source": "doc.born.value.year"
      }
    }
  }
}
## 从出生日期中获取月份
GET hockey/_search
{
  "_source": ["born"], 
  "script_fields": {
    "birth_month": {
      "script": {
        "source": "doc.born.value.monthValue"
      }
    }
  }
}
## 从出生日期中获取日
GET hockey/_search
{
  "_source": ["born"], 
  "script_fields": {
    "birth_day": {
      "script": {
        "source": "doc.born.value.dayOfMonth"
      }
    }
  }
}

正则表达式

Painless原生就支持拥有语法结构的正则表达式:

  • /pattern/: 模式字面量,正则表达式写在一对斜杠之间

  • =~: 这个查找操作返回的是boolean值,如果文本的某个子序列匹配上了就返回true,否则返回false

  • ==~: 这个匹配操作返回的是boolean值,如果整个文本与模式匹配就返回true,否则返回false

示例1:

POST hockey/_update_by_query
{
  "script": {
    "lang": "painless",
    "source": """
      if (ctx._source.last =~ /b/) {
        ctx._source.last += "matched";
      } else {
        ctx.op = "noop";
      }
    """
  }
}

示例2:

POST hockey/_update_by_query
{
  "script": {
    "lang": "painless",
    "source": """
      if (ctx._source.last ==~ /[^aeiou].*[aeiou]/) {
        ctx._source.last += "matched";
      } else {
        ctx.op = "noop";
      }
    """
  }
}

 

posted on 2021-03-15 22:38  0x153_小波  阅读(338)  评论(0)  编辑  收藏  举报