elasticsearch通过logstash数据迁移

问题描述

自建es,数据迁移到aws上es

问题解决

对每个索引进行同步,写入到aws上es

env

  • centos7.x es (self-built)
  • aws es

step1: check index && version

curl -s -u xxx:'yyy' https://xxxxx:9200   # view version
curl -s -u xxx:'yyy' https://xxxxx:9200/_cat/indices?h=index

step2: logstash

01、openjdk

#aws vpc 内准备一台虚机
wget https://download.java.net/java/GA/jdk11/9/GPL/openjdk-11.0.2_linux-x64_bin.tar.gz

export JAVA_HOME=/tmp/reindex/jdk-11.0.2
export CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib
export PATH=$JAVA_HOME/bin:$PATH

wget https://artifacts.elastic.co/downloads/logstash/logstash-7.4.2.tar.gz   #迁移对logstash的版本没有太大的要求,一般与源端主版本相同即可 7.x = 7.x

02、logstash conf
1.tmpl

input {
    elasticsearch {
    hosts => ["xxx:9200"]
    index => "${INDEX}"
    size =>5000
    scroll =>"50m"
    docinfo => true
  }
}

filter {
}

output {
  elasticsearch {
    hosts => ["xxxx-1.es.amazonaws.com:443"]
    ssl => true
    user => "xxx"
    password => "xxxx"
    pool_max => 5000
    pool_max_per_route =>500
    index => "%{[@metadata][_index]}_fix"   #新建的索引加上_fix或者跟原索引相同去掉_fix
    document_type => "%{[@metadata][_type]}"
    document_id => "%{[@metadata][_id]}"
    ilm_enabled => false
  }
}

03、根据索引执行迁移数据

export INDEX="ui_click" && envsubst < ./1.tmpl >1.conf
./bin/logstash -f 1.conf  -w 50 -b 5000 -u 120

step3: validate data

get /login/_count   #数据量

get /login/_search  #数据

get /login/_search  #范围查询
{
  "size": 10,
  "query":
     {
       "range": {
         "time": {      #field time in login
           "gte": "1592409600000",  #ms
           "lte": "1591200000000"
         }
       }
     }
}

get /login/_search?sort=time:desc&size=1    #字段排序降序time
posted @ 2020-07-20 21:40  mvpbang  阅读(1547)  评论(0编辑  收藏  举报