Syncing MySQL Data to Elasticsearch with Logstash

Versions

Logstash: 7.6.2
Elasticsearch: 7.6.2
Kibana: 7.6.2
OS: Windows 10

Configuration

Logstash runs with its stock local configuration; the only change was editing jvm.options to shrink the heap a little.
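For reference, jvm.options in Logstash 7.6.2 ships with a 1 GB heap (-Xms1g / -Xmx1g); the values below are illustrative, not necessarily the exact ones used here:

# jvm.options -- heap lowered from the 1g defaults (illustrative values)
-Xms512m
-Xmx512m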

Create a connector folder under the Logstash directory for the MySQL JDBC driver, and remember to drop the jar into it.

Create a script folder under the Logstash directory to hold the pipeline config files. The examples that follow assume a simple TEST table like the one sketched below.
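The post never shows the source table, so this DDL is hypothetical, inferred from the columns (id, name, pwd) that appear in the sync output later:

-- Hypothetical schema; a numeric id matters later, since it doubles as the tracking column
CREATE TABLE TEST (
  id   INT PRIMARY KEY,
  name VARCHAR(64),
  pwd  VARCHAR(64)
);
INSERT INTO TEST VALUES
  (1, 'zhangsan', '123456'),
  (2, 'lisi',     '123456'),
  (3, 'wangwu',   '123456');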

One-time full sync script

input {
  jdbc {
    # Path to the MySQL JDBC driver jar
    jdbc_driver_library => "D:/Serves/logstash-7.6.2/connector/mysql-connector-java-8.0.19.jar"
    # MySQL driver class (this is the MySQL 8 driver)
    jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
    # JDBC connection string
    jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/solo?useUnicode=true&characterEncoding=utf-8&serverTimezone=GMT%2B8"
    # MySQL user; the account must allow remote access
    jdbc_user => "root"
    # MySQL password
    jdbc_password => "Root.123"
    # SQL to execute
    statement => "SELECT * from TEST"
  }
}

output {
  elasticsearch {
    # Elasticsearch cluster address; no need to list every node, and the default port can be omitted
    hosts => [ "localhost:9200" ]
    # Index name used when querying; either pre-create a mapping in Elasticsearch or rely on the default dynamic mapping
    index => "role"
    # Document type; deprecated in ES 7.x and can be omitted
    document_type => "_doc"
    # Elasticsearch document ID, taken from the id column of the SQL result
    document_id => "%{id}"
  }
  stdout {
    # Print each synced event as a JSON line on the console
    codec => json_lines
  }
}
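The index comment above notes that you can pre-create a mapping instead of relying on Elasticsearch's default dynamic mapping. A minimal sketch in the Kibana console; the field types here are arbitrary choices for the TEST columns, not what the post actually used:

PUT role
{
  "mappings": {
    "properties": {
      "id":   { "type": "keyword" },
      "name": { "type": "text" },
      "pwd":  { "type": "keyword" }
    }
  }
}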

Checking existing indices in Kibana

GET _cat/indices

yellow open megacorp                 b75hLxe3RBauFMWKnltqbg 1 1  3  1 13.2kb 13.2kb
green  open .kibana_task_manager_1   zjA3t6CWRTWE0VOPQeYHYw 1 0  2  2 56.7kb 56.7kb
green  open .apm-agent-configuration 6RmJvSAPRE6xuLX1GUERKg 1 0  0  0   283b   283b
green  open .kibana_1                mBaMHmjzSBSnwlEMTMm57w 1 0 32 13 64.9kb 64.9kb

Syncing the data

Start Logstash to sync the MySQL data into Elasticsearch.

D:\Serves\logstash-7.6.2
λ bin\logstash -f .\script\mysql-test.conf
# Log output
Java HotSpot(TM) 64-Bit Server VM warning: Option UseConcMarkSweepGC was deprecated in version 9.0 and will likely be removed in a future release.
......
[2020-04-30T09:36:23,488][INFO ][logstash.inputs.jdbc     ][main] (0.049250s) SELECT * from TEST
{"name":"lisi","@timestamp":"2020-04-30T01:36:23.566Z","pwd":"123456","id":"2","@version":"1"}
{"name":"wangwu","@timestamp":"2020-04-30T01:36:23.567Z","pwd":"123456","id":"3","@version":"1"}
{"name":"zhangsan","@timestamp":"2020-04-30T01:36:23.556Z","pwd":"123456","id":"1","@version":"1"}
[2020-04-30T09:36:25,230][INFO ][logstash.runner          ] Logstash shut down.

Checking the sync result in Kibana

GET _cat/indices

# A new index, role, has appeared
yellow open megacorp                 b75hLxe3RBauFMWKnltqbg 1 1  3  1 13.2kb 13.2kb
yellow open role                     olBw8F1LTWmrBBmKAYbmmg 1 1  3  0 15.8kb 15.8kb
green  open .kibana_task_manager_1   zjA3t6CWRTWE0VOPQeYHYw 1 0  2  0 31.6kb 31.6kb
green  open .apm-agent-configuration 6RmJvSAPRE6xuLX1GUERKg 1 0  0  0   283b   283b
green  open .kibana_1                mBaMHmjzSBSnwlEMTMm57w 1 0 32 15 42.9kb 42.9kb

# Query the index data
GET role/_search
{
  "query": {
    "match_all": {}
  }
}

# Result
{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 3,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "role",
        "_type" : "_doc",
        "_id" : "3",
        "_score" : 1.0,
        "_source" : {
          "name" : "wangwu",
          "@timestamp" : "2020-04-30T01:36:23.567Z",
          "pwd" : "123456",
          "id" : "3",
          "@version" : "1"
        }
      },
      {
        "_index" : "role",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 1.0,
        "_source" : {
          "name" : "zhangsan",
          "@timestamp" : "2020-04-30T01:36:23.556Z",
          "pwd" : "123456",
          "id" : "1",
          "@version" : "1"
        }
      },
      {
        "_index" : "role",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 1.0,
        "_source" : {
          "name" : "lisi",
          "@timestamp" : "2020-04-30T01:36:23.566Z",
          "pwd" : "123456",
          "id" : "2",
          "@version" : "1"
        }
      }
    ]
  }
}

Appendix: incremental scheduled sync script

If the .txt file that stores sql_last_value stops recording the latest id after each run, check whether the id column in the database is actually a numeric type. The tracking column supports only two types, numeric and timestamp, and defaults to numeric.
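For reference, the plugin persists sql_last_value to that file as YAML; after the full sync above it should contain something like this (assuming the last synced id was 3):

--- 3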

input {
  jdbc {
    # Path to the MySQL JDBC driver jar
    jdbc_driver_library => "D:/Serves/logstash-7.6.2/connector/mysql-connector-java-8.0.19.jar"
    # MySQL driver class (this is the MySQL 8 driver)
    jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
    # JDBC connection string
    jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/solo?useUnicode=true&characterEncoding=utf-8&serverTimezone=GMT%2B8"
    # MySQL user; the account must allow remote access
    jdbc_user => "root"
    # MySQL password
    jdbc_password => "Root.123"
    # Seconds to wait for a connection before the pool raises a PoolTimeoutError
    jdbc_pool_timeout => 5
    # Enable paging
    jdbc_paging_enabled => true
    # Page size
    jdbc_page_size => 10000
    # Avoid garbled Chinese characters
    codec => plain { charset => "UTF-8" }
    # Track a column value instead of the run time
    use_column_value => true
    # Column to track
    tracking_column => "id"
    # Tracking column type; only numeric and timestamp are supported, numeric is the default
    tracking_column_type => "numeric"
    # Record the last run so the next run can resume incrementally
    record_last_run => true
    # File that stores the last sql_last_value; seed it with the column's initial value
    last_run_metadata_path => "D:/Serves/logstash-7.6.2/script/last_value/increase.txt"
    # Force identifier (column) names to lowercase
    lowercase_column_names => true
    # Cron-style schedule (minute, hour, day of month, month, day of week), as in Linux crontab; this runs once a minute
    schedule => "* * * * *"
    # SQL to execute; only rows beyond the last tracked id are fetched
    statement => "SELECT * from TEST where id > :sql_last_value"
    # Whether to reset the tracked state on every run; false keeps the increment
    clean_run => false
  }
}

filter {
  json {
    # Parse a JSON message field into top-level fields, then drop the raw message
    source => "message"
    remove_field => ["message"]
  }
  # Fix @timestamp being 8 hours behind local (UTC+8) time; borrowed from another post
  ruby {
    code => "event.set('timestamp', event.get('@timestamp').time.localtime + 8*60*60)"
  }
  ruby {
    code => "event.set('@timestamp',event.get('timestamp'))"
  }
  mutate {
    remove_field => ["timestamp"]
  }
}

output {
  elasticsearch {
    # Elasticsearch cluster address; no need to list every node, and the default port can be omitted
    hosts => [ "localhost:9200" ]
    # Index name used when querying; either pre-create a mapping in Elasticsearch or rely on the default dynamic mapping
    index => "role"
    # Document type; deprecated in ES 7.x and can be omitted
    document_type => "_doc"
    # Elasticsearch document ID, taken from the id column of the SQL result
    document_id => "%{id}"
  }
  stdout {
    # Print each synced event as a JSON line on the console
    codec => json_lines
  }
}
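To sanity-check the incremental pipeline, insert a row with a higher id (a hypothetical test row; the values here are made up) and wait for the next scheduled run:

INSERT INTO TEST (id, name, pwd) VALUES (4, 'zhaoliu', '123456');

Within a minute the scheduled query should fetch it (4 > 3, the tracked sql_last_value), index it with document ID 4, and update increase.txt to --- 4. You can confirm in Kibana with GET role/_doc/4.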