ELK Deep Dive
一、Collecting plain-format nginx logs with the filebeat nginx module
1. What filebeat modules do
# Purpose:
They convert the plain-text logs of specific services into JSON
(useful when the boss simply won't let you log in JSON, or when the service doesn't support a JSON log format at all)
# Services they can collect, for example:
- nginx
- mysql
- mongo
- redis
Once you can collect one of them, the others work the same way; only the log paths differ.
2. Configuration
# 1. Locate the filebeat module directory and config files
[root@db01 ~]# rpm -qc filebeat
[root@db01 /etc/filebeat/modules.d]# ll
-rw-r--r-- 1 root root 371 Jan 24 2019 apache2.yml.disabled
-rw-r--r-- 1 root root 175 Jan 24 2019 auditd.yml.disabled
-rw-r--r-- 1 root root 845 Jan 24 2019 elasticsearch.yml.disabled
-rw-r--r-- 1 root root 269 Jan 24 2019 haproxy.yml.disabled
-rw-r--r-- 1 root root 546 Jan 24 2019 icinga.yml.disabled
-rw-r--r-- 1 root root 371 Jan 24 2019 iis.yml.disabled
-rw-r--r-- 1 root root 396 Jan 24 2019 kafka.yml.disabled
-rw-r--r-- 1 root root 188 Jan 24 2019 kibana.yml.disabled
-rw-r--r-- 1 root root 361 Jan 24 2019 logstash.yml.disabled
-rw-r--r-- 1 root root 189 Jan 24 2019 mongodb.yml.disabled
-rw-r--r-- 1 root root 368 Jan 24 2019 mysql.yml.disabled
-rw-r--r-- 1 root root 369 Jan 24 2019 nginx.yml.disabled
-rw-r--r-- 1 root root 388 Jan 24 2019 osquery.yml.disabled
-rw-r--r-- 1 root root 192 Jan 24 2019 postgresql.yml.disabled
-rw-r--r-- 1 root root 463 Jan 24 2019 redis.yml.disabled
-rw-r--r-- 1 root root 190 Jan 24 2019 suricata.yml.disabled
-rw-r--r-- 1 root root 574 Jan 24 2019 system.yml.disabled
-rw-r--r-- 1 root root 195 Jan 24 2019 traefik.yml.disabled
# 2. Enable module loading (if you replaced filebeat.yml with a minimal template earlier, add this block; if you kept the default config, just uncomment it)
[root@db01 ~]# vim /etc/filebeat/filebeat.yml
filebeat.config.modules:
  path: ${path.config}/modules.d/*.yml
  reload.enabled: true
  reload.period: 10s
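Optional sanity check: filebeat ships a test subcommand that validates the config syntax and the connection to the configured output, which saves a restart-and-read-the-log cycle. A minimal sketch (the -c path is just the default config location, spelled out for clarity):
[root@db01 ~]# filebeat test config -c /etc/filebeat/filebeat.yml    # prints "Config OK" if the YAML parses
[root@db01 ~]# filebeat test output                                  # checks it can reach the output defined in filebeat.yml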
# 3. List and enable modules (enabling simply renames the file under /etc/filebeat/modules.d/ by stripping the .disabled suffix)
[root@db01 ~]# filebeat modules list
Enabled:
Disabled:
apache2
auditd
elasticsearch
...
[root@db01 ~]# filebeat modules enable nginx
Enabled nginx
[root@db01 ~]# filebeat modules list
Enabled:
nginx
Disabled:
apache2
auditd
elasticsearch
...
# 4. Configure the filebeat nginx module
[root@db01 ~]# cat /etc/filebeat/modules.d/nginx.yml
- module: nginx
  access:
    enabled: true
    var.paths: ["/var/log/nginx/bbs.log"]
  error:
    enabled: true
    var.paths: ["/var/log/nginx/error.log"]
# 5. Route the output by log type (with modules enabled, no filebeat.inputs section is needed)
[root@db01 ~]# cat /etc/filebeat/filebeat.yml
filebeat.config.modules:
  path: ${path.config}/modules.d/*.yml
  reload.enabled: true
  reload.period: 10s
output.elasticsearch:
  hosts: ["10.0.0.51:9200"]
  indices:
    - index: "nginx_bbs_access-%{[beat.version]}-%{+yyyy.MM}"
      when.contains:
        fileset.name: "access"
    - index: "nginx_error-%{[beat.version]}-%{+yyyy.MM}"
      when.contains:
        fileset.name: "error"
setup.template.name: "nginx"
setup.template.pattern: "nginx_*"
setup.template.enabled: false
setup.template.overwrite: true
# 6. Make sure the nginx site logs in the normal (plain) format
[root@db01 /etc/nginx/conf.d]# cat bbs.conf
server {
    listen 80;
    server_name bbs.oldmeng.com;
    access_log /var/log/nginx/bbs.log main;
    location / {
        root /usr/share/nginx/html;
        index index.html index.htm;
    }
}
[root@db01 ~]# nginx -t
[root@db01 ~]# systemctl restart nginx
[root@db01 ~]# > /var/log/nginx/bbs.log
[root@db01 ~]# tail -1 /var/log/nginx/bbs.log    (plain format)
10.0.0.1 - - [31/Jan/2020:13:26:24 +0800] "GET /oldmeng HTTP/1.1" 404 555 "-" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36" "-"
# 7. Install the required ES plugins (with multiple nodes, install them on every node); without them filebeat reports the error shown below:
[root@db01 /data/soft]# ll
-rw-r--r-- 1 root root 33255554 Jan 9 18:29 ingest-geoip-6.6.0.zip
-rw-r--r-- 1 root root 62173 Jan 9 18:29 ingest-user-agent-6.6.0.zip
[root@db01 /data/soft]# cd /usr/share/elasticsearch/
[root@db01 /usr/share/elasticsearch]# ./bin/elasticsearch-plugin install file:///data/soft/ingest-geoip-6.6.0.zip
[root@db01 /usr/share/elasticsearch]# ./bin/elasticsearch-plugin install file:///data/soft/ingest-user-agent-6.6.0.zip
Error:
2019-09-11T09:04:40.562+0800 ERROR pipeline/output.go:100 Failed to connect to backoff(elasticsearch(http://10.0.0.51:9200)): Connection marked as failed because the onConnect callback failed: Error loading pipeline for fileset nginx/access: This module requires the following Elasticsearch plugins: ingest-user-agent, ingest-geoip. You can install them by running the following commands on all the Elasticsearch nodes:
sudo bin/elasticsearch-plugin install ingest-user-agent
sudo bin/elasticsearch-plugin install ingest-geoip
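To confirm the two plugins actually went in (and to check the other nodes in a multi-node cluster), the same plugin tool can list what is installed:
[root@db01 /usr/share/elasticsearch]# ./bin/elasticsearch-plugin list
ingest-geoip
ingest-user-agent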
# 8. Restart elasticsearch
[root@db01 ~]# systemctl restart elasticsearch
Delete the old indices created earlier
# 9. Restart filebeat
[root@db01 ~]# systemctl restart filebeat
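Two quick checks on the ES side once filebeat reconnects. The first confirms that the indices defined above were created; the second confirms that the nginx module loaded its ingest pipelines into ES (the pipeline names contain the filebeat version, so grepping for nginx is just a rough filter):
[root@db01 ~]# curl -s '10.0.0.51:9200/_cat/indices/nginx_*?v'
[root@db01 ~]# curl -s '10.0.0.51:9200/_ingest/pipeline?pretty' | grep nginx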
# 10. Add the index patterns in kibana
Note:
when adding the error index pattern, choose read_timestamp as the time field
# 11. Verify that the logs have been parsed into structured JSON fields
3. View in the browser
二、Collecting the MySQL slow log and error log with the filebeat mysql module
1. MySQL configuration
# 1. Check the slow-query threshold and the slow-log path
mysql> show variables like 'long_query_time%';
+-----------------+-----------+
| Variable_name | Value |
+-----------------+-----------+
| long_query_time | 10.000000 |
+-----------------+-----------+
1 row in set (0.08 sec)
mysql> show variables like 'slow_query%';
+---------------------+----------------------------------------------+
| Variable_name | Value |
+---------------------+----------------------------------------------+
| slow_query_log | OFF |
| slow_query_log_file | /application/mysql-5.6.40/data/db01-slow.log |
+---------------------+----------------------------------------------+
2 rows in set (0.00 sec)
# 2. Configure the MySQL error-log and slow-log paths
[root@db01 ~]# vim /etc/my.cnf
binlog_format=row
log-bin=mysql-bin
server_id=1
skip_name_resolve
log-error=/application/mysql-5.6.40/data/db01-error.log
slow_query_log=ON
slow_query_log_file=/application/mysql-5.6.40/data/db01-slow.log
long_query_time=3
# 3. Restart MySQL and generate a slow query
[root@db01 ~]# systemctl restart mysqld
Statement that produces a slow-log entry:
mysql> select sleep(2) user,host from mysql.user ;
# 4. Confirm that the slow log and error log were actually created
[root@db01 ~]# ls /application/mysql-5.6.40/data/db01-slow.log
/application/mysql-5.6.40/data/db01-slow.log
# 5. Enable the filebeat mysql module
[root@db01 ~]# filebeat modules enable mysql
Enabled mysql
# 6. Configure the mysql module
[root@db01 ~]# vim /etc/filebeat/modules.d/mysql.yml
- module: mysql
  error:
    enabled: true
    var.paths: ["/application/mysql-5.6.40/data/db01-error.log"]    # error-log path
  slowlog:
    enabled: true
    var.paths: ["/application/mysql-5.6.40/data/db01-slow.log"]     # slow-log path
2. Filebeat configuration
# 7. Route the output by module and fileset
[root@db01 ~]# vim /etc/filebeat/filebeat.yml
filebeat.config.modules:
  path: ${path.config}/modules.d/*.yml
  reload.enabled: true
  reload.period: 10s
output.elasticsearch:
  hosts: ["10.0.0.51:9200"]
  indices:
    - index: "nginx_access-%{[beat.version]}-%{+yyyy.MM}"
      when.contains:
        fileset.module: "nginx"
        fileset.name: "access"
    - index: "nginx_error-%{[beat.version]}-%{+yyyy.MM}"
      when.contains:
        fileset.module: "nginx"
        fileset.name: "error"
    - index: "mysql_slowlog-%{[beat.version]}-%{+yyyy.MM}"
      when.contains:
        fileset.module: "mysql"
        fileset.name: "slowlog"
    - index: "mysql_error-%{[beat.version]}-%{+yyyy.MM}"
      when.contains:
        fileset.module: "mysql"
        fileset.name: "error"
setup.template.name: "nginx"
setup.template.pattern: "nginx_*"
setup.template.enabled: false
setup.template.overwrite: true
# 8. Restart filebeat
[root@db01 ~]# systemctl restart filebeat
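As with the nginx module, the standard _cat API shows whether the mysql indices were created with the names configured above:
[root@db01 ~]# curl -s '10.0.0.51:9200/_cat/indices/mysql_*?v'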
3. View in the browser
三、Collecting docker logs with the docker input type
1. Install the docker environment
# 1. Docker installation commands
[root@db01 ~]# rm -fr /etc/yum.repos.d/local.repo
[root@db01 ~]# curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
[root@db01 ~]# wget -O /etc/yum.repos.d/docker-ce.repo https://mirrors.ustc.edu.cn/docker-ce/linux/centos/docker-ce.repo
[root@db01 ~]# sed -i 's#download.docker.com#mirrors.tuna.tsinghua.edu.cn/docker-ce#g' /etc/yum.repos.d/docker-ce.repo
[root@db01 ~]# yum install docker-ce -y
# 2. Start two nginx containers
[root@db01 ~]# docker run -d -p 80:80 nginx
[root@db01 ~]# docker run -d -p 8080:80 nginx
2. Where docker stores container logs
# docker inspect shows the directory that holds the container's logs
[root@db01 ~]# docker inspect 29bdfb77b0cf (container id)
/var/lib/docker/containers/
# Each container you start gets its own directory named after the container id
[root@db01 ~]# cd /var/lib/docker/containers/
[root@db01 /var/lib/docker/containers]# ll
total 0
drwx------ 4 root root 237 Jan 31 18:43 0193c5d3c94f7600a7edbbb74f75c44e6c754a1bdb1aad76e32b14f858b0036
drwx------ 4 root root 237 Jan 31 18:40 29bdfb77b0cfe61488bee04eb8a60821b449dc471e9d2c493ab5242c507076c
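Each of those directories holds a <container-id>-json.log file written by docker's default json-file logging driver; every line is a JSON object with log, stream and time keys, and this is what the docker input type reads. A quick peek:
[root@db01 /var/lib/docker/containers]# tail -1 */*-json.log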
3. Docker input configuration from the official docs
4. Filebeat configuration
# 1. Edit the filebeat config file
[root@db01 ~]# vim /etc/filebeat/filebeat.yml
filebeat.inputs:
- type: docker
  containers.ids:
    - '*'    # list specific container ids here; '*' matches all containers
output.elasticsearch:
  hosts: ["10.0.0.51:9200"]
  indices:
    - index: "docker_access-%{[beat.version]}-%{+yyyy.MM}"
      when.contains:
        stream: "stdout"
    - index: "docker_error-%{[beat.version]}-%{+yyyy.MM}"
      when.contains:
        stream: "stderr"
setup.template.name: "docker"
setup.template.pattern: "docker_*"
setup.template.enabled: false
setup.template.overwrite: true
# 2. Restart filebeat
[root@db01 ~]# systemctl restart filebeat
# 3. Hit nginx to generate some logs
http://10.0.0.51/meng
http://10.0.0.51:8080/sssssssssssssssssssssss
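A quick ES-side check with the _cat API confirms that the stdout/stderr split produced the two docker indices configured above:
[root@db01 ~]# curl -s '10.0.0.51:9200/_cat/indices/docker_*?v'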
5. View in the browser
四、Collecting docker logs: the leave-work-early edition
The setup above collects the logs of every container mixed together; they are hard to tell apart and you cannot see which business each container runs.
1. Run multiple containers and collect their logs separately
Container 1: nginx
Container 2: mysql
2. The ideal result
docker_nginx-6.6.0-2020.02
docker_mysql-6.6.0-2020.02
3. Prerequisite
There must be a key that uniquely identifies each container's business type
4. Container orchestration
docker-compose
5. docker-compose configuration
# 1. Install docker-compose
[root@db01 ~]# yum install -y python2-pip
[root@db01 ~]# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pip -U
[root@db01 ~]# pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
[root@db01 ~]# pip install docker-compose
# 2. Write the docker-compose file
## Here I start two nginx containers: the one on port 80 plays the nginx role, the one on port 8080 stands in for mysql
## The trick is to label each container and have docker write that label into every log entry
[root@db01 ~]# cat docker-compose.yml
version: '3'
services:
  nginx:
    image: nginx:latest
    labels:
      service: nginx
    logging:
      options:
        labels: "service"
    ports:
      - "80:80"
  db:
    image: nginx:latest
    labels:
      service: db
    logging:
      options:
        labels: "service"
    ports:
      - "8080:80"
# 3. Remove the old containers. Be careful with this command!
[root@db01 ~]# docker rm -f $(docker ps -a -q)
# 4. Start the containers with docker-compose
[root@db01 ~]# docker-compose up -d
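Before touching filebeat it is worth confirming that compose really attached the label and handed it to the logging driver. docker inspect with a Go-template format string shows both (root_nginx_1 is the container name compose generates here, as seen in the docker ps output in the next section):
[root@db01 ~]# docker inspect -f '{{json .Config.Labels}}' root_nginx_1         # should include "service": "nginx"
[root@db01 ~]# docker inspect -f '{{json .HostConfig.LogConfig}}' root_nginx_1  # should show labels under the json-file options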
6. Filebeat configuration
# 5. Edit the filebeat config file
[root@db01 ~]# vim /etc/filebeat/filebeat.yml
filebeat.inputs:
- type: log
  enabled: true
  paths:
    - /var/lib/docker/containers/*/*-json.log
  json.keys_under_root: true
  json.overwrite_keys: true
output.elasticsearch:
  hosts: ["10.0.0.51:9200"]
  indices:
    - index: "docker_nginx_access-%{[beat.version]}-%{+yyyy.MM}"
      when.contains:
        attrs.service: "nginx"
        stream: "stdout"
    - index: "docker_nginx_error-%{[beat.version]}-%{+yyyy.MM}"
      when.contains:
        attrs.service: "nginx"
        stream: "stderr"
    - index: "docker_db_access-%{[beat.version]}-%{+yyyy.MM}"
      when.contains:
        attrs.service: "db"
        stream: "stdout"
    - index: "docker_db_error-%{[beat.version]}-%{+yyyy.MM}"
      when.contains:
        attrs.service: "db"
        stream: "stderr"
setup.template.name: "docker"
setup.template.pattern: "docker_*"
setup.template.enabled: false
setup.template.overwrite: true
# 6. Restart filebeat
[root@db01 ~]# systemctl restart filebeat
# 7. Hit nginx to generate logs
http://10.0.0.51/nginx
http://10.0.0.51:8080/dbdbdbdb
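To see why attrs.service works as a routing key, look at one raw log line: when the json-file driver is given the labels option it adds an attrs object carrying those labels to every entry, and json.keys_under_root lifts it to the top level in filebeat. A rough check:
[root@db01 ~]# grep -m1 '"attrs"' /var/lib/docker/containers/*/*-json.log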
7. View in the browser
五、Collecting docker logs: the pay-raise edition
The different businesses in different containers can now be told apart, but access and error output still arrive mixed in one stream per container, which makes entries hard to find.
The fix is to mount a dedicated host directory for each business's log directory.
# 0. Create the log directories on the host
[root@db01 ~]# mkdir /opt/{nginx,mysql}
# 1. Mount each container's log directory onto the host
[root@db01 ~]# docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
370710cc514a nginx:latest "nginx -g 'daemon of…" 43 minutes ago Up 40 minutes 0.0.0.0:80->80/tcp root_nginx_1
418b1a41faea nginx:latest "nginx -g 'daemon of…" 43 minutes ago Up 40 minutes 0.0.0.0:8080->80/tcp root_db_1
[root@db01 ~]# docker cp /etc/nginx/nginx.conf 370710cc514a:/etc/nginx/nginx.conf
[root@db01 ~]# docker commit 370710cc514a nginx:v2
[root@db01 ~]# docker rm -f `docker ps -a -q`
[root@db01 ~]# docker run -d -p 80:80 -v /opt/nginx:/var/log/nginx nginx:v2
[root@db01 ~]# docker run -d -p 8080:80 -v /opt/mysql:/var/log/nginx nginx:v2
# 2. Edit the filebeat config file
filebeat.inputs:
- type: log
  enabled: true
  paths:
    - /opt/nginx/access.log
  json.keys_under_root: true
  json.overwrite_keys: true
  tags: ["nginx_access"]
- type: log
  enabled: true
  paths:
    - /opt/nginx/error.log
  tags: ["nginx_error"]
- type: log
  enabled: true
  paths:
    - /opt/mysql/access.log
  json.keys_under_root: true
  json.overwrite_keys: true
  tags: ["mysql_access"]
- type: log
  enabled: true
  paths:
    - /opt/mysql/error.log
  tags: ["mysql_error"]
output.elasticsearch:
  hosts: ["10.0.0.51:9200"]
  indices:
    - index: "docker_nginx_access-%{[beat.version]}-%{+yyyy.MM}"
      when.contains:
        tags: "nginx_access"
    - index: "docker_nginx_error-%{[beat.version]}-%{+yyyy.MM}"
      when.contains:
        tags: "nginx_error"
    - index: "docker_db_access-%{[beat.version]}-%{+yyyy.MM}"
      when.contains:
        tags: "mysql_access"
    - index: "docker_db_error-%{[beat.version]}-%{+yyyy.MM}"
      when.contains:
        tags: "mysql_error"
setup.template.name: "docker"
setup.template.pattern: "docker_*"
setup.template.enabled: false
setup.template.overwrite: true
# 3. Restart filebeat
[root@db01 ~]# systemctl restart filebeat
# 4. Hit nginx to generate logs
http://10.0.0.51/nginxnginxnginx
http://10.0.0.51:8080/dbdbdbdbdbdbd
# 5. Check the logs
## If a container is later stopped and a new one started, the logs keep appending to the same files, so containers of the same business (e.g. nginx) can simply share one mounted directory
[root@db01 ~]# ll /opt/nginx
total 8
-rw-r--r-- 1 root root 1876 Jan 31 20:49 access.log
-rw-r--r-- 1 root root 657 Jan 31 20:49 error.log
[root@db01 ~]# ll /opt/mysql
total 8
-rw-r--r-- 1 root root 1870 Jan 31 20:50 access.log
-rw-r--r-- 1 root root 660 Jan 31 20:50 error.log
六、Using redis as a cache between filebeat and ES to relieve the load on ES
1. Caching option 1
In the original architecture filebeat pushes data straight to ES regardless of whether ES can keep up; with a large burst, say 100,000+ log lines at once, ES falls behind and data gets dropped. Adding a redis cache in the middle relieves the pressure on ES.
Notes:
1. Redis can only be used standalone here; cluster and sentinel modes are not supported
2. Like ES, redis only receives data passively; it cannot pull
3. Logstash pulls only as much data as ES can index in one go; whatever is left over stays queued in redis
2. Caching option 2
Kafka can serve as the cache just as well, but it needs zookeeper alongside it; the upside is that this option can be clustered.
3. Redis configuration
# 1. Install and start redis
[root@db01 ~]# yum install redis -y
[root@db01 ~]# systemctl start redis
[root@db01 ~]# redis-cli set k1 v1
[root@db01 ~]# redis-cli get k1
# 2. Configure nginx to log in JSON format
[root@db01 ~]# vim /etc/nginx/nginx.conf
...
access_log /var/log/nginx/access.log json;
...
# 3. Configure filebeat
[root@db01 ~]# vim /etc/filebeat/filebeat.yml
filebeat.inputs:
- type: log
  enabled: true
  paths:
    - /var/log/nginx/bbs.log
  json.keys_under_root: true
  json.overwrite_keys: true
  tags: ["access"]
- type: log
  enabled: true
  paths:
    - /var/log/nginx/error.log
  tags: ["error"]
output.redis:
  hosts: ["10.0.0.51"]
  keys:
    - key: "nginx_access"
      when.contains:
        tags: "access"
    - key: "nginx_error"
      when.contains:
        tags: "error"
setup.template.name: "nginx"
setup.template.pattern: "nginx_*"
setup.template.enabled: false
setup.template.overwrite: true
# 4. Make sure the nginx log really is in JSON format
[root@db01 ~]# vim /etc/nginx/conf.d/bbs.conf
server {
    listen 80;
    server_name bbs.oldmeng.com;
    access_log /var/log/nginx/bbs.log json;
    location / {
        root /usr/share/nginx/html;
        index index.html index.htm;
    }
}
[root@db01 ~]# nginx -t
[root@db01 ~]# systemctl restart nginx
[root@db01 ~]# >/var/log/nginx/bbs.log
# 5. Restart filebeat and verify that the data lands in redis
[root@db01 ~]# ab -c 10 -n 100 http://10.0.0.51/mmm
[root@db01 ~]# systemctl restart filebeat
[root@db01 ~]# redis-cli
10.0.0.51:6379> keys *
1) "nginx_access"
2) "nginx_error"
3) "k2"
10.0.0.51:6379> TYPE nginx_access
list
10.0.0.51:6379> TYPE nginx_error
list
10.0.0.51:6379> LLEN nginx_access
(integer) 200
10.0.0.51:6379> LLEN nginx_error
(integer) 311
LRANGE nginx_access 1 2
[Take one of the collected entries and run it through an online JSON formatter to inspect it]
# 6. Install logstash (big, clunky and slow)
[root@db01 ~]# ll /data/soft/
-rw-r--r-- 1 root root 170703770 Jan 9 18:30 logstash-6.6.0.rpm
[root@db01 /data/soft]# rpm -ivh logstash-6.6.0.rpm
# 7. Edit the logstash config file
[root@db01 /data/soft]# vim /etc/logstash/conf.d/redis.conf
input {
redis {
host => "10.0.0.51"
port => "6379"
db => "0"
key => "nginx_access"
data_type => "list"
}
redis {
host => "10.0.0.51"
port => "6379"
db => "0"
key => "nginx_error"
data_type => "list"
}
}
filter {
mutate {
convert => ["upstream_time", "float"] #nginx和php解析消耗的时间
convert => ["request_time", "float"] #nginx接收用户请求再返给用户的时间
}
}
output {
stdout {}
if "access" in [tags] {
elasticsearch {
hosts => "http://localhost:9200"
manage_template => false
index => "nginx_access-%{+yyyy.MM}"
}
}
if "error" in [tags] {
elasticsearch {
hosts => "http://localhost:9200"
manage_template => false
index => "nginx_error-%{+yyyy.MM}"
}
}
}
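Because logstash takes so long to start, it pays to syntax-check the config first; -t (short for --config.test_and_exit) parses the file and exits without starting the pipeline:
[root@db01 ~]# /usr/share/logstash/bin/logstash -f /etc/logstash/conf.d/redis.conf -t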
# 8. Start logstash (be patient here; go brew a coffee and it may have started by the time you are back)
[root@db01 ~]# /usr/share/logstash/bin/logstash -f /etc/logstash/conf.d/redis.conf
# 9. Check whether the data has been pulled out of redis
[root@db01 ~]# redis-cli
127.0.0.1:6379> keys *
1) "k2"
[root@db01 ~]# systemctl restart filebeat
# 10. Hit the site in the browser again; the new data gets pulled away as it arrives
http://10.0.0.51
http://10.0.0.51/bbbbbbb
[root@db01 ~]# redis-cli
127.0.0.1:6379> keys *
4. View in the browser
How the (painfully slow to start) logstash pulls the logs
七、Optimizing how the data is stored in redis
Doubles the efficiency and cuts the time in half
# 1. Optimize filebeat: write all the logs into a single key
[root@db01 ~]# vim /etc/filebeat/filebeat.yml
filebeat.inputs:
- type: log
  enabled: true
  paths:
    - /var/log/nginx/bbs.log
  json.keys_under_root: true
  json.overwrite_keys: true
  tags: ["access"]
- type: log
  enabled: true
  paths:
    - /var/log/nginx/error.log
  tags: ["error"]
output.redis:
  hosts: ["10.0.0.51"]
  key: "all"
setup.template.name: "nginx"
setup.template.pattern: "nginx_*"
setup.template.enabled: false
setup.template.overwrite: true
# 2. Logstash reads from that single key and routes by tag
input {
redis {
host => "10.0.0.51"
port => "6379"
db => "0"
key => "all"
data_type => "list"
}
}
filter {
mutate {
convert => ["upstream_time", "float"]
convert => ["request_time", "float"]
}
}
output {
stdout {}
if "access" in [tags] {
elasticsearch {
hosts => "http://localhost:9200"
manage_template => false
index => "nginx_access-%{+yyyy.MM}"
}
}
if "error" in [tags] {
elasticsearch {
hosts => "http://localhost:9200"
manage_template => false
index => "nginx_error-%{+yyyy.MM}"
}
}
}
# 3. Generate test data
[root@db01 ~]# systemctl restart filebeat
[root@db01 ~]# redis-cli
127.0.0.1:6379> keys *
1) "all"
2) "k2"
127.0.0.1:6379> LLEN all
(integer) 2000
# 4. Start logstash
[root@db01 ~]# /usr/share/logstash/bin/logstash -f /etc/logstash/conf.d/redis.conf
八、The ultimate filebeat architecture: nginx + keepalived proxying multiple redis instances
1. The problem
Above we used a single redis instance as the caching layer, but a single instance is a single point of failure: even with persistence enabled, if the server dies or the repair time is unknown, data may still be lost. Redis does have sentinel and cluster modes, but unfortunately neither filebeat nor logstash supports them.
Redis master/slave replication is supported, but if the master dies the slave will not take over automatically.
2. The solution
1. Use nginx + keepalived as a reverse proxy / load balancer in front of multiple redis instances
2. Because of data-consistency problems during redis failover, it is best to use only two redis instances with a single one active: the second is a backup that only starts serving when the first one fails.
3. Point the redis address in filebeat's output at the keepalived virtual IP
4. Multiple logstash nodes can be started to speed up reading from redis
5. The backend can be a multi-node ES cluster
Topology diagram:
3. nginx configuration
Note: the stream block must be appended at the end of nginx.conf itself, not added as a sub-config under conf.d
[root@db01 ~]# cat /etc/nginx/nginx.conf
..........................
stream {
    upstream redis {
        server 10.0.0.51:6379 max_fails=2 fail_timeout=10s;
        server 10.0.0.52:6379 max_fails=2 fail_timeout=10s backup;
    }
    server {
        listen 6380;    # requests to port 6380 are proxied to the redis backends on 6379
        proxy_connect_timeout 1s;
        proxy_timeout 3s;
        proxy_pass redis;
    }
}
[root@db01 ~]# nginx -t
[root@db01 ~]# systemctl restart nginx
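One thing that confuses people here: it is nginx, not redis, that listens on 6380. A standard socket check makes that obvious:
[root@db01 ~]# ss -lntp | grep 6380    # the 6380 listener belongs to the nginx process; redis itself still listens only on 6379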
4. Failover test
# 1. Connect to redis through port 6380
[root@db01 ~]# redis-cli -h 10.0.0.51 -p 6380
10.0.0.51:6380>
# 2. Nothing is actually serving redis on port 6380; the process list shows only redis-server on 6379 plus the redis-cli client we just started
[root@db01 ~]# ps -ef|grep redis
root 2158 1 0 13:24 ? 00:00:05 redis-server 127.0.0.1:6379
root 5820 1997 0 14:28 pts/0 00:00:00 redis-cli -h 10.0.0.51 -p 6380
root 5873 5840 2 14:28 pts/1 00:00:00 grep --color=auto redis
# 3. Verify where port 6380 really goes
[root@db01 ~]# redis-cli -h 10.0.0.51 -p 6379
10.0.0.51:6379> set key_6379 v_6379
OK
10.0.0.51:6379> get key_6379
"v_6379"
10.0.0.51:6379> keys *
1) "key_6379"
# 4. Connecting to 6380 actually lands on 6379: the data inserted on 6379 is visible, so the proxy works
[root@db01 ~]# redis-cli -h 10.0.0.51 -p 6380
10.0.0.51:6380> keys *
1) "key_6379"
# 5. Redis on db02 (port 6379) has no data, because as the backup it receives nothing while the primary is up
[root@db01 ~]# redis-cli -h 10.0.0.52 -p 6379
10.0.0.52:6379> keys *
(empty list or set)
# 6. Simulate a redis outage: kill redis 6379 on db01
[root@db01 ~]# pkill redis
# 7. Check that writes through 6380 on db01 still succeed
[root@db01 ~]# redis-cli -h 10.0.0.51 -p 6380
10.0.0.51:6380> set k2 msy
OK
10.0.0.51:6380> keys *
1) "k2"
# 8. Check the data on db02's 6379
[root@db02 ~]# redis-cli -h 10.0.0.52 -p 6379
10.0.0.52:6379> keys *
1) "k2"
10.0.0.52:6379> get k2
"msy"
# 9. Bring db01's 6379 back up and check what the 6380 proxy shows now
[root@db01 ~]# bash redis_shell.sh start 6379 (a helper script written earlier)
[root@db01 ~]# redis-cli -h 10.0.0.51 -p 6380 (the data switches back to db01's 6379 data)
10.0.0.51:6380> keys *
1) "key_6379"
5. Filebeat configuration
[root@db01 ~]# vim /etc/filebeat/filebeat.yml
filebeat.inputs:
- type: log
  enabled: true
  paths:
    - /var/log/nginx/bbs.log
  json.keys_under_root: true
  json.overwrite_keys: true
  tags: ["access"]
- type: log
  enabled: true
  paths:
    - /var/log/nginx/error.log
  tags: ["error"]
output.redis:
  hosts: ["10.0.0.51:6380"]
  key: "all"
setup.template.name: "nginx"
setup.template.pattern: "nginx_*"
setup.template.enabled: false
setup.template.overwrite: true
6. Logstash configuration
[root@db01 ~]# vim /etc/logstash/conf.d/redis.conf
input {
redis {
host => "10.0.0.51"
port => "6380"
db => "0"
key => "all"
data_type => "list"
}
}
filter {
mutate {
convert => ["upstream_time", "float"] #nginx和php解析消耗的时间
convert => ["request_time", "float"] #nginx接收用户请求再返给用户的时间
}
}
output {
stdout {}
if "access" in [tags] {
elasticsearch {
hosts => "http://localhost:9200"
manage_template => false
index => "nginx_access-%{+yyyy.MM}"
}
}
if "error" in [tags] {
elasticsearch {
hosts => "http://localhost:9200"
manage_template => false
index => "nginx_error-%{+yyyy.MM}"
}
}
}
[root@db01 ~]# systemctl restart filebeat.service
7. End-to-end test
# 1. Log in through the 6380 proxy and clear out the old data
[root@db01 ~]# redis-cli -h 10.0.0.51 -p 6380
10.0.0.51:6380> keys *
(empty list or set)
# 2. Generate test data and check which of the two redis instances has it
[root@db01 ~]# ab -c 10 -n 1000 http://10.0.0.51/mmmmmmmm
[root@db01 ~]# bash redis_shell.sh login 6379
10.0.0.51:6379> keys *
1) "all"
10.0.0.51:6379> LLEN all
(integer) 2000
[root@db02 ~]# bash redis_shell.sh login 6379
10.0.0.52:6379> keys *
1) "k2"
# 3. Start logstash
[root@db01 ~]# /usr/share/logstash/bin/logstash -f /etc/logstash/conf.d/redis.conf
# 4. Watch the data being drained
[root@db01 ~]# redis-cli -h 10.0.0.51 -p 6379
10.0.0.51:6379> keys *
1) "all"
10.0.0.51:6379> LLEN all
(integer) 2000
10.0.0.51:6379> LLEN all
(integer) 1625
10.0.0.51:6379> LLEN all
(integer) 1250
10.0.0.51:6379> LLEN all
(integer) 0
# 5. Kill redis on db01 and check the 6380 proxy: it has actually failed over to db02
10.0.0.51:6379> shutdown
[root@db01 ~]# redis-cli -h 10.0.0.51 -p 6380
10.0.0.51:6380> keys *
(empty list or set)
[root@db02 ~]# bash redis_shell.sh login 6379
10.0.0.52:6379> set k1 v1
OK
[root@db01 ~]# redis-cli -h 10.0.0.51 -p 6380
10.0.0.51:6380> keys *
1) "k1"
# 6. Insert more test data
[root@db01 ~]# ab -c 10 -n 1000 http://10.0.0.51/ssssssss
# 7. Log in through the proxy port and check that the data gets drained
[root@db01 ~]# redis-cli -h 10.0.0.51 -p 6380
10.0.0.51:6380> keys *
1) "k1"
2) "all"
10.0.0.51:6380> LLEN all
(integer) 0
Note: when one redis goes down, traffic automatically fails over to the other. As long as all queued data has already been pulled this is fine; but if it has not, and the repaired primary redis comes back up, traffic automatically fails back and the data that was never pulled is lost.
Note: after traffic fails back, logstash has to be restarted once before it resumes pulling; this is another small bug.
Start in the background:
systemctl start logstash
Start in the foreground:
/usr/share/logstash/bin/logstash -f /etc/logstash/conf.d/redis.conf
8. Handling a failover that happens before the data is fully pulled
# 1. Start the primary redis and log in through the proxy
[root@db01 ~]# bash redis_shell.sh start 6379
[root@db01 ~]# redis-cli -h 10.0.0.51 -p 6380
10.0.0.51:6380> keys *
(empty list or set)
# 2. Insert data and check that it is visible through the 6380 proxy
[root@db01 ~]# ab -c 10 -n 1000 http://10.0.0.51/jjjj
[root@db01 ~]# redis-cli -h 10.0.0.51 -p 6380
10.0.0.51:6380> keys *
1) "all"
10.0.0.51:6380> LLEN all
(integer) 2000
# 3. Insert more data and start logstash again
[root@db01 ~]# ab -c 10 -n 10000 http://10.0.0.51/hhhhhhh
[root@db01 ~]# /usr/share/logstash/bin/logstash -f /etc/logstash/conf.d/redis.conf
# 4. Watch the data being pulled, then simulate a crash before it finishes
[root@db01 ~]# redis-cli -h 10.0.0.51 -p 6380
10.0.0.51:6380> keys *
1) "all"
10.0.0.51:6380> LLEN all
(integer) 24048
10.0.0.51:6380> LLEN all
(integer) 22423
10.0.0.51:6380> LLEN all
(integer) 22173
10.0.0.51:6380> shutdown
# 5. Insert more data; it now actually lands on db02, and the roughly 20,000 entries still sitting on the old primary are stranded
[root@db01 ~]# ab -c 10 -n 1000 http://10.0.0.51/hhhhhhh
[root@db01 ~]# redis-cli -h 10.0.0.51 -p 6380
10.0.0.51:6380> keys *
1) "all"
10.0.0.51:6380> LLEN all
(integer) 17923
10.0.0.51:6380> LLEN all
(integer) 0
# 6. Redis persists its data to the RDB file, so use a migration tool to move the unpulled data from db01 over to db02
[root@db01 ~]# cd /data/redis_6379/
[root@db01 /data/redis_6379]# ll
-rw-r--r-- 1 root root 18884450 Feb 2 15:31 redis_6379.aof
-rw-r--r-- 1 root root 1558268 Feb 2 15:29 redis_6379.rdb
# 7. Write the migration tool's config file
[root@db01 /data/redis_6379]# vim db01_to_db02.conf
[source]
type: rdb file
servers:
- /data/redis_6379/redis_6379.rdb
[target]
type: single
servers:
- 10.0.0.52:6379
[common]
listen: 0.0.0.0:8888
step: 2
mbuf_size: 512
source_safe: false
# 8. Run the migration; the data shows up on db02 and is then pulled away
[root@db01 /data/redis_6379]# redis-migrate-tool -c db01_to_db02.conf
[root@db02 ~]# bash redis_shell.sh login 6379
10.0.0.52:6379> keys *
1) "k1"
2) "all"
10.0.0.52:6379> LLEN all
(integer) 1217
Once all the data has been migrated, briefly take the VIP down (so logstash can drain everything left on the backup redis). While the VIP is down filebeat cannot deliver, but filebeat can buffer data on its own for a short while. After taking the VIP down, start the repaired redis.
九、Using kafka as the cache for collecting logs with filebeat
The normal architecture uses three kafka nodes
(a single node also works)
Kafka and zookeeper both depend on a Java runtime; they are fairly resource-hungry, so make sure there is enough memory
Testing shows the cluster keeps working even with two of the nodes down
1. Configure hosts and SSH keys
[root@db01 ~]# cat >/etc/hosts<<EOF
> 10.0.0.51 db01
> 10.0.0.52 db02
> 10.0.0.53 db03
> EOF
[root@db01 ~]# ssh-keygen
[root@db01 ~]# ssh-copy-id 10.0.0.52
[root@db01 ~]# ssh-copy-id 10.0.0.53
2. Install and configure zookeeper
# On db01
cd /data/soft
tar zxf zookeeper-3.4.11.tar.gz -C /opt/
ln -s /opt/zookeeper-3.4.11/ /opt/zookeeper
mkdir -p /data/zookeeper
cat >/opt/zookeeper/conf/zoo.cfg<<EOF
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/data/zookeeper
clientPort=2181
server.1=10.0.0.51:2888:3888
server.2=10.0.0.52:2888:3888
server.3=10.0.0.53:2888:3888
EOF
echo "1" > /data/zookeeper/myid
cat /data/zookeeper/myid
rsync -avz /opt/zookeeper* 10.0.0.52:/opt/    (copy to the other two nodes)
rsync -avz /opt/zookeeper* 10.0.0.53:/opt/
### On db02
mkdir -p /data/zookeeper
echo "2" > /data/zookeeper/myid
cat /data/zookeeper/myid
### On db03
mkdir -p /data/zookeeper
echo "3" > /data/zookeeper/myid
cat /data/zookeeper/myid
Note! The myid must be different on every machine
echo "1" > /data/zookeeper/myid    (this one is node 1's)
3. Start zookeeper
# Start it on every node
/opt/zookeeper/bin/zkServer.sh start
4. Check every node
# Two followers and one leader means the cluster started successfully
(Mode: leader )
(Mode: follower)
(Mode: follower)
/opt/zookeeper/bin/zkServer.sh status
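Besides zkServer.sh status, zookeeper also answers the classic four-letter-word commands on its client port, which is handy for scripted health checks (nc comes from the netcat/nmap-ncat package; on some versions the 4lw whitelist may need to be opened up):
echo ruok | nc 10.0.0.51 2181    # a healthy server replies "imok"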
5. Test zookeeper
On one node, create a test znode
/opt/zookeeper/bin/zkCli.sh -server 10.0.0.51:2181    (press Enter)
create /test "hello"
Check on another node whether it can be read
/opt/zookeeper/bin/zkCli.sh -server 10.0.0.52:2181    (press Enter)
get /test    (seeing hello means it works)
6. Install and deploy kafka
- Kafka depends on zookeeper, so if zookeeper is unhealthy kafka cannot work
- The kafka config must list the IPs of all zookeeper nodes
- In the kafka config, make sure to use the node's own IP address
- In the kafka config, the broker.id must match the myid configured for that node in zookeeper
On db01
cd /data/soft/
tar zxf kafka_2.11-1.0.0.tgz -C /opt/
ln -s /opt/kafka_2.11-1.0.0/ /opt/kafka
mkdir /opt/kafka/logs
cat >/opt/kafka/config/server.properties<<EOF
broker.id=1
listeners=PLAINTEXT://10.0.0.51:9092
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
log.dirs=/opt/kafka/logs
num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
log.retention.hours=24
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
zookeeper.connect=10.0.0.51:2181,10.0.0.52:2181,10.0.0.53:2181
zookeeper.connection.timeout.ms=6000
group.initial.rebalance.delay.ms=0
EOF
On db02
cd /data/soft/
tar zxf kafka_2.11-1.0.0.tgz -C /opt/
ln -s /opt/kafka_2.11-1.0.0/ /opt/kafka
mkdir /opt/kafka/logs
cat >/opt/kafka/config/server.properties<<EOF
broker.id=2
listeners=PLAINTEXT://10.0.0.52:9092
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
log.dirs=/opt/kafka/logs
num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
log.retention.hours=24
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
zookeeper.connect=10.0.0.51:2181,10.0.0.52:2181,10.0.0.53:2181
zookeeper.connection.timeout.ms=6000
group.initial.rebalance.delay.ms=0
EOF
On db03
cd /data/soft/
tar zxf kafka_2.11-1.0.0.tgz -C /opt/
ln -s /opt/kafka_2.11-1.0.0/ /opt/kafka
mkdir /opt/kafka/logs
cat >/opt/kafka/config/server.properties<<EOF
broker.id=3
listeners=PLAINTEXT://10.0.0.53:9092
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
log.dirs=/opt/kafka/logs
num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
log.retention.hours=24
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
zookeeper.connect=10.0.0.51:2181,10.0.0.52:2181,10.0.0.53:2181
zookeeper.connection.timeout.ms=6000
group.initial.rebalance.delay.ms=0
EOF
7. Start kafka in the foreground on every node to test
# Seeing "started" in the output means it worked
/opt/kafka/bin/kafka-server-start.sh /opt/kafka/config/server.properties
8. Verify the processes on every node
# Seeing all three processes below means startup succeeded
jps
(3632 QuorumPeerMain)
(7664 Jps)
(7288 Kafka)
9. Test creating a topic
/opt/kafka/bin/kafka-topics.sh --create --zookeeper 10.0.0.51:2181,10.0.0.52:2181,10.0.0.53:2181 --partitions 3 --replication-factor 3 --topic kafkatest
10. Test describing the topic
/opt/kafka/bin/kafka-topics.sh --describe --zookeeper 10.0.0.51:2181,10.0.0.52:2181,10.0.0.53:2181 --topic kafkatest
11. Test deleting the topic
/opt/kafka/bin/kafka-topics.sh --delete --zookeeper 10.0.0.51:2181,10.0.0.52:2181,10.0.0.53:2181 --topic kafkatest
12. Test sending messages from the command line
Create the topic
/opt/kafka/bin/kafka-topics.sh --create --zookeeper 10.0.0.51:2181,10.0.0.52:2181,10.0.0.53:2181 --partitions 3 --replication-factor 3 --topic messagetest
Send test messages
# After pressing Enter a > prompt appears
/opt/kafka/bin/kafka-console-producer.sh --broker-list 10.0.0.51:9092,10.0.0.52:9092,10.0.0.53:9092 --topic messagetest
Consume on another node
# "[zookeeper]." appears in the output
/opt/kafka/bin/kafka-console-consumer.sh --zookeeper 10.0.0.51:2181,10.0.0.52:2181,10.0.0.53:2181 --topic messagetest --from-beginning
Check that messages are delivered to the other nodes
# Type at db01's > prompt
>msy
>123
# db02 and db03 receive the same messages
[zookeeper].
msy
123
13. Once the test succeeds, run kafka in the background
# Press Ctrl+C to stop the foreground process, then start it as a daemon
/opt/kafka/bin/kafka-server-start.sh -daemon /opt/kafka/config/server.properties
# Check with jps that the three processes are there
jps
(10364 Kafka)
(10397 Jps)
(3486 QuorumPeerMain)
# On one of the nodes, verify that messages are still received
/opt/kafka/bin/kafka-console-consumer.sh --zookeeper 10.0.0.51:2181,10.0.0.52:2181,10.0.0.53:2181 --topic messagetest --from-beginning
[zookeeper].
msy
123
14. Edit the filebeat config file
The output must list the IPs of all kafka brokers
cat >/etc/filebeat/filebeat.yml <<EOF
filebeat.inputs:
- type: log
  enabled: true
  paths:
    - /var/log/nginx/www.log
  json.keys_under_root: true
  json.overwrite_keys: true
  tags: ["access"]
- type: log
  enabled: true
  paths:
    - /var/log/nginx/error.log
  tags: ["error"]
output.kafka:
  hosts: ["10.0.0.51:9092", "10.0.0.52:9092", "10.0.0.53:9092"]
  topic: 'filebeat'
setup.template.name: "nginx"
setup.template.pattern: "nginx_*"
setup.template.enabled: false
setup.template.overwrite: true
EOF
# Restart filebeat
systemctl restart filebeat
15. Generate traffic and check whether kafka received the logs
curl 10.0.0.51
/opt/kafka/bin/kafka-topics.sh --list --zookeeper 10.0.0.51:2181,10.0.0.52:2181,10.0.0.53:2181
messagetest
# Seeing the log entries means it works
/opt/kafka/bin/kafka-console-consumer.sh --zookeeper 10.0.0.51:2181,10.0.0.52:2181,10.0.0.53:2181 --topic filebeat --from-beginning
16. Edit the logstash config file
The input must list all kafka broker IPs; don't forget the []
Test it in the foreground first, then switch to background startup once it works
cat > /etc/logstash/conf.d/kafka.conf<<EOF
input {
kafka{
bootstrap_servers=>["10.0.0.51:9092,10.0.0.52:9092,10.0.0.53:9092"]
topics=>["filebeat"]
group_id=>"logstash"
codec => "json"
}
}
filter {
mutate {
convert => ["upstream_time", "float"]
convert => ["request_time", "float"]
}
}
output {
stdout {}
if "access" in [tags] {
elasticsearch {
hosts => "http://10.0.0.51:9200"
manage_template => false
index => "nginx_access-%{+yyyy.MM}"
}
}
if "error" in [tags] {
elasticsearch {
hosts => "http://10.0.0.51:9200"
manage_template => false
index => "nginx_error-%{+yyyy.MM}"
}
}
}
EOF
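Same tip as before: logstash is slow to start, so syntax-check the new config with -t (--config.test_and_exit) before the real foreground run:
/usr/share/logstash/bin/logstash -f /etc/logstash/conf.d/kafka.conf -t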
17. Start logstash in the foreground and test
# First delete the indices that ES created earlier
/usr/share/logstash/bin/logstash -f /etc/logstash/conf.d/kafka.conf
# Generate an access-log entry
curl 127.0.0.1
十、How to promote ELK inside the company
- Lead with what's in it for others: it lets them leave work earlier
- Have the demo environment well prepared so you can show it at any moment, with plenty of data and dashboards
- Build dedicated dashboards for the dev, backend, frontend, ops and DBA teams
- Approach each team lead one-on-one and offer to solve their team's problems first
- "Look, today you have to walk all the way over whenever there's a problem; once I set this up you just click around, and if anything else comes up, one word and I'll come to you"
- Agree up front: whatever can be logged in JSON should be logged in JSON