部署keepalived服务以及出现的脑裂问题

部署基于keepalived实现的高可用性wordpress集群

1、部署master5机器

1、安装keepalived服务
yum install keepalived -y
2、编辑配置文件
[root@slb-5 ~]#cat /etc/keepalived/keepalived.conf
# keepalived configuration for the MASTER load balancer (slb-5).
global_defs {
    # Identifier of this keepalived node.
    router_id slb-5
}

# One VRRP instance that manages the virtual IP (VIP).
vrrp_instance VIP_1 {
    # Initial role of this node.
    state MASTER
    # Interface that sends VRRP advertisements and carries the VIP.
    interface eth0
    # VRRP group id; both nodes of the pair use the same value.
    virtual_router_id 50
    # Election priority; the node with the higher value holds the VIP.
    priority 150
    # Advertisement interval in seconds.
    advert_int 1
    # Simple password authentication; must be identical on both nodes.
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    # The VIP that floats between the nodes.
    virtual_ipaddress {
        10.0.0.11
    }
}
3、启动服务
systemctl start keepalived

2、部署backup6机器

1、安装keepalived服务
yum install keepalived -y
2、编辑配置文件
[root@slb-6 ~]#cat /etc/keepalived/keepalived.conf
# keepalived configuration for the BACKUP load balancer (slb-6).
global_defs {
    # Identifier of this keepalived node.
    router_id slb-6
}

# One VRRP instance that manages the virtual IP (VIP).
vrrp_instance VIP_1 {
    # Initial role of this node.
    state BACKUP
    # Interface that receives VRRP advertisements and carries the VIP on takeover.
    interface eth0
    # VRRP group id; both nodes of the pair use the same value.
    virtual_router_id 50
    # Election priority; lower than the master so this node only takes over on failure.
    priority 100
    # Advertisement interval in seconds.
    advert_int 1
    # Simple password authentication; must be identical on both nodes.
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    # The VIP that floats between the nodes.
    virtual_ipaddress {
        10.0.0.11
    }
}

3、启动服务
systemctl start keepalived

3、查看网卡情况

[root@slb-5 /etc/nginx/conf.d]#ip addr show eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 00:0c:29:c0:5b:ff brd ff:ff:ff:ff:ff:ff
    inet 10.0.0.5/24 brd 10.0.0.255 scope global eth0
       valid_lft forever preferred_lft forever
    inet 10.0.0.11/32 scope global eth0
       valid_lft forever preferred_lft forever
    inet6 fe80::20c:29ff:fec0:5bff/64 scope link 
       valid_lft forever preferred_lft forever



[root@slb-6 /etc/nginx/conf.d]#ip addr show eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 00:0c:29:93:45:d6 brd ff:ff:ff:ff:ff:ff
    inet 10.0.0.6/24 brd 10.0.0.255 scope global eth0
       valid_lft forever preferred_lft forever
    inet6 fe80::20c:29ff:fe93:45d6/64 scope link 
       valid_lft forever preferred_lft forever


4、编辑5机器的反向代理

[root@slb-5 /etc/nginx/conf.d]#cat wordpress.conf 
# Reverse-proxy configuration on slb-5 for wordpress.afeitt.cn.
# Backend pool: two web servers, weighted 4:1.
upstream web-pools{
  server 172.16.1.7:80 weight=4;
  server 172.16.1.8:80 weight=1;

}

# Plain-HTTP entry point: redirect every request to HTTPS.
server{

  listen 80;
  server_name wordpress.afeitt.cn;
  # "redirect" makes this a temporary (302) redirect to the same path over https.
  rewrite ^(.*) https://$server_name$1 redirect;
}  


# HTTPS entry point: terminates TLS here and forwards to the backend pool over HTTP.
server {
  
  listen 443 ssl;
  server_name wordpress.afeitt.cn;

  # Relative paths; NOTE(review): presumably resolved against the nginx config directory - confirm.
  ssl_certificate ssl_key/server.crt;
  ssl_certificate_key ssl_key/server.key;

  location / {
    proxy_pass http://web-pools;
    # Shared proxy settings; the file's contents are not shown in this article.
    include /etc/nginx/proxy_params.conf;

  }

}

[root@slb-5 /etc/nginx/conf.d]#systemctl restart nginx

5、编辑6机器的反向代理

[root@slb-6 /etc/nginx/conf.d]#cat wordpress.conf 
# Reverse-proxy configuration on slb-6 for wordpress.afeitt.cn.
# Backend pool: two web servers, weighted 4:1.
upstream web-pools{
  server 172.16.1.7:80 weight=4;
  server 172.16.1.8:80 weight=1;

}

# Plain-HTTP entry point: redirect every request to HTTPS.
server{

  listen 80;
  server_name wordpress.afeitt.cn;
  # "redirect" makes this a temporary (302) redirect to the same path over https.
  rewrite ^(.*) https://$server_name$1 redirect;
}  


# HTTPS entry point: terminates TLS here and forwards to the backend pool over HTTP.
server {
  
  listen 443 ssl;
  server_name wordpress.afeitt.cn;

  # Relative paths; NOTE(review): presumably resolved against the nginx config directory - confirm.
  ssl_certificate ssl_key/server.crt;
  ssl_certificate_key ssl_key/server.key;

  location / {
    proxy_pass http://web-pools;
    # Shared proxy settings; the file's contents are not shown in this article.
    include /etc/nginx/proxy_params.conf;

  }

}

[root@slb-6 /etc/nginx/conf.d]#systemctl restart nginx

6、web集群

[root@web-7 /etc/nginx/conf.d]#cat wordpress.conf 
# WordPress site on web-7: serves files from /code/wordpress and hands PHP to FastCGI.
server{
    listen 80;
    server_name wordpress.afeitt.cn;

    root /code/wordpress;
    index index.php index.html;

    # Case-insensitive match on URIs ending in .php.
    location ~*  \.php$ {

        # Same value as the server-level root above.
        root /code/wordpress;
        fastcgi_index index.php;
        # FastCGI backend listening on localhost port 9000.
        fastcgi_pass 127.0.0.1:9000;
        # Absolute path of the script handed to the FastCGI backend.
        fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name;
        include /etc/nginx/fastcgi_params;
    }
}
[root@web-7 /etc/nginx/conf.d]#systemctl restart nginx


[root@web-8 /etc/nginx/conf.d]#cat wordpress.conf 
# WordPress site on web-8: serves files from /code/wordpress and hands PHP to FastCGI.
server{
    listen 80;
    server_name wordpress.afeitt.cn;

    root /code/wordpress;
    index index.php index.html;

    # Case-insensitive match on URIs ending in .php.
    location ~*  \.php$ {

        # Same value as the server-level root above.
        root /code/wordpress;
        fastcgi_index index.php;
        # FastCGI backend listening on localhost port 9000.
        fastcgi_pass 127.0.0.1:9000;
        # Absolute path of the script handed to the FastCGI backend.
        fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name;
        include /etc/nginx/fastcgi_params;
    }
}
[root@web-8 /etc/nginx/conf.d]#systemctl restart nginx

7、访问测试

8、关闭5机器的keepalived服务,查看vip是否迁移

[root@slb-5 /etc/nginx]#systemctl stop keepalived
[root@slb-5 /etc/nginx]#ip addr show eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 00:0c:29:c0:5b:ff brd ff:ff:ff:ff:ff:ff
    inet 10.0.0.5/24 brd 10.0.0.255 scope global eth0
       valid_lft forever preferred_lft forever
    inet6 fe80::20c:29ff:fec0:5bff/64 scope link 
       valid_lft forever preferred_lft forever

[root@slb-6 /etc/nginx/conf.d]#ip addr show eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 00:0c:29:93:45:d6 brd ff:ff:ff:ff:ff:ff
    inet 10.0.0.6/24 brd 10.0.0.255 scope global eth0
       valid_lft forever preferred_lft forever
    inet 10.0.0.11/32 scope global eth0
       valid_lft forever preferred_lft forever
    inet6 fe80::20c:29ff:fe93:45d6/64 scope link 
       valid_lft forever preferred_lft forever

脑裂问题

backup6机器问题

lb-5安装tcpdump
[root@lb-5 ~]#yum install tcpdump -y
lb-5抓包
# 参数 
#  -nn:    不把IP地址和端口号解析成主机名和服务名,直接以数字形式(IP、端口号)显示
# -i      指定监听的网络接口;
# any 抓取所有网络接口
# host 指定抓取的主机地址,这里是组播地址。

tcpdump -nn -i any host 224.0.0.18
lb-6开启防火墙
[root@slb-6 /etc/nginx/conf.d]#systemctl start firewalld

此时两边都出现了VIP(脑裂):6机器的防火墙拦截了master发来的VRRP组播通告,backup收不到通告就认为master已经挂掉,于是自己也绑定了VIP,VIP无法正常漂移切换
停止5机器master的工作
6机器也无法正常工作了
恢复方法
关闭防火墙
[root@slb-6 /etc/nginx/conf.d]#systemctl stop firewalld
或者保持防火墙开启,在防火墙中放行VRRP协议(见下面的解决方案)

解决方案
# 重启防火墙后,backup收不到master的VRRP通告,只要keepalived在运行就会抢夺VIP
systemctl restart firewalld

# 放行VRRP组播后,backup上的VIP应该会消失,VRRP恢复正常
iptables -I INPUT -i eth0 -d 224.0.0.0/8 -p vrrp -j ACCEPT
防止backup脑裂脚本
整体思路
针对backup服务器

1. backup定期检查master的nginx是否运行
2. backup定期检查自己是否有VIP
3. 如果有如下情况,backup却也有VIP,就是脑裂
- master的nginx正常
- master有VIP
- backup有VIP

4. 如果backup有脑裂,就干掉自己的keepalived
5. 告知管理员。
backup6机器的脚本
[root@slb-6 /etc/keepalived]#cat check_vip.sh 
#!/bin/bash
# Split-brain guard for the BACKUP load balancer.
# Intended to be run periodically by keepalived: when both the master and this
# host hold the VIP at the same time, stop the local keepalived so that this
# host gives the VIP up.

readonly VIP='10.0.0.11'
readonly MASTER_HOST='10.0.0.5'

# Succeeds when the `ip a` listing read from stdin contains the VIP.
# The fixed string "inet <VIP>/" avoids false hits such as 10.0.0.110 and
# keeps "." from being treated as a regex wildcard.
has_vip() {
  grep -qF "inet ${VIP}/"
}

master_has_vip=0
my_has_vip=0

# BatchMode/ConnectTimeout stop the check from hanging on a password prompt or
# an unreachable master. If ssh fails, the master is treated as not holding
# the VIP, so this host keeps serving.
if ssh -o BatchMode=yes -o ConnectTimeout=3 "${MASTER_HOST}" ip a 2>/dev/null | has_vip; then
  master_has_vip=1
fi

if ip a | has_vip; then
  my_has_vip=1
fi

# Both sides hold the VIP: split brain, so take this node out.
if (( master_has_vip && my_has_vip )); then
  systemctl stop keepalived
fi

# Trace line showing that keepalived actually invoked the script.
echo '我被执行了 keepalived' >> /tmp/kp.log


backup服务调用自杀脚本
[root@slb-6 /etc/keepalived]#cat keepalived.conf
# keepalived configuration for the BACKUP node, extended with a split-brain check script.
global_defs {
    # Run tracked scripts as root.
    script_user root
    # Turn on keepalived's script security checks.
    enable_script_security
    router_id lb-6
}

# Define the check script
vrrp_script check_vip {
    script "/etc/keepalived/check_vip.sh"
    interval 5 # seconds between script runs
}

vrrp_instance VIP_1 {
    state BACKUP 
    interface eth0
    virtual_router_id 50
    priority 100
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        10.0.0.11
    }
    # Attach the check script to this instance
    track_script {
        check_vip
    }
}


测试backup的监控脚本
[root@slb-6 /etc/keepalived]#chmod +x check_vip.sh 
创建免密登录
[root@slb-6 /etc/keepalived]#ssh-keygen 
[root@slb-6 /etc/keepalived]#ls /root/.ssh/
id_rsa  id_rsa.pub

发送公钥到5机器
[root@slb-6 /etc/keepalived]#ssh-copy-id root@10.0.0.5

启动5、6机器的keepalived服务
systemctl restart keepalived
开启6机器(backup)的防火墙
[root@slb-6 /etc/keepalived]#systemctl restart firewalld

重启keepalived服务后,脚本检测到master和backup同时持有VIP(脑裂),会自动停止backup上的keepalived

master5机器出现问题

背景
刚才的脚本解决了backup抢夺VIP的问题;
但是如果master出了问题呢?比如master的nginx挂了,那keepalived没啥用了,服务还是挂了。
监控master故障的脚本
思路
master机器脚本
1. 如果自己的nginx已经不存在了,keepalived还活着,尝试重启nginx
2. 如果重启nginx失败,干掉自己的keepalived,放弃master资源,让backup继续干活
脚本
[root@slb-5 ~]#cat /etc/keepalived/check_web.sh 
#!/bin/bash
# Health check for the MASTER load balancer, intended to be run periodically
# by keepalived. If nginx is not running, try to restart it; if the restart
# fails, stop keepalived so the VIP can move to the backup.

# pgrep -x matches the process name exactly. This avoids the unquoted glob
# `ngin[x]` (which expands if a file named "nginx" exists in the working
# directory) and false hits on unrelated commands that mention nginx.
if ! pgrep -x nginx >/dev/null; then
  # Any non-zero exit status means the restart failed, not only status 1.
  if ! systemctl restart nginx; then
    # nginx cannot be recovered: release the master role.
    systemctl stop keepalived
  fi
fi

keepalived调用脚本
[root@slb-5 ~]#cat /etc/keepalived/keepalived.conf
# keepalived configuration for the MASTER node, extended with an nginx health-check script.
global_defs {
    router_id lb-5
}

# Define the health-check script; it runs every 5 seconds.
vrrp_script check_web {
    script "/etc/keepalived/check_web.sh"
    interval 5
}

vrrp_instance VIP_1 {
    state MASTER
        interface eth0
        virtual_router_id 50
        priority 150
        advert_int 1
        authentication {
            auth_type PASS
            auth_pass 1111
        }
        virtual_ipaddress {
            10.0.0.11
        }
        # Attach the health-check script to this instance.
        track_script {
            check_web
        }
}

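给脚本加权限(注意:脚本文件名必须与keepalived.conf中script指定的路径一致,上文配置的是/etc/keepalived/check_web.sh;下面的操作记录中文件名为web-vip.sh,实际操作时请保持两处一致)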
[root@slb-5 /etc/keepalived]#chmod +x /etc/keepalived/web-vip.sh 
[root@slb-5 /etc/keepalived]#ll
total 12
-rw-r--r-- 1 root root  320 Jun  6 23:59 keepalived.conf
-rw-r--r-- 1 root root 3598 Jun  5 17:48 keepalived.conf.cn
-rwxr-xr-x 1 root root  265 Jun  6 23:57 web-vip.sh

测试
1、确保master5机器与backup6机器的keepalived服务正常
systemctl restart keepalived

2、停止master5机器的nginx服务
[root@slb-5 /etc/nginx/conf.d]#systemctl stop nginx
[root@slb-5 /etc/nginx/conf.d]#ip add show eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 00:0c:29:c0:5b:ff brd ff:ff:ff:ff:ff:ff
    inet 10.0.0.5/24 brd 10.0.0.255 scope global eth0
       valid_lft forever preferred_lft forever
    inet 10.0.0.11/32 scope global eth0
       valid_lft forever preferred_lft forever
    inet6 fe80::20c:29ff:fec0:5bff/64 scope link 
       valid_lft forever preferred_lft forever


3、如果发现nginx没有停止(被检查脚本自动重启了),就修改它的配置文件,让nginx无法重新启动,然后再停止一次

4、确保可以切换到backup6机器

修复方法
现在已经保证了服务处于正常运行状态,再来寻找master5机器的问题

[root@slb-5 /etc/nginx/conf.d]#mv wordpress.conf.cn wordpress.conf
[root@slb-5 /etc/nginx/conf.d]#systemctl restart nginx
[root@slb-5 /etc/nginx/conf.d]#systemctl restart keepalived
[root@slb-5 /etc/nginx/conf.d]#ls
ssl.conf.cn  wecenter.conf.cn  wordpress.conf

查看是否归还VIP到master5机器上
[root@slb-5 /etc/nginx/conf.d]#ip addr show eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 00:0c:29:c0:5b:ff brd ff:ff:ff:ff:ff:ff
    inet 10.0.0.5/24 brd 10.0.0.255 scope global eth0
       valid_lft forever preferred_lft forever
    inet 10.0.0.11/32 scope global eth0
       valid_lft forever preferred_lft forever
    inet6 fe80::20c:29ff:fec0:5bff/64 scope link 
       valid_lft forever preferred_lft forever

posted @ 2022-07-26 20:16  张开嘴  阅读(319)  评论(0)    收藏  举报