30.keepalived高可用深入透彻


1.keepalived高可用环境部署和说明
官网:http://www.keepalived.org/
(1)
  
(2)centos系统及nginx代理环境
lb02和lb01配置保持一致:
[root@lb02 ~]# cat /application/nginx/conf/nginx.conf
#lb01部署
worker_processes 1;
events {
worker_connections 1024;
}
http {
include mime.types;
default_type application/octet-stream;
sendfile on;
keepalive_timeout 65;
#服务器web池
upstream server_pools {
server 10.0.0.7:80 weight=1;
server 10.0.0.8:80 weight=1;
}
server {
listen 80;
server_name bbs.etiantian.org;
location / {
proxy_pass http://server_pools;
proxy_set_header Host $host;
proxy_set_header X-Forwarded-For $remote_addr;
}
}
server {
listen 80;
server_name www.etiantian.org;
location / {
proxy_pass http://server_pools;
proxy_set_header Host $host;
proxy_set_header X-Forwarded-For $remote_addr;
}
}
}
[root@lb02 ~]# /application/nginx/sbin/nginx -t
[root@lb02 ~]# /application/nginx/sbin/nginx -s reload

测试功能正常:
[root@lb02 ~]# curl -H Host:www.etiantian.org 10.0.0.5/oldboy.html
web02:www.etiantian.org
[root@lb02 ~]# curl -H Host:www.etiantian.org 10.0.0.5/oldboy.html
web01:www.etiantian.org
[root@lb02 ~]# curl -H Host:www.etiantian.org 10.0.0.5/oldboy.html
web02:www.etiantian.org
[root@lb02 ~]# curl -H Host:www.etiantian.org 10.0.0.5/oldboy.html
web01:www.etiantian.org
[root@lb02 ~]#
[root@lb02 ~]# curl -H Host:bbs.etiantian.org 10.0.0.5/oldboy.html
web02:bbs.etiantian.org
[root@lb02 ~]# curl -H Host:bbs.etiantian.org 10.0.0.5/oldboy.html
web01:bbs.etiantian.org
[root@lb02 ~]# curl -H Host:bbs.etiantian.org 10.0.0.6/oldboy.html
web02:bbs.etiantian.org
[root@lb02 ~]# curl -H Host:bbs.etiantian.org 10.0.0.6/oldboy.html
web01:bbs.etiantian.org
[root@lb02 ~]# curl -H Host:bbs.etiantian.org 10.0.0.6/oldboy.html
web02:bbs.etiantian.org
[root@lb02 ~]# curl -H Host:www.etiantian.org 10.0.0.6/oldboy.html
web01:www.etiantian.org
[root@lb02 ~]# curl -H Host:www.etiantian.org 10.0.0.6/oldboy.html
web02:www.etiantian.org

安装keepalived软件:
[root@lb01 ~]# yum install -y keepalived
[root@lb02 ~]# yum install -y keepalived

启动keepalived软件服务:
[root@lb01 ~]# /etc/init.d/keepalived start
[root@lb02 ~]# /etc/init.d/keepalived start

自动创建vip(像小心脏一样漂移,进行主备切换)
[root@lb01 ~]# ip a
[root@lb02 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:f9:55:18 brd ff:ff:ff:ff:ff:ff
inet 10.0.0.6/24 brd 10.0.0.255 scope global eth0
inet 192.168.200.16/32 scope global eth0 //启动自动创建这3个,不过没用!
inet 192.168.200.17/32 scope global eth0 //
inet 192.168.200.18/32 scope global eth0 //
inet6 fe80::20c:29ff:fef9:5518/64 scope link
valid_lft forever preferred_lft forever
3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UNKNOWN qlen 1000
link/ether 00:0c:29:f9:55:22 brd ff:ff:ff:ff:ff:ff
inet 172.16.1.6/24 brd 172.16.1.255 scope global eth1
inet6 fe80::20c:29ff:fef9:5522/64 scope link
valid_lft forever preferred_lft forever

2.keepalived配置文件通过配置实践keepalived软件服务功能的使用
keepalived软件服务是通过配置文件提供相应服务的!起初是专门配置lvs使用的。
这里具备高可用功能的keepalived.conf配置文件包含了两个重要区块。
(1)全局定义部分
这部分主要用来设置keepalived的故障通知机制和Router ID标识。
a.vrrp_实例的配置部分
b.lvs配置

  
  

(2)VRRP实例定义区块(VRRP instance)部分
这部分主要用来定义具体服务的实例配置,包括keepalived主备状态、接口、优先权、认证方式和IP信息等。
  
  

  
  
备机不断向主机发送信息(你还活着没?),主机不断回复(我还活着!)......

我们在这里配置:
[root@lb01 ~]# cp /etc/keepalived/keepalived.conf{,.ori}
[root@lb01 ~]# vim /etc/keepalived/keepalived.conf
#主机10.0.0.5的keepalived配置文件示例详解
global_defs {
router_id LB01 #每台服务器上keepalived软件唯一标识的身份证号、身份标记,同一个局域网唯一。
}

vrrp_instance VI_1 {
state MASTER
interface eth0
virtual_router_id 51
priority 150
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
10.0.0.3/24 dev eth0 label eth0:1 #设备;网卡;标签label名为eth0:1
}
}

[root@lb02 ~]# cp /etc/keepalived/keepalived.conf{,.ori}
[root@lb02 ~]# vim /etc/keepalived/keepalived.conf
[root@lb02 ~]# cat /etc/keepalived/keepalived.conf
#备机10.0.0.6的keepalived配置文件示例详解
global_defs {
router_id LB02 #每台服务器上keepalived软件唯一标识的身份证号、身份标记,同一个局域网唯一。
}

vrrp_instance VI_1 {
state BACKUP
interface eth0
virtual_router_id 51
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
10.0.0.3/24 dev eth0 label eth0:1 #设备;网卡;标签label名为eth0:1
}
}


[root@lb01 ~]# /etc/init.d/keepalived restart
[root@lb01 ~]# ip a s eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:aa:74:37 brd ff:ff:ff:ff:ff:ff
inet 10.0.0.5/24 brd 10.0.0.255 scope global eth0
inet 10.0.0.3/24 scope global secondary eth0:1 //
inet6 fe80::20c:29ff:feaa:7437/64 scope link
valid_lft forever preferred_lft forever

[root@lb02 ~]# /etc/init.d/keepalived restart
[root@lb02 ~]# ip a s eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:f9:55:18 brd ff:ff:ff:ff:ff:ff
inet 10.0.0.6/24 brd 10.0.0.255 scope global eth0
inet6 fe80::20c:29ff:fef9:5518/64 scope link
valid_lft forever preferred_lft forever

将网站解析到10.0.0.3来测试:
#10.0.0.5 www.etiantian.org blog.etiantian.org bbs.etiantian.org status.etiantian.org
10.0.0.3 www.etiantian.org blog.etiantian.org bbs.etiantian.org status.etiantian.org

测试:
输入:http://www.etiantian.org/oldboy.html
显示和刷新:web01:www.etiantian.org和web02:www.etiantian.org
输入:http://bbs.etiantian.org/oldboy.html
显示和刷新:web01:bbs.etiantian.org和web02:bbs.etiantian.org
成功!

现在是lb01主机接管工作,查看lb01日志:
[root@lb01 ~]# tail -f /application/nginx/logs/access.log
10.0.0.253 - - [02/Mar/2018:10:36:44 +0800] "GET /oldboy.html HTTP/1.1" 200 24 "-" "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)"

突然,现在lb01的keepalived服务出现故障,停止了:
[root@lb01 ~]# /etc/init.d/keepalived stop

测试:
输入:http://www.etiantian.org/oldboy.html
显示和刷新:web01:www.etiantian.org和web02:www.etiantian.org
输入:http://bbs.etiantian.org/oldboy.html
显示和刷新:web01:bbs.etiantian.org和web02:bbs.etiantian.org
成功!

[root@lb01 ~]# ip a s eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:aa:74:37 brd ff:ff:ff:ff:ff:ff
inet 10.0.0.5/24 brd 10.0.0.255 scope global eth0
inet6 fe80::20c:29ff:feaa:7437/64 scope link
valid_lft forever preferred_lft forever
[root@lb02 ~]# ip a s eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:f9:55:18 brd ff:ff:ff:ff:ff:ff
inet 10.0.0.6/24 brd 10.0.0.255 scope global eth0
inet 10.0.0.3/24 scope global secondary eth0:1 //
inet6 fe80::20c:29ff:fef9:5518/64 scope link
valid_lft forever preferred_lft forever
现在vip漂移到备机lb02上。

查看lb02日志有变化,进一步说明现在是lb02主机接管工作:
[root@lb02 ~]# tail -f /application/nginx/logs/access.log
10.0.0.253 - - [01/Mar/2018:19:34:38 +0800] "GET /favicon.ico HTTP/1.1" 404 571 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"

突然,现在lb01的keepalived服务恢复正常了:
[root@lb01 ~]# /etc/init.d/keepalived start
[root@lb01 ~]# ip a s eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:aa:74:37 brd ff:ff:ff:ff:ff:ff
inet 10.0.0.5/24 brd 10.0.0.255 scope global eth0
inet 10.0.0.3/24 scope global secondary eth0:1
inet6 fe80::20c:29ff:feaa:7437/64 scope link
valid_lft forever preferred_lft forever
[root@lb02 ~]# ip a s eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:f9:55:18 brd ff:ff:ff:ff:ff:ff
inet 10.0.0.6/24 brd 10.0.0.255 scope global eth0
inet6 fe80::20c:29ff:fef9:5518/64 scope link
valid_lft forever preferred_lft forever
测试:
输入:http://www.etiantian.org/oldboy.html
显示和刷新:web01:www.etiantian.org和web02:www.etiantian.org
输入:http://bbs.etiantian.org/oldboy.html
显示和刷新:web01:bbs.etiantian.org和web02:bbs.etiantian.org
成功!

现在是lb01主机重新接管工作,查看lb01日志:
[root@lb01 ~]# tail -f /application/nginx/logs/access.log
10.0.0.253 - - [02/Mar/2018:10:40:51 +0800] "GET /oldboy.html HTTP/1.1" 200 24 "-" "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)"

3.高可用软件常见脑裂原因及解决方案
(1)脑裂定义:由于某些原因,导致两台高可用服务器对在指定时间内,无法检测到对方的心跳消息,各自取得资源及服务的所有权,而此时的两台高可用服务器对都还活着并在正常运行,这样就会导致同一个IP或服务在两端同时存在而发生冲突,最严重的是两台服务器占用同一个VIP地址,当用户写入数据时可能会分别写入到两端,这样可能会导致服务器两端的数据不一致或造成数据丢失,这种情况被称为脑裂。

(2)导致脑裂发生的原因:
<1>高可用服务器对 之间的心跳线链路故障,导致无法正常通信。
a.心跳线坏了(断了、老化)。
b.网卡及相关驱动坏了,IP配置及冲突问题(网卡直连)。
c.心跳线间连接的设备故障(网卡及交换机)。
d.仲裁的机器出问题(采用仲裁的方案)。
<2>高可用服务器对 上开启iptables防火墙阻挡了心跳消息传输。
<3>高可用服务器对 上心跳网卡地址等信息配置不正确,导致发送心跳失败。
<4>其他服务配置不当等原因,如心跳方式不同,心跳广播冲突、软件bug等。
  

(3)解决脑裂的常见方案:
<1>同时使用串行电缆和以太网电缆连接,同时用两条心跳线路,这样一条线路坏了另一个还是好的,依然能传送心跳消息。
<2>当检测到脑裂时强行关闭一个心跳节点(该功能需要特殊设备支持,如stonith,fence)。相当于备节点接收不到心跳消息,发送关机命令通过单独的线路关闭主节点的电源。(可写监控脚本检测备节点是否有vip)
<3>做好对脑裂的监控报警(如邮件及手机短信等或值班),在问题发生时人为第一时间介入仲裁,降低损失。例如:百度的监控报警短信就有上行和下行的区别。报警信息报到管理员手机上,可以通过手机回复对应数字或者简单的字符串操作返回给服务器,让服务器根据指令自动处理相应故障,这样解决故障的时间更短。

  

4.解决keepalived只能基于服务器级别检测故障的问题:我们要求vip能够随主备机上nginx服务是否正常运行而漂移。
nginx软件服务停了后vip却还在原主机服务器上,没有漂移到备机服务器。现象如下:
[root@lb01 ~]# killall nginx
[root@lb01 ~]# killall nginx
nginx: no process killed
[root@lb01 ~]# ps -ef|grep nginx
root 1304 1236 0 16:53 pts/0 00:00:00 grep nginx
[root@lb01 ~]# ip a s eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:aa:74:37 brd ff:ff:ff:ff:ff:ff
inet 10.0.0.5/24 brd 10.0.0.255 scope global eth0
inet 10.0.0.3/24 scope global secondary eth0:1
inet6 fe80::20c:29ff:feaa:7437/64 scope link
valid_lft forever preferred_lft forever
原因:keepalived软件是基于服务器级别的,只有把keepalived关了、网断了或服务器断电了vip才会漂移;仅仅nginx服务停止并不会触发vip漂移。
这不符合需求,因为这里keepalived是给nginx做高可用的,nginx关闭势必要求keepalived服务紧跟着关闭让vip漂移到备机。

解决:
keepalived是支持脚本配置的,可写监控脚本。

利用nginx进程数判断是否开启服务:
[root@lb01 ~]# ps -ef|grep nginx|grep -v grep
root 1330 1 0 17:02 ? 00:00:00 nginx: master process /application/nginx/sbin/nginx
www 1331 1330 0 17:02 ? 00:00:00 nginx: worker process
[root@lb01 ~]# ps -ef|grep nginx|grep -v grep|wc -l
2

[root@lb01 ~]# killall nginx
[root@lb01 ~]# killall nginx
nginx: no process killed
[root@lb01 ~]# ps -ef|grep nginx|grep -v grep|wc -l
0

可以编写如下监控脚本,并让它定时执行以监测nginx(后面通过keepalived的vrrp_script每隔2s调用一次):
[root@lb01 ~]# mkdir -p /service/scripts/
[root@lb01 ~]# vim /service/scripts/check_lb.sh
[root@lb01 ~]# cat /service/scripts/check_lb.sh
#!/bin/bash
if [ `ps -ef|grep nginx|grep -v grep|wc -l` -eq 0 ];then
/etc/init.d/keepalived stop
fi
[root@lb01 ~]# chmod +x /service/scripts/check_lb.sh

备机lb02:
[root@lb02 ~]# mkdir -p /service/scripts/
[root@lb02 ~]# vim /service/scripts/check_lb.sh
[root@lb02 ~]# cat /service/scripts/check_lb.sh
#!/bin/bash
if [ `ps -ef|grep nginx|grep -v grep|wc -l` -eq 0 ];then
/etc/init.d/keepalived stop
fi
[root@lb02 ~]# chmod +x /service/scripts/check_lb.sh


添加keepalived内置脚本执行语句:
[root@lb01 ~]# cat /etc/keepalived/keepalived.conf
#主机keepalived配置文件示例详解
global_defs {
router_id LB01 #每台服务器上keepalived软件的身份证号、身份标记,同一个局域网唯一。
}

vrrp_script check_lb { #定义vrrp脚本,检测nginx进程是否存在。
script "/service/scripts/check_lb.sh" #执行脚本,当nginx服务有问题则停掉keepalived服务。
interval 2 #间隔2s
weight 2 #权重为2
}

vrrp_instance VI_1 {
state MASTER
interface eth0
virtual_router_id 51
priority 150
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
10.0.0.3/24 dev eth0 label eth0:1 #设备;网卡;标签label名为eth0:1
}
#触发检查
track_script {
check_lb
}
}

备机lb02:
[root@lb02 ~]# cat /etc/keepalived/keepalived.conf
#备机keepalived配置文件示例详解
global_defs {
router_id LB02 #每台服务器上keepalived软件唯一标识的身份证号、身份标记,同一个局域网唯一。
}

vrrp_script check_lb {
script "/service/scripts/check_lb.sh"
interval 2
weight 2
}

vrrp_instance VI_1 {
state BACKUP
interface eth0
virtual_router_id 51
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
10.0.0.3/24 dev eth0 label eth0:1 #设备;网卡;标签label名为eth0:1
}
track_script {
check_lb
}
}

测试:
[root@lb01 ~]# /application/nginx/sbin/nginx
[root@lb01 ~]# /etc/init.d/keepalived restart

[root@lb02 ~]# /application/nginx/sbin/nginx
[root@lb02 ~]# /etc/init.d/keepalived restart

[root@lb01 ~]# ip a s eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:aa:74:37 brd ff:ff:ff:ff:ff:ff
inet 10.0.0.5/24 brd 10.0.0.255 scope global eth0
inet 10.0.0.3/24 scope global secondary eth0:1 //
inet6 fe80::20c:29ff:feaa:7437/64 scope link
valid_lft forever preferred_lft forever
[root@lb02 ~]# ip a s eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:f9:55:18 brd ff:ff:ff:ff:ff:ff
inet 10.0.0.6/24 brd 10.0.0.255 scope global eth0
inet6 fe80::20c:29ff:fef9:5518/64 scope link
valid_lft forever preferred_lft forever
实现结果:vip漂移到备机了!
[root@lb01 ~]# killall nginx
[root@lb01 ~]# killall nginx
nginx: no process killed
[root@lb01 ~]# ip a s eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:aa:74:37 brd ff:ff:ff:ff:ff:ff
inet 10.0.0.5/24 brd 10.0.0.255 scope global eth0
inet6 fe80::20c:29ff:feaa:7437/64 scope link
valid_lft forever preferred_lft forever
[root@lb02 ~]# ip a s eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:f9:55:18 brd ff:ff:ff:ff:ff:ff
inet 10.0.0.6/24 brd 10.0.0.255 scope global eth0
inet 10.0.0.3/24 scope global secondary eth0:1 //
inet6 fe80::20c:29ff:fef9:5518/64 scope link
valid_lft forever preferred_lft forever

查看状态:nginx未运行时,keepalived即使启动成功也会被检测脚本随即停掉,所以keepalived开启失败,除非先将nginx服务成功开启。
[root@lb01 ~]# /etc/init.d/keepalived status
keepalived is stopped
[root@lb01 ~]# /etc/init.d/keepalived start //
Starting keepalived: [ OK ]
[root@lb01 ~]# /etc/init.d/keepalived status
keepalived is stopped
[root@lb01 ~]# /application/nginx/sbin/nginx //
[root@lb01 ~]# /etc/init.d/keepalived status
keepalived is stopped
[root@lb01 ~]# /etc/init.d/keepalived start
Starting keepalived: [ OK ]
[root@lb01 ~]# /etc/init.d/keepalived status
keepalived (pid 2181) is running...

实现结果:vip漂移重新回到主机了!
[root@lb01 ~]# ip a s eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:aa:74:37 brd ff:ff:ff:ff:ff:ff
inet 10.0.0.5/24 brd 10.0.0.255 scope global eth0
inet 10.0.0.3/24 scope global secondary eth0:1 //
inet6 fe80::20c:29ff:feaa:7437/64 scope link
valid_lft forever preferred_lft forever
[root@lb02 ~]# ip a s eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:f9:55:18 brd ff:ff:ff:ff:ff:ff
inet 10.0.0.6/24 brd 10.0.0.255 scope global eth0
inet6 fe80::20c:29ff:fef9:5518/64 scope link
valid_lft forever preferred_lft forever
  

实现bbs功能,bbs论坛搭建:discuz
Discuz! 官方站-PHP 开源论坛 - Powered by Discuz!

实现www功能,www网站搭建:dedecms
织梦CMS 官方网站 - 内容管理系统 - 上海卓卓网络科...官网
http://www.dedecms.com/

接下来,使用db01上面的数据库,把用户的上传目录挂载到nfs01上。

 

posted @ 2018-03-02 18:03  bkycrmn  阅读(214)  评论(0)    收藏  举报