NFS+Keepalived 高可用部署
1、部署前提准备工作
# 所有节点执行 yum install -y nfs-utils keepalived rsync rpcbind inotify-tools
# 节点间相互配置免密
2、NFS 服务配置
所有节点一致
# 1. Define the NFS export: grant the 192.168.189.0/24 subnet read-write access.
cat > /etc/exports << 'EOF'
/data/nfs 192.168.189.0/24(rw,sync,no_root_squash,no_all_squash,fsid=0,anonuid=0,anongid=0)
EOF

# 2. Enable and start rpcbind + nfs-server (persists across reboots).
systemctl enable --now rpcbind nfs-server

# 3. Re-export all shares so the new rule takes effect without a service restart.
exportfs -rv
3、Keepalived 配置
1. 主节点(192.168.189.155)配置
# /etc/keepalived/keepalived.conf — MASTER candidate (192.168.189.155)
global_defs {
    router_id NFS_MASTER_155
    script_user root
    enable_script_security
}

# NFS health check: on failure, drop priority by 50 so a peer can take over.
vrrp_script chk_nfs {
    script "/usr/local/bin/chk_nfs.sh"
    interval 2
    weight -50
    fall 2
    rise 2
}

vrrp_instance VI_NFS {
    # All nodes start as BACKUP; the highest priority wins the election.
    # (state BACKUP is also required for nopreempt to take effect.)
    state BACKUP
    interface ens3              # replace with the actual NIC name
    virtual_router_id 51
    priority 150
    advert_int 1
    nopreempt
    preempt_delay 300
    garp_master_delay 1

    # Unicast peering (no multicast) to reduce split-brain risk on filtered networks.
    unicast_src_ip 192.168.189.155
    unicast_peer {
        192.168.189.163
        192.168.189.164
    }

    authentication {
        auth_type PASS
        # keepalived only uses the first 8 characters of auth_pass; longer
        # values are silently truncated, so keep it to exactly 8 characters.
        auth_pass NFS_HA_2
    }

    virtual_ipaddress {
        192.168.189.156/24 dev ens3 noprefixroute   # the VIP
    }

    track_script {
        chk_nfs
    }

    # State-change hooks.
    notify_master "/usr/local/bin/vip_master.sh"
    notify_backup "/usr/local/bin/vip_backup.sh"
    notify_fault  "/usr/local/bin/vip_fault.sh"
}
2. 备节点(192.168.189.163)配置
# /etc/keepalived/keepalived.conf — BACKUP node (192.168.189.163)
global_defs {
    router_id NFS_BACKUP_163
    script_user root
    enable_script_security
}

# NFS health check: on failure, drop priority by 50 so a peer can take over.
vrrp_script chk_nfs {
    script "/usr/local/bin/chk_nfs.sh"
    interval 2
    weight -50
    fall 2
    rise 2
}

vrrp_instance VI_NFS {
    state BACKUP
    interface ens3              # replace with the actual NIC name
    virtual_router_id 51
    priority 120                # lower than the master (150), higher than node 164 (100)
    advert_int 1
    nopreempt
    preempt_delay 300
    garp_master_delay 1

    # Unicast peering (no multicast) to reduce split-brain risk on filtered networks.
    unicast_src_ip 192.168.189.163
    unicast_peer {
        192.168.189.155
        192.168.189.164
    }

    authentication {
        auth_type PASS
        # keepalived only uses the first 8 characters of auth_pass; longer
        # values are silently truncated, so keep it to exactly 8 characters.
        auth_pass NFS_HA_2
    }

    virtual_ipaddress {
        192.168.189.156/24 dev ens3 noprefixroute   # the VIP
    }

    track_script {
        chk_nfs
    }

    # State-change hooks.
    notify_master "/usr/local/bin/vip_master.sh"
    notify_backup "/usr/local/bin/vip_backup.sh"
    notify_fault  "/usr/local/bin/vip_fault.sh"
}
3. 备节点 2(192.168.189.164)配置
# /etc/keepalived/keepalived.conf — BACKUP node (192.168.189.164)
global_defs {
    router_id NFS_BACKUP_164
    script_user root
    enable_script_security
}

# NFS health check: on failure, drop priority by 50 so a peer can take over.
vrrp_script chk_nfs {
    script "/usr/local/bin/chk_nfs.sh"
    interval 2
    weight -50
    fall 2
    rise 2
}

vrrp_instance VI_NFS {
    state BACKUP
    interface ens3              # replace with the actual NIC name
    virtual_router_id 51
    priority 100                # lowest priority of the three nodes
    advert_int 1
    nopreempt
    preempt_delay 300
    garp_master_delay 1

    # Unicast peering (no multicast) to reduce split-brain risk on filtered networks.
    unicast_src_ip 192.168.189.164
    unicast_peer {
        192.168.189.155
        192.168.189.163
    }

    authentication {
        auth_type PASS
        # keepalived only uses the first 8 characters of auth_pass; longer
        # values are silently truncated, so keep it to exactly 8 characters.
        auth_pass NFS_HA_2
    }

    virtual_ipaddress {
        192.168.189.156/24 dev ens3 noprefixroute   # the VIP
    }

    track_script {
        chk_nfs
    }

    # State-change hooks.
    notify_master "/usr/local/bin/vip_master.sh"
    notify_backup "/usr/local/bin/vip_backup.sh"
    notify_fault  "/usr/local/bin/vip_fault.sh"
}
4、核心脚本配置
所有节点一致
1. NFS 健康检查脚本(/usr/local/bin/chk_nfs.sh)
#!/bin/bash
# Health probe invoked by keepalived's vrrp_script (exit 0 = healthy).

# 1. The NFS server unit must be active.
systemctl is-active --quiet nfs-server || exit 1

# 2. The export directory must be writable.
touch /data/nfs/.nfs_health_check 2>/dev/null || exit 1
rm -f /data/nfs/.nfs_health_check

# 3. If this node currently holds the VIP, the VIP must answer pings.
if /usr/sbin/ip addr | grep -q 192.168.189.156; then
    ping -c 1 -W 1 192.168.189.156 >/dev/null 2>&1 || exit 1
fi

exit 0
2、VIP 主节点通知脚本(/usr/local/bin/vip_master.sh)
#!/bin/bash
# keepalived notify_master hook: runs when this node is promoted to MASTER.
LOG_FILE="/var/log/nfs_ha.log"
VIP="192.168.189.156"
DEV="ens3"
NFS_DIR="/data/nfs"
# Peer nodes to pull the latest data from. NOTE: by the time notify_master
# fires, the VIP already points at *this* node, so rsync'ing from root@$VIP
# (as the original script did) would only copy the directory onto itself and
# never fetch the previous master's data — pull from the peers instead.
PEERS=("192.168.189.155" "192.168.189.163" "192.168.189.164")

# log MESSAGE — timestamped [MASTER] line appended to $LOG_FILE.
log() {
    echo "$(date +'%Y-%m-%d %H:%M:%S') [MASTER] - $1" >> $LOG_FILE
}

log "============ 切换为主节点 ============"

# Stop any leftover replication loop first to avoid push/pull conflicts.
pkill -f "nfs_inotify_sync.sh" >/dev/null 2>&1
sleep 2

log "第一步:从存活的对端节点拉取最新数据,防止旧数据覆盖"
LOCAL_IPS=$(hostname -I)
for peer in "${PEERS[@]}"; do
    # Skip our own addresses.
    case " $LOCAL_IPS " in *" $peer "*) continue ;; esac
    if rsync -avz --delete --exclude ".nfs_*" --timeout=30 \
        root@$peer:$NFS_DIR/ $NFS_DIR/ >> $LOG_FILE 2>&1; then
        log "已从节点 $peer 同步最新数据"
        break
    fi
    log "从节点 $peer 拉取失败,尝试下一个节点"
done

# Refresh neighbours' ARP caches so client traffic moves here quickly.
arping -c 3 -I $DEV $VIP >/dev/null 2>&1
log "ARP 刷新完成"

# (Re)start NFS and verify it came up.
systemctl restart nfs-server
systemctl is-active --quiet nfs-server && log "NFS 正常" || log "NFS 异常!"

# Start the inotify-based push replication loop in the background.
nohup /usr/local/bin/nfs_inotify_sync.sh >> /var/log/nfs_inotify.log 2>&1 &
log "Inotify 同步已启动"
log "主节点切换完成!"
3、VIP 备节点通知脚本(/usr/local/bin/vip_backup.sh)
#!/bin/bash
# keepalived notify_backup hook: pull-only resync (never push), so a demoted
# node can never overwrite newer data on the current master.
LOG_FILE="/var/log/nfs_ha.log"
VIP="192.168.189.156"
NFS_DIR="/data/nfs"

# log MESSAGE — timestamped [BACKUP] line appended to $LOG_FILE.
log() {
    echo "$(date +'%Y-%m-%d %H:%M:%S') [BACKUP] - $1" >> $LOG_FILE
}

log "============ 切换为备节点 ============"

# Kill any replication loop left over from a previous MASTER stint, so this
# node cannot keep pushing stale data to the new master.
pkill -9 -f "nfs_inotify_sync.sh" >/dev/null 2>&1
pkill -9 -f "inotifywait" >/dev/null 2>&1
log "已强制停止所有inotify同步进程"

# Give the killed processes a moment to disappear.
sleep 2

# Pull the authoritative copy from whichever node holds the VIP now,
# skipping NFS temp/health-check files.
log "开始从VIP($VIP)拉取最新数据,覆盖本地旧数据"
if rsync -avz --delete \
    --exclude ".nfs_*" \
    --exclude ".nfs_health_check" \
    --timeout=30 \
    root@$VIP:$NFS_DIR/ $NFS_DIR/ >> $LOG_FILE 2>&1; then
    log "✅ 备节点从VIP同步数据成功,本地数据已更新为最新"
else
    log "❌ 备节点从VIP同步数据失败!请检查网络/免密登录/VIP状态"
    # Optional alerting hook (DingTalk / mail), e.g.:
    # curl -s -X POST <your-webhook-url> -d '{"msgtype":"text","text":{"content":"NFS备节点同步失败,VIP:192.168.189.156"}}'
fi

log "备节点切换流程完成"
exit 0
4、VIP 故障通知脚本(/usr/local/bin/vip_fault.sh)
#!/bin/bash
# keepalived notify_fault hook: record that this node entered FAULT state
# (the VIP has moved away).
LOG_FILE="/var/log/nfs_ha.log"
VIP="192.168.189.156"

# log MESSAGE — timestamped [FAULT] line appended to $LOG_FILE.
log() {
    echo "$(date +'%Y-%m-%d %H:%M:%S') [FAULT] - $1" >> $LOG_FILE
}

log "本机故障,VIP $VIP 已漂移!!!"
5、Inotify 实时同步脚本(/usr/local/bin/nfs_inotify_sync.sh)
#!/bin/bash
# Real-time replication daemon: runs only on the node that currently holds
# the VIP and pushes $NFS_DIR to all other nodes whenever a file changes.

# --- configuration ---------------------------------------------------------
VIP="192.168.189.156"
NFS_DIR="/data/nfs"
NODE_LIST=("192.168.189.155" "192.168.189.163" "192.168.189.164")  # all node IPs
LOG_FILE="/var/log/nfs_inotify.log"
RETRY_COUNT=3        # rsync retries per target node
RETRY_INTERVAL=2     # seconds between retries
SYNC_TIMEOUT=10      # rsync I/O timeout (seconds)

# log LEVEL MESSAGE — timestamped line appended to $LOG_FILE.
log() {
    local LEVEL=$1
    local MSG=$2
    echo "$(date +'%Y-%m-%d %H:%M:%S') [$LEVEL] - $MSG" >> $LOG_FILE
}

# Exit quietly unless this node currently holds the VIP.
# Fixed-string match on "$VIP/" (the address always carries a prefix length
# in `ip addr` output) — a bare regex match on the IP would let the dots
# match any character and could also prefix-match a longer address.
check_vip() {
    if ! ip addr | grep -qF -- "$VIP/"; then
        log "ERROR" "本机未持有VIP($VIP),退出同步进程"
        exit 0
    fi
}

# single_sync TARGET_NODE FILE — push the whole share to TARGET_NODE,
# retrying up to $RETRY_COUNT times. Returns 0 on success.
single_sync() {
    local TARGET_NODE=$1
    local FILE=$2
    local RETRY=0
    while [ $RETRY -lt $RETRY_COUNT ]; do
        if rsync -avz --delete \
            --exclude ".nfs_*" \
            --times \
            --timeout=$SYNC_TIMEOUT \
            $NFS_DIR/ root@$TARGET_NODE:$NFS_DIR/ >> $LOG_FILE 2>&1; then
            log "INFO" "文件 $FILE 同步到 $TARGET_NODE 成功"
            return 0
        fi
        RETRY=$((RETRY + 1))
        log "WARN" "文件 $FILE 同步到 $TARGET_NODE 失败,重试第$RETRY次"
        sleep $RETRY_INTERVAL
    done
    log "ERROR" "文件 $FILE 同步到 $TARGET_NODE 失败(已重试$RETRY_COUNT次)"
    return 1
}

# Watch for file changes and push them to every other node.
sync_data() {
    # NOTE(review): assumes the first address from `hostname -I` is the one
    # listed in NODE_LIST — confirm on multi-homed hosts.
    local CURRENT_IP=$(hostname -I | awk '{print $1}')
    # Re-check the VIP: it may have moved since the script was launched.
    check_vip
    log "INFO" "本机($CURRENT_IP)持有VIP,开始监听文件变化并同步到其他节点"
    # Recursively watch for create/delete/modify/move events.
    inotifywait -mrq --format '%w%f' -e create,delete,modify,move $NFS_DIR | while read FILE; do
        # Skip NFS temp/health-check files.
        if [[ $FILE == *".nfs_"* ]]; then
            continue
        fi
        log "INFO" "检测到文件变化:$FILE"
        # Push to every node except ourselves.
        for node in "${NODE_LIST[@]}"; do
            if [ "$node" != "$CURRENT_IP" ]; then
                single_sync $node $FILE
            fi
        done
        # Stop immediately if the VIP has moved away (no longer master).
        # NOTE: this exit only leaves the pipeline subshell; inotifywait then
        # terminates on SIGPIPE when it tries to report its next event.
        if ! ip addr | grep -qF -- "$VIP/"; then
            log "ERROR" "VIP已漂移,本机不再是主节点,同步进程退出"
            exit 0
        fi
    done
}

# Entry point: preflight checks, then start the watch loop.
main() {
    if [ ! -d $NFS_DIR ]; then
        log "ERROR" "NFS目录($NFS_DIR)不存在,退出同步进程"
        exit 1
    fi
    # `command -v` is the portable replacement for `which`.
    if ! command -v inotifywait >/dev/null 2>&1; then
        log "ERROR" "未找到inotifywait命令,请安装inotify-tools"
        exit 1
    fi
    check_vip
    sync_data
}

main
5、赋予脚本执行权限
# Make every HA helper script executable (run on all nodes).
chmod +x /usr/local/bin/chk_nfs.sh /usr/local/bin/vip_*.sh /usr/local/bin/nfs_inotify_sync.sh
6、服务启动与验证(所有节点执行)
# 1. Clean up stale keepalived processes and any leftover VIP.
pkill -9 keepalived
# The VIP is configured as /24 in keepalived.conf, so it must be deleted with
# the same prefix length — the original `/32` delete would not match it.
ip addr del 192.168.189.156/24 dev ens3 2>/dev/null

# 2. Enable and start keepalived.
systemctl enable --now keepalived

# 3. Check the VIP (it should appear only on the master node).
ip addr show ens3 | grep 192.168.189.156

# 4. Verify inotify replication — on the master:
touch /data/nfs/test_prod.txt
# then on a backup node the file should be visible:
ls /data/nfs/test_prod.txt

# 5. Simulate a master failure — on the master:
systemctl stop keepalived
# then verify the VIP has moved to backup node 163:
ip addr show ens3 | grep 192.168.189.156
7、K8S设置NFS存储
创建命名空间:kubectl create ns shared
创建存储类:kubectl apply -f storageClass.yaml
# StorageClass is a cluster-scoped resource, so it takes no namespace
# (the original `namespace: shared` field was invalid and ignored).
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: nfs-storage
  annotations:
    # Both the legacy beta key and the GA key, for older clusters.
    storageclass.beta.kubernetes.io/is-default-class: 'true'
    storageclass.kubernetes.io/is-default-class: 'true'
  labels:
    environment: test
# Must match the PROVISIONER_NAME env var in the provisioner Deployment.
provisioner: fuseim.pri/ifs
reclaimPolicy: Retain
volumeBindingMode: Immediate
创建RBAC权限:kubectl apply -f rbac.yaml
# ServiceAccount the provisioner pods run as.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: nfs-client-provisioner
  namespace: shared
---
# Cluster-wide permissions: manage PVs, watch PVCs and StorageClasses, emit events.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: nfs-client-provisioner-runner
rules:
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "update", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: run-nfs-client-provisioner
subjects:
  - kind: ServiceAccount
    name: nfs-client-provisioner
    namespace: shared
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: nfs-client-provisioner-runner
---
# Namespaced permissions used for leader election between replicas.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: leader-locking-nfs-client-provisioner
  namespace: shared
rules:
  - apiGroups: [""]
    resources: ["endpoints"]
    verbs: ["get", "list", "watch", "create", "update", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: leader-locking-nfs-client-provisioner
  namespace: shared
subjects:
  - kind: ServiceAccount
    name: nfs-client-provisioner
    namespace: shared
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: leader-locking-nfs-client-provisioner
创建Provisioner:kubectl apply -f provisioner.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nfs-client-provisioner
  namespace: shared
  labels:
    app: nfs-client-provisioner
spec:
  # Multiple replicas rely on the provisioner's leader election (see the
  # leader-locking Role); only one instance actively provisions at a time.
  replicas: 3
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: nfs-client-provisioner
  template:
    metadata:
      labels:
        app: nfs-client-provisioner
    spec:
      serviceAccountName: nfs-client-provisioner
      containers:
        - name: nfs-client-provisioner
          image: docker.1ms.run/eipwork/nfs-subdir-external-provisioner:v4.0.2
          imagePullPolicy: IfNotPresent
          volumeMounts:
            - name: nfs-client-root
              mountPath: /persistentvolumes
          env:
            - name: PROVISIONER_NAME
              value: fuseim.pri/ifs    # must match the StorageClass `provisioner` field
            - name: NFS_SERVER
              value: 192.168.189.156   # NFS server VIP
            - name: NFS_PATH
              value: /data/nfs         # exported directory on the NFS server
      volumes:
        - name: nfs-client-root        # must match the volumeMounts name above
          nfs:
            server: 192.168.189.156    # NFS server VIP
            path: /data/nfs            # exported directory
# List the provisioner pods in the `shared` namespace directly; the original
# `-A | grep shared` would also match unrelated pods whose names happen to
# contain the word "shared".
kubectl get po -n shared


浙公网安备 33010602011771号