etcd集群故障恢复

一 etcd 备份脚本

#!/bin/bash
# Back up etcd: take a v3 snapshot, compress it, and prune old archives.
#
# Environment:
#   SSL_DIR  directory holding etcd.pem, etcd-key.pem and ca.pem (required)
#
# Written without bash-only syntax so that `sh <script>` keeps working.
set -eu

IP=127.0.0.1
BACKUP_DIR=/data/etcd_backup

# SSL_DIR is not assigned anywhere in this script, so fail fast with a clear
# message instead of silently looking for /etcd.pem.
: "${SSL_DIR:?SSL_DIR must point to the directory holding etcd.pem, etcd-key.pem and ca.pem}"

# Take the timestamp once: calling date for every file name can straddle a
# minute boundary and make tar/rm look for a file that was never written.
STAMP=$(date +%Y%m%d%H%M)
SNAPSHOT="snap-${STAMP}.db"
ARCHIVE="snap-${STAMP}.tar.gz"

# mkdir -p is a no-op when the directory exists, so no test is needed
# (the previous `[ -d ... ]` guard was inverted and never created it).
mkdir -p "$BACKUP_DIR"

# Use the v3 etcdctl API.
export ETCDCTL_API=3

# Save the snapshot.
/data/kubernetes/bin/etcdctl \
  --endpoints="https://$IP:2379" \
  --cert="$SSL_DIR/etcd.pem" \
  --key="$SSL_DIR/etcd-key.pem" \
  --cacert="$SSL_DIR/ca.pem" \
  snapshot save "$BACKUP_DIR/$SNAPSHOT"

# Compress the snapshot with gzip so the .tar.gz suffix matches the real
# compression, then drop the raw .db. set -e aborts before the rm if tar fails.
cd "$BACKUP_DIR"
tar -czf "$ARCHIVE" "$SNAPSHOT"
rm -f -- "$SNAPSHOT"

# Retention: remove backup files last modified more than 8 days ago.
# The pattern must be a glob ('snap' alone matches nothing), and -exec avoids
# the word-splitting problems of piping file names through xargs.
find "$BACKUP_DIR" -type f -name 'snap-*' -mtime +8 -exec rm -f -- {} +
执行 sh etcd-bakup.sh 生成备份文件,例如:
snap-202011252359.tar.gz


二 部署 etcd集群

# 基础环境及软件版本
centos 7.6 
etcd  3.3.18  https://github.com/etcd-io/etcd/releases 
coredns 1.6.7  https://github.com/coredns/coredns/releases
etcd1 coredns-node1 172.18.1.11
etcd2 coredns-node2 172.18.1.12
etcd3 coredns-node3 172.18.1.13
- 下载好对应版本,并mv 到 /usr/bin/目录下
 (1)tar -xf etcd-v3.3.18-linux-amd64.tar.gz && cd etcd-v3.3.18-linux-amd64 && mv etcd* /usr/bin/
 (2)创建数据目录 与配置文件 三个节点分别执行 mkdir /var/lib/etcd
 (3)启动脚本
(4)恢复集群时,三个节点的 --initial-cluster-state 均须设置为 existing
172.18.1.11上
 # Write the systemd unit for member etcd1 (172.18.1.11).
 # The quoted delimiter stops the shell from expanding anything in the body,
 # so the text below lands in the unit file exactly as written.
 cat <<'EOF' >/usr/lib/systemd/system/etcd.service
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
Documentation=https://github.com/coreos

[Service]
Type=notify
WorkingDirectory=/var/lib/etcd/
ExecStart=/usr/bin/etcd \
  --name=etcd1 \
  --initial-advertise-peer-urls=http://172.18.1.11:2380 \
  --listen-peer-urls=http://172.18.1.11:2380 \
  --listen-client-urls=http://172.18.1.11:2379,http://127.0.0.1:2379 \
  --advertise-client-urls=http://172.18.1.11:2379 \
  --initial-cluster-token=dns-etcd \
  --initial-cluster=etcd1=http://172.18.1.11:2380,etcd2=http://172.18.1.12:2380,etcd3=http://172.18.1.13:2380  \
  --initial-cluster-state=existing \
  --data-dir=/var/lib/etcd
Restart=always
RestartSec=5
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
EOF

172.18.1.12上
 # Write the systemd unit for member etcd2 (172.18.1.12).
 # The quoted delimiter stops the shell from expanding anything in the body,
 # so the text below lands in the unit file exactly as written.
 cat <<'EOF' >/usr/lib/systemd/system/etcd.service
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
Documentation=https://github.com/coreos

[Service]
Type=notify
WorkingDirectory=/var/lib/etcd/
ExecStart=/usr/bin/etcd \
  --name=etcd2 \
  --initial-advertise-peer-urls=http://172.18.1.12:2380 \
  --listen-peer-urls=http://172.18.1.12:2380 \
  --listen-client-urls=http://172.18.1.12:2379,http://127.0.0.1:2379 \
  --advertise-client-urls=http://172.18.1.12:2379 \
  --initial-cluster-token=dns-etcd \
  --initial-cluster=etcd1=http://172.18.1.11:2380,etcd2=http://172.18.1.12:2380,etcd3=http://172.18.1.13:2380  \
  --initial-cluster-state=existing \
  --data-dir=/var/lib/etcd
Restart=always
RestartSec=5
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
EOF

172.18.1.13上执行

 # Write the systemd unit for member etcd3 (172.18.1.13).
 # The quoted delimiter stops the shell from expanding anything in the body,
 # so the text below lands in the unit file exactly as written.
 cat <<'EOF' >/usr/lib/systemd/system/etcd.service
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
Documentation=https://github.com/coreos

[Service]
Type=notify
WorkingDirectory=/var/lib/etcd/
ExecStart=/usr/bin/etcd \
  --name=etcd3 \
  --initial-advertise-peer-urls=http://172.18.1.13:2380 \
  --listen-peer-urls=http://172.18.1.13:2380 \
  --listen-client-urls=http://172.18.1.13:2379,http://127.0.0.1:2379 \
  --advertise-client-urls=http://172.18.1.13:2379 \
  --initial-cluster-token=dns-etcd \
  --initial-cluster=etcd1=http://172.18.1.11:2380,etcd2=http://172.18.1.12:2380,etcd3=http://172.18.1.13:2380  \
  --initial-cluster-state=existing \
  --data-dir=/var/lib/etcd
Restart=always
RestartSec=5
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
EOF
 

三 恢复etcd备份数据

集群的三个节点分别恢复备份数据(三个节点必须使用同一个快照文件)
解压数据
# Unpack the backup archive. tar needs an operation (-x) and the archive
# option (-f); without them the command fails. GNU tar detects the
# compression format by itself when extracting.
tar -xf snap-202011252359.tar.gz

172.18.1.11恢复
# Restore member etcd1 from the snapshot. Run this while etcd is stopped.
# etcdctl 3.3 aborts with "data-dir ... exists" when the target directory is
# already present, so move any existing directory aside (kept as a fallback).
if [ -e /var/lib/etcd ]; then
  mv /var/lib/etcd "/var/lib/etcd.bak-$(date +%Y%m%d%H%M%S)"
fi
# --endpoints is omitted: restore only reads the local snapshot file and never
# contacts a server. The cluster token must be identical on all three nodes.
ETCDCTL_API=3 etcdctl snapshot restore ./snap-202011252359.db \
  --name=etcd1 \
  --initial-cluster-token=etcd-cluster \
  --initial-advertise-peer-urls=http://172.18.1.11:2380 \
  --initial-cluster=etcd1=http://172.18.1.11:2380,etcd2=http://172.18.1.12:2380,etcd3=http://172.18.1.13:2380 \
  --data-dir=/var/lib/etcd/

172.18.1.12恢复
# Restore member etcd2 from the snapshot. Run this while etcd is stopped.
# etcdctl 3.3 aborts with "data-dir ... exists" when the target directory is
# already present, so move any existing directory aside (kept as a fallback).
if [ -e /var/lib/etcd ]; then
  mv /var/lib/etcd "/var/lib/etcd.bak-$(date +%Y%m%d%H%M%S)"
fi
# --endpoints is omitted: restore only reads the local snapshot file and never
# contacts a server. The cluster token must be identical on all three nodes.
ETCDCTL_API=3 etcdctl snapshot restore ./snap-202011252359.db \
  --name=etcd2 \
  --initial-cluster-token=etcd-cluster \
  --initial-advertise-peer-urls=http://172.18.1.12:2380 \
  --initial-cluster=etcd1=http://172.18.1.11:2380,etcd2=http://172.18.1.12:2380,etcd3=http://172.18.1.13:2380 \
  --data-dir=/var/lib/etcd/

172.18.1.13恢复
# Restore member etcd3 from the snapshot. Run this while etcd is stopped.
# etcdctl 3.3 aborts with "data-dir ... exists" when the target directory is
# already present, so move any existing directory aside (kept as a fallback).
if [ -e /var/lib/etcd ]; then
  mv /var/lib/etcd "/var/lib/etcd.bak-$(date +%Y%m%d%H%M%S)"
fi
# --endpoints is omitted: restore only reads the local snapshot file and never
# contacts a server. The cluster token must be identical on all three nodes.
ETCDCTL_API=3 etcdctl snapshot restore ./snap-202011252359.db \
  --name=etcd3 \
  --initial-cluster-token=etcd-cluster \
  --initial-advertise-peer-urls=http://172.18.1.13:2380 \
  --initial-cluster=etcd1=http://172.18.1.11:2380,etcd2=http://172.18.1.12:2380,etcd3=http://172.18.1.13:2380 \
  --data-dir=/var/lib/etcd/

启动etcd集群
# Make systemd re-read unit files, register etcd for start at boot, then
# bring the service up.
# NOTE(review): the unit uses Type=notify, so `start` presumably blocks until
# etcd reports ready; start all three nodes at about the same time - confirm.
systemctl daemon-reload
systemctl enable etcd
systemctl start etcd

查看数据恢复
# List every key (and value) stored under the "/" prefix, querying all three
# members, to confirm the data came back.
ETCDCTL_API=3 etcdctl \
  --endpoints=172.18.1.11:2379,172.18.1.12:2379,172.18.1.13:2379 \
  get / --prefix

(5)etcd  集群检查
# Print one status row per member (ID, version, DB size, leader flag, raft
# term/index) as a table.
ETCDCTL_API=3 etcdctl \
  --endpoints=172.18.1.11:2379,172.18.1.12:2379,172.18.1.13:2379 \
  endpoint status --write-out=table
+------------------+------------------+---------+---------+-----------+-----------+------------+
|     ENDPOINT     |        ID        | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------+------------------+---------+---------+-----------+-----------+------------+
| 172.18.1.11:2379 | a665d104822ee7c9 |  3.3.18 |   25 kB |      true |        81 |         25 |
| 172.18.1.12:2379 | f4f9dc017f438b07 |  3.3.18 |   25 kB |     false |        81 |         25 |
| 172.18.1.13:2379 | da1a094d31db677c |  3.3.18 |   25 kB |     false |        81 |         25 |
+------------------+------------------+---------+---------+-----------+-----------+------------+

# Ask each of the three members whether it is healthy.
ETCDCTL_API=3 etcdctl \
  --endpoints=172.18.1.11:2379,172.18.1.12:2379,172.18.1.13:2379 \
  endpoint health
172.18.1.11:2379 is healthy: successfully committed proposal: took = 2.078969ms
172.18.1.12:2379 is healthy: successfully committed proposal: took = 2.209654ms
172.18.1.13:2379 is healthy: successfully committed proposal: took = 2.469605ms
posted @ 2020-11-26 17:40  dbstack  阅读(497)  评论(0)    收藏  举报