PG-基于patroni高可用方案

部署实施

规划

软件版本规划如下:

  • 操作系统: rhel Linux 7.6
  • 数据库: PostgreSQL 12.2
  • Python: Python 3.8.2
  • Etcd: etcd-v3.3.22
  • Patroni: patroni 1.6.5

部署规划如下:

主机 IP 组件 备注
pg1 192.168.10.190 PostgreSQL、Patroni、Etcd 主节点
pg2 192.168.10.191 PostgreSQL、Patroni、Etcd 备节点1
pg3 192.168.10.192 PostgreSQL、Patroni、Etcd 备节点2

软件地址

#-- patroni
https://github.com/zalando/patroni
https://mirrors.aliyun.com/pypi/simple/patroni/

#-- etcd
https://github.com/etcd-io/etcd/releases/tag/v3.4.10
https://github.com/etcd-io/etcd/releases/tag/v3.3.22

#-- zookeeper 
wget https://archive.apache.org/dist/zookeeper/zookeeper-3.3.6/zookeeper-3.3.6.tar.gz
wget https://archive.apache.org/dist/zookeeper/stable/apache-zookeeper-3.5.8.tar.gz

#-- python
https://www.python.org/downloads
https://mirrors.aliyun.com/pypi/simple/
# 官方源
wget -c https://www.python.org/ftp/python/3.8.2/Python-3.8.2.tar.xz


#--  自签证书工具  cfssl,cfssljson
wget https://pkg.cfssl.org/R1.2/cfssl_linux-amd64
wget https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64
wget https://pkg.cfssl.org/R1.2/cfssl-certinfo_linux-amd64

# postgresql
https://www.postgresql.org/download/product-categories/

环境准备

虽然Patroni支持自动化初始化PostgreSQL数据库并部署流复制,这两块工作建议手工配置。

关闭防火墙

systemctl stop firewalld.service
systemctl disable firewalld.service

安装系统依赖软件包

yum -y install libffi-devel gcc gcc-c++ zlib zlib-devel readline-devel openssl-devel bzip2-devel sqlite-devel xz lzma xz-devel gdbm gdbm-devel tk tk-devel libffi libffi-devel ncurses ncurses-devel

配置EPEL yum源

wget -O /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo

yum -y install jq

配置sudo权限

cat > /etc/sudoers.d/postgres <<-EOF
# postgres ALL=(ALL:ALL) NOPASSWD:ALL
postgres ALL=(ALL:ALL) NOPASSWD:/bin/systemctl stop postgresql12,/bin/systemctl start postgresql12,/bin/systemctl restart postgresql12,/bin/systemctl status postgresql12,/bin/systemctl daemon-reload,/bin/systemctl stop patroni,/bin/systemctl start patroni,/bin/systemctl restart patroni,/bin/systemctl status patroni,/bin/systemctl stop etcd,/bin/systemctl start etcd,/bin/systemctl restart etcd,/bin/systemctl status etcd
EOF

安装python3

# wget -c https://www.python.org/ftp/python/3.8.2/Python-3.8.2.tar.xz
tar -xf Python-3.8.2.tar.xz && cd Python-3.8.2
./configure --prefix=/ups/app/python3 --enable-shared
make
make install

配置动态库

cat > /etc/ld.so.conf.d/python3.conf <<-EOF
/ups/app/python3/lib
EOF

# 加载动态库
ldconfig -v | grep python3

# 检查确认
ldd /ups/app/python3/bin/python3

配置国内镜像源

mkdir -p ~/.pip
cat > ~/.pip/pip.conf <<-EOF
[global]
index-url = https://mirrors.aliyun.com/pypi/simple/

[install]
trusted-host=mirrors.aliyun.com
EOF

安装PG流复制(一主两从)

编译安装软件(所有数据库节点)

tar -xf postgresql-12.2.tar.gz 

mkdir build_dir && cd build_dir
../configure --prefix=/ups/app/postgresql/pgsql-12 --with-perl --with-tcl --with-python --with-openssl --with-pam --with-gssapi --with-icu --without-ldap --with-libxml --with-libxslt --with-systemd

make world
make install-world

配置自启动服务(省略)

# 编辑服务文件
cat > /usr/lib/systemd/system/postgresql12.service <<-EOF
# It's not recommended to modify this file in-place, because it will be
# overwritten during package upgrades.  If you want to customize, the
# best way is to create a file "/etc/systemd/system/postgresql12.service",
# containing
#   .include /usr/lib/systemd/system/postgresql12.service
#   ...make your changes here...
# For more info about custom unit files, see
# http://fedoraproject.org/wiki/Systemd#How_do_I_customize_a_unit_file.2F_add_a_custom_unit_file.3F
 
# Note: changing PGDATA will typically require adjusting SELinux
# configuration as well.
 
# Note: do not use a PGDATA pathname containing spaces, or you will
# break postgresql-setup.
[Unit]
Description=PostgreSQL 12 database server
Documentation=https://www.postgresql.org/docs/12/static/
After=syslog.target
After=network.target
 
[Service]
Type=notify
 
User=postgres
Group=postgres
 
# Note: avoid inserting whitespace in these Environment= lines, or you may
# break postgresql-setup.
 
# Location of database directory
Environment=PGDATA=/ups/data/pgdata/12/pg_root
Environment=PGHOME=/ups/app/postgresql/pgsql-12
 
# Where to send early-startup messages from the server (before the logging
# options of postgresql.conf take effect)
# This is normally controlled by the global default set by systemd
# StandardOutput=syslog
 
# Disable OOM kill on the postmaster
OOMScoreAdjust=-1000
Environment=PG_OOM_ADJUST_FILE=/proc/self/oom_score_adj
Environment=PG_OOM_ADJUST_VALUE=0
 
# ExecStartPre=/ups/app/postgresql/pgsql-12/bin/postgresql-12-check-db-dir \${PGDATA}
ExecStart=/ups/app/postgresql/pgsql-12/bin/postmaster -D \${PGDATA}
ExecReload=/bin/kill -HUP \$MAINPID
KillMode=mixed
KillSignal=SIGINT
  
 
# Do not set any timeout value, so that systemd will not kill postmaster
# during crash recovery.
# 禁用超时逻辑,systemd的默认超时时长是 90 秒
TimeoutSec=0
 
[Install]
WantedBy=multi-user.target
EOF

禁止postgresql自启动,通过patroni来管理postgresql

主库配置

主库初始化

initdb -D ${PGDATA} -U postgres --locale=en_US.UTF8 -E UTF8

配置主库参数配置

vi $PGDATA/postgresql.conf

cat >> $PGDATA/postgresql.conf <<-EOF
listen_addresses = '*'
port = 2020
wal_level = replica
archive_mode = on
#archive_command = 'test ! -f /ups/data/pgdata/12/arch/%f && cp %p /ups/data/pgdata/12/arch/%f'
#restore_command = 'cp /ups/data/pgdata/12/arch/%f %p'
archive_command = '/usr/bin/lz4 -q -z %p /ups/data/pgdata/12/arch/%f.lz4'
restore_command = '/usr/bin/lz4 -d /ups/data/pgdata/12/arch/%f.lz4 %p'
recovery_target_timeline='latest'
max_wal_senders = 10
wal_keep_segments = 64
hot_standby = on
hot_standby_feedback = on
full_page_writes = on
wal_log_hints = on
synchronous_commit = on
synchronous_standby_names = 'ANY 1 (*)'
archive_cleanup_command = 'pg_archivecleanup /ups/data/pgdata/12/arch %r'
primary_slot_name ='pgsql12_pg1'
EOF
检查确认配置文件
grep -Ev '^[[:space:]]|^#|^$' ${PGDATA}/postgresql.conf

sed -r '/^[ \t]*($|#)/d' $PGDATA/postgresql.conf

启动主库

pg_ctl start -D $PGDATA

配置postgres用户密码

psql -U postgres -c "ALTER USER postgres WITH PASSWORD 'postgres';"

创建流复制账号(pg1)

CREATE USER sync
REPLICATION
LOGIN
ENCRYPTED PASSWORD 'sync12345';
-- CONNECTION LIMIT 5

GRANT EXECUTE ON FUNCTION pg_read_binary_file(text) TO sync;

创建复制槽(pg1)patroni

-- 创建复制槽
select * from pg_create_physical_replication_slot('pgsql12_pg1'); 
select * from pg_create_physical_replication_slot('pgsql12_pg2'); 
select * from pg_create_physical_replication_slot('pgsql12_pg3');

-- 查看
select * from pg_replication_slots;

-- 删除复制槽
SELECT * FROM pg_drop_replication_slot('pgsql12_pg1');

配置客户端认证文件(pg_hba.conf) 所有服务器

cat >> ${PGDATA}/pg_hba.conf <<EOF

host	all 		    all             192.168.10.0/24         md5
host    replication     sync            127.0.0.1/32            md5
host    replication     sync            192.168.10.190/32       md5
host    replication     sync            192.168.10.191/32       md5
host    replication     sync            192.168.10.192/32       md5
EOF

使用 pg_basebackup 同步数据时,pg_hba.conf 会随数据目录自动复制到其它节点的从库

配置用户口令文件($HOME/.pgpass)所有服务器

touch ~/.pgpass
chmod 0600 ~/.pgpass
cat > ~/.pgpass <<EOF
192.168.10.190:2020:replication:sync:sync12345
192.168.10.191:2020:replication:sync:sync12345
192.168.10.192:2020:replication:sync:sync12345
EOF

cat <<-EOF >~/.pgpass
192.168.10.190:2020:replication:sync:sync12345
192.168.10.191:2020:replication:sync:sync12345
192.168.10.192:2020:replication:sync:sync12345
EOF

从库配置

备库初始化

# pg2
pg_basebackup -h 192.168.10.190 -p 2020 -U sync -D ${PGDATA} -w -Fp -Xs -Pv -R 

# pg3
pg_basebackup -h 192.168.10.190 -p 2020 -U sync -D ${PGDATA} -w -Fp -Xs -Pv -R
  • -R, --write-recovery-conf :write configuration for replication
    • 自动创建$PGDATA/standby.signal 标识文件,且该文件内容为空
    • 自动在$PGDATA/postgresql.auto.conf 文件中添加 primary_conninfo 参数信息

修改备库参数

在 postgresql.auto.conf 的 primary_conninfo 中添加 application_name(例如 application_name=pgsql12_pg2)

primary_conninfo = 'application_name=pgsql12_pg2 user=sync passfile=''/home/postgres/.pgpass'' host=192.168.10.190 port=2020 sslmode=prefer sslcompression=0 gssencmode=prefer krbsrvname=postgres target_session_attrs=any'
primary_slot_name ='pgsql12_pg2'

启动备库

pg_ctl start -D $PGDATA

检查同步状态

SELECT usename , application_name , client_addr, sync_state FROM pg_stat_replication;

-- 结果是f(false)则为主库,t(true)为备库
SELECT pg_is_in_recovery();

-- 检查复制情况
SELECT pid, usename, client_addr, state, write_lag, flush_lag, replay_lag FROM pg_stat_replication;

自签证书配置

背景

互联网的通信安全,建立在 SSL/TLS 协议之上。不使用 SSL/TLS 的 HTTP 通信,就是不加密的通信。

  • SSL (Secure Socket Layer):为Netscape所研发,用以保障在Internet上数据传输之安全,利用数据加密(Encryption)技术,可确保数据在网络上之传输过程中不会被截取。目前一般通用之规格为40 bit之安全标准,美国则已推出128 bit之更高安全标准,但限制出境。只要3.0版本以上之I.E.或Netscape浏览器即可支持SSL。
  • 安全传输层协议(TLS)用于在两个通信应用程序之间提供保密性和数据完整性。该协议由两层组成:TLS 记录协议(TLS Record)和 TLS 握手协议(TLS Handshake)。较低的层为 TLS 记录协议,位于某个可靠的传输协议(例如 TCP)上面。

安装 CFSSL相关软件

export WORK_DIR=/ups/app/cfssl
mkdir -p ${WORK_DIR} && cd ${WORK_DIR}

wget https://pkg.cfssl.org/R1.2/cfssl_linux-amd64 -O ${WORK_DIR}/cfssl
wget https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64 -O ${WORK_DIR}/cfssljson
wget https://pkg.cfssl.org/R1.2/cfssl-certinfo_linux-amd64 -O ${WORK_DIR}/cfssl-certinfo

export WORK_DIR=/ups/app/cfssl
for value in $(ls -tr .); do
    tmp=$(echo ${value} |awk -F"_" '{print $1}')
    mv ${value} ${tmp}
    chmod +x ${tmp}
    ln -s ${WORK_DIR}/${tmp} /usr/local/bin/${tmp}
done

验证 cfssl 的版本为 1.2或更高

查看版本

[root@pg1 cfssl]# cfssl version
Version: 1.2.0
Revision: dev
Runtime: go1.6
[root@pg1 cfssl]#

工具命令语法

cfssl
Usage:
Available commands:
	selfsign
	print-defaults
	revoke
	certinfo
	serve
	version
	gencert
	gencrl
	info
	bundle
	sign
	genkey
	ocspsign
	ocspserve
	ocspdump
	ocsprefresh
	scan
Top-level flags:
  -allow_verification_with_non_compliant_keys
    	Allow a SignatureVerifier to use keys which are technically non-compliant with RFC6962.
  -loglevel int
    	Log level (0 = DEBUG, 5 = FATAL) (default 1)
cfssljson
Usage of cfssljson:
  -bare
    	the response from CFSSL is not wrapped in the API standard response
  -f string
    	JSON input (default "-")
  -stdout
    	output the response instead of saving to a file

证书配置

创建 CA(Certificate Authority) 证书

先用 cfssl 命令生成包含默认配置的 ca-config.json、ca-csr.json 文件

创建 CA 配置文件(ca-config.json)
cd /ups/app/etcd/ssl
cfssl print-defaults config > ca-config.json
cfssl print-defaults csr > ca-csr.json

然后分别修改这两个文件为如下内容

ca-config.json

{
    "signing": {
        "default": {
            "expiry": "87600h"
        },
        "profiles": {
            "server": {
                "expiry": "87600h",
                "usages": [
                    "signing",
                    "key encipherment",
                    "server auth",
                    "client auth"
                ]
            },
            "client": {
                "expiry": "87600h",
                "usages": [
                    "signing",
                    "key encipherment",
                    "client auth"
                ]
            }
        }
    }
}
  • ca-config.json:

    可以定义多个 profiles,分别指定不同的过期时间、使用场景等参数;后续在签名证书时使用某个 profile;

  • signing:

    表示该证书可用于签名其它证书;生成的 ca.pem 证书中 CA=TRUE;

  • server auth:

    表示 Client 可以用该 CA 对 Server 提供的证书进行验证;

  • client auth:

    表示 Server 可以用该 CA 对 Client 提供的证书进行验证;

创建 CA 证书签名请求(ca-csr.json)

ca-csr.json

{
  "CN": "etcd",
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "L": "GZ",
      "ST": "GD"
    }
  ]
}
  • CN(Common Name):

    • kube-apiserver 从证书中提取该字段作为请求的用户名(User Name);浏览器使用该字段验证网站是否合法。一般写都是域名
  • ST(State):

    • 州、省
  • L(Locality):

    • 地区、城市
  • O(Organization Name):

    • 组织名称,公司名称。kube-apiserver 从证书中提取该字段作为请求用户所属的组(Group)
  • OU(Organization Unit Name):

    • 组织单位名称,公司部门名称

前面2步可以通过以下方式创建CA证书

cat > /ups/app/etcd/ssl/ca-config.json <<-EOF
{
    "signing": {
        "default": {
            "expiry": "87600h"
        },
        "profiles": {
            "server": {
                "expiry": "87600h",
                "usages": [
                    "signing",
                    "key encipherment",
                    "server auth",
                    "client auth"
                ]
            },
            "client": {
                "expiry": "87600h",
                "usages": [
                    "signing",
                    "key encipherment",
                    "client auth"
                ]
            }
        }
    }
}
EOF

cat > /ups/app/etcd/ssl/ca-csr.json <<-EOF
{
    "CN": "etcd",
    "key": {
        "algo": "rsa",
        "size": 2048
    },
    "names": [
        {
            "C": "CN",
            "ST": "GD",
            "L": "GZ"
        }
    ]
}
EOF

生成 CA 证书和私钥

cd /ups/app/etcd/ssl
cfssl gencert -initca ca-csr.json | cfssljson -bare ca

[root@pg1 ssl]# cfssl gencert -initca ca-csr.json | cfssljson -bare ca
2020/07/29 20:01:32 [INFO] generating a new CA key and certificate from CSR
2020/07/29 20:01:32 [INFO] generate received request
2020/07/29 20:01:32 [INFO] received CSR
2020/07/29 20:01:32 [INFO] generating key: rsa-2048
2020/07/29 20:01:32 [INFO] encoded CSR
2020/07/29 20:01:32 [INFO] signed certificate with serial number 51387564996842455704424109245687617617608202805
[root@pg1 ssl]#

CA 有关证书列表如下:

[root@pg1 ssl]# tree
.
├── ca.csr
├── ca-key.pem
├── ca.pem
├── ca-config.json
└── ca-csr.json

创建 etcd 的TLS认证证书,生成 etcd 证书和私钥

/ups/app/etcd/ssl 下添加文件 etcd-csr.json,内容如下

cat > /ups/app/etcd/ssl/etcd-csr.json <<-EOF
{
    "CN": "etcd",
    "hosts": [
        "127.0.0.1",
        "192.168.10.190",
        "192.168.10.191",
        "192.168.10.192"
    ],
    "key": {
        "algo": "rsa",
        "size": 2048
    },
    "names": [
        {
            "C": "CN",
            "ST": "GD",
            "L": "GZ"
        }
    ]
}
EOF
生成 etcd server证书
cd /ups/app/etcd/ssl
cfssl gencert -ca=/ups/app/etcd/ssl/ca.pem \
-ca-key=/ups/app/etcd/ssl/ca-key.pem \
-config=/ups/app/etcd/ssl/ca-config.json \
-profile=server /ups/app/etcd/ssl/etcd-csr.json | cfssljson -bare etcd

# 或
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=server etcd-csr.json | cfssljson -bare etcd

etcd 有关证书列表如下

[root@pg1 ssl]# cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=server etcd-csr.json | cfssljson -bare etcd
2020/07/29 20:02:11 [INFO] generate received request
2020/07/29 20:02:11 [INFO] received CSR
2020/07/29 20:02:11 [INFO] generating key: rsa-2048
2020/07/29 20:02:11 [INFO] encoded CSR
2020/07/29 20:02:11 [INFO] signed certificate with serial number 624986965983467912700213173636453978413637921699
2020/07/29 20:02:11 [WARNING] This certificate lacks a "hosts" field. This makes it unsuitable for
websites. For more information see the Baseline Requirements for the Issuance and Management
of Publicly-Trusted Certificates, v.1.1.6, from the CA/Browser Forum (https://cabforum.org);
specifically, section 10.2.3 ("Information Requirements").
[root@pg1 ssl]# ls
ca-config.json  ca.csr  ca-csr.json  ca-key.pem  ca.pem  etcd.csr  etcd-csr.json  etcd-key.pem  etcd.pem
[root@pg1 ssl]#

证书分发到其它所有节点

# 给证书读权限
chmod 644 /ups/app/etcd/ssl/*

cd /ups/app/etcd/ssl/
for IP in pg2 pg3;do
    scp ca-key.pem ca.pem etcd.pem etcd-key.pem $IP:/ups/app/etcd/ssl/
done

DCS软件安装

DCS主要有etcd,zookeeper,consul

Etcd安装

三台主机上下载并安装ETCD,如下:

# wget -c https://github.com/etcd-io/etcd/releases/download/v3.4.7/etcd-v3.4.7-linux-amd64.tar.gz

tar -xf etcd-v3.3.22-linux-amd64.tar.gz -C /ups/app/
cd /ups/app
mv etcd-v3.3.22-linux-amd64/ etcd
chown -R root:root etcd
cd etcd
mkdir -p ./{bin,cfg,ssl,log,data} && mv etcd etcdctl ./bin/

# 可选
useradd etcd -M -d /ups/app/etcd -c "Etcd user" -r -s /sbin/nologin

编辑配置文件

注意是否启用自签证书 + SSL(HTTPS)配置;若未启用 SSL,则将对应的 URL 改为 http 协议

节点1
  • 常规(不使用SSL)
cat > /ups/app/etcd/cfg/etcd.conf <<-EOF
#[Member]
#etcd实例名称
ETCD_NAME="etcd01"
#etcd数据保存目录
ETCD_DATA_DIR="/ups/app/etcd/data/default.etcd"
#集群内部通信使用的URL
ETCD_LISTEN_PEER_URLS="http://192.168.10.190:2380"
#供外部客户端使用的URL
ETCD_LISTEN_CLIENT_URLS="http://192.168.10.190:2379,http://127.0.0.1:2379"

#[Clustering]
#广播给集群内其他成员访问的URL
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://192.168.10.190:2380"
#广播给外部客户端使用的URL
ETCD_ADVERTISE_CLIENT_URLS="http://192.168.10.190:2379"
#初始集群成员列表
ETCD_INITIAL_CLUSTER="etcd01=http://192.168.10.190:2380,etcd02=http://192.168.10.191:2380,etcd03=http://192.168.10.192:2380"
#集群的名称
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster-pg"
#初始集群状态,new表示新建集群| existing表示加入已有集群
ETCD_INITIAL_CLUSTER_STATE="new"
# 使用3.4.X版本时配置
# ETCD_ENABLE_V2="true"
EOF
  • 使用SSL证书
cat > /ups/app/etcd/cfg/etcd.conf <<-EOF
#[Member]
#etcd实例名称,可以随意设置不重复值
ETCD_NAME="etcd01"
#etcd数据保存目录
ETCD_DATA_DIR="/ups/app/etcd/data/default.etcd"
#集群内部通信使用的URL
ETCD_LISTEN_PEER_URLS="https://192.168.10.190:2380"
#供外部客户端使用的URL
ETCD_LISTEN_CLIENT_URLS="https://192.168.10.190:2379,http://127.0.0.1:2379"

#[Clustering]
#广播给集群内其他成员访问的URL
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://192.168.10.190:2380"
#广播给外部客户端使用的URL
ETCD_ADVERTISE_CLIENT_URLS="https://192.168.10.190:2379"
#初始集群成员列表
ETCD_INITIAL_CLUSTER="etcd01=https://192.168.10.190:2380,etcd02=https://192.168.10.191:2380,etcd03=https://192.168.10.192:2380"
#集群的名称
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster-pg"
#初始集群状态,new表示新建集群| existing表示加入已有集群
ETCD_INITIAL_CLUSTER_STATE="new"
# 使用3.4.X版本时配置
# ETCD_ENABLE_V2="true"

#[Security]
ETCD_CERT_FILE="/ups/app/etcd/ssl/etcd.pem"
ETCD_KEY_FILE="/ups/app/etcd/ssl/etcd-key.pem"
ETCD_TRUSTED_CA_FILE="/ups/app/etcd/ssl/ca.pem"
ETCD_CLIENT_CERT_AUTH="true"
ETCD_PEER_CERT_FILE="/ups/app/etcd/ssl/etcd.pem"
ETCD_PEER_KEY_FILE="/ups/app/etcd/ssl/etcd-key.pem"
ETCD_PEER_TRUSTED_CA_FILE="/ups/app/etcd/ssl/ca.pem"
ETCD_PEER_CLIENT_CERT_AUTH="true"
ETCD_PEER_AUTO_TLS="true"
ETCD_AUTO_TLS="true"
EOF

配置说明:

  • ETCD_NAME:

    • etcd 集群中的节点名,这里可以随意,可区分且不重复就行。
  • ETCD_LISTEN_PEER_URLS:

    • 监听的用于节点之间通信的 URL,可监听多个,集群内部将通过这些 URL 进行数据交互(如选举、数据同步等)。
  • ETCD_LISTEN_CLIENT_URLS:

    • 监听的用于客户端通信的 URL,同样可以监听多个。
  • ETCD_ADVERTISE_CLIENT_URLS:

    • 建议使用的客户端通信 URL,该值用于 etcd 代理或 etcd 成员与 etcd 节点通信。
  • ETCD_INITIAL_ADVERTISE_PEER_URLS:

    • 建议用于节点之间通信的 URL,节点间将以该值进行通信。
  • ETCD_INITIAL_CLUSTER:

    • 也就是集群中所有的 initial-advertise-peer-urls 的合集。
  • ETCD_INITIAL_CLUSTER_STATE:

    • 新建集群的标志。
  • ETCD_INITIAL_CLUSTER_TOKEN:

    • 节点的 token 值,设置该值后集群将生成唯一 ID,并为每个节点也生成唯一 ID,当使用相同配置文件再启动一个集群时,只要该 token 值不一样,etcd 集群就不会相互影响。
节点2
cat > /ups/app/etcd/cfg/etcd.conf <<-EOF
#[Member]
#etcd实例名称
ETCD_NAME="etcd02"
#etcd数据保存目录
ETCD_DATA_DIR="/ups/app/etcd/data/default.etcd"
#集群内部通信使用的URL
ETCD_LISTEN_PEER_URLS="http://192.168.10.191:2380"
#供外部客户端使用的URL
ETCD_LISTEN_CLIENT_URLS="http://192.168.10.191:2379,http://127.0.0.1:2379"

#[Clustering]
#广播给集群内其他成员访问的URL
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://192.168.10.191:2380"
#广播给外部客户端使用的URL
ETCD_ADVERTISE_CLIENT_URLS="http://192.168.10.191:2379"
#初始集群成员列表
ETCD_INITIAL_CLUSTER="etcd01=http://192.168.10.190:2380,etcd02=http://192.168.10.191:2380,etcd03=http://192.168.10.192:2380"
#集群的名称
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster-pg"
#初始集群状态,new表示新建集群| existing表示加入已有集群
ETCD_INITIAL_CLUSTER_STATE="new"
EOF

配置etcd启动文件

etcd-V3.3.X版本
  • 不使用证书
cat > /usr/lib/systemd/system/etcd.service <<-EOF
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target

[Service]
Type=notify
#User=etcd
WorkingDirectory=/ups/app/etcd/
EnvironmentFile=-/ups/app/etcd/cfg/etcd.conf
ExecStart=/ups/app/etcd/bin/etcd \
--name=\${ETCD_NAME} \
--data-dir=\${ETCD_DATA_DIR} \
--listen-peer-urls=\${ETCD_LISTEN_PEER_URLS} \
--listen-client-urls=\${ETCD_LISTEN_CLIENT_URLS} \
--advertise-client-urls=\${ETCD_ADVERTISE_CLIENT_URLS} \
--initial-advertise-peer-urls=\${ETCD_INITIAL_ADVERTISE_PEER_URLS} \
--initial-cluster=\${ETCD_INITIAL_CLUSTER} \
--initial-cluster-token=\${ETCD_INITIAL_CLUSTER_TOKEN} \
--initial-cluster-state=\${ETCD_INITIAL_CLUSTER_STATE} \
--log-output=stderr
Restart=on-failure
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
EOF
  • 使用证书
cat > /usr/lib/systemd/system/etcd.service <<-EOF
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target

[Service]
Type=notify
WorkingDirectory=/ups/app/etcd/
EnvironmentFile=-/ups/app/etcd/cfg/etcd.conf
ExecStart=/bin/bash -c "GOMAXPROCS=$(nproc) /ups/app/etcd/bin/etcd \
--name=\${ETCD_NAME} \
--data-dir=\${ETCD_DATA_DIR} \
--listen-peer-urls=\${ETCD_LISTEN_PEER_URLS} \
--listen-client-urls=\${ETCD_LISTEN_CLIENT_URLS} \
--advertise-client-urls=\${ETCD_ADVERTISE_CLIENT_URLS} \
--initial-advertise-peer-urls=\${ETCD_INITIAL_ADVERTISE_PEER_URLS} \
--initial-cluster=\${ETCD_INITIAL_CLUSTER} \
--initial-cluster-token=\${ETCD_INITIAL_CLUSTER_TOKEN} \
--initial-cluster-state=\${ETCD_INITIAL_CLUSTER_STATE} \
--auto-tls=\${ETCD_AUTO_TLS} \
--cert-file=\${ETCD_CERT_FILE} \
--key-file=\${ETCD_KEY_FILE} \
--peer-cert-file=\${ETCD_PEER_CERT_FILE} \
--peer-key-file=\${ETCD_PEER_KEY_FILE} \
--trusted-ca-file=\${ETCD_TRUSTED_CA_FILE} \
--client-cert-auth=\${ETCD_CLIENT_CERT_AUTH} \
--peer-client-cert-auth=\${ETCD_PEER_CLIENT_CERT_AUTH} \
--peer-trusted-ca-file=\${ETCD_PEER_TRUSTED_CA_FILE} \
--peer-auto-tls=\${ETCD_PEER_AUTO_TLS} "
Restart=on-failure
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
EOF
etcd-V3.4.X版本
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target

[Service]
Type=notify
# User=etcd
WorkingDirectory=/ups/app/etcd/
EnvironmentFile=-/ups/app/etcd/cfg/etcd.conf
ExecStart=/ups/app/etcd/bin/etcd --log-output=stderr
Restart=on-failure
RestartSec=10
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target

在 3.4.X 版本中,同一参数不能既在配置文件(环境变量)中设置、又在启动命令行选项中指定(如下图),否则 etcd 启动会报错

image-20200729134216292

启动

服务方式启动etcd
systemctl daemon-reload
systemctl enable etcd
systemctl restart etcd

systemctl daemon-reload && systemctl restart etcd && systemctl status etcd
脚本方式启动etcd

pg1主机创建 main.sh 启动脚本,如下:

etcd --name etcd01 \
     --initial-advertise-peer-urls http://192.168.10.190:2380 \
     --listen-peer-urls http://192.168.10.190:2380 \
     --listen-client-urls http://192.168.10.190:2379,http://127.0.0.1:2379 \
     --advertise-client-urls http://192.168.10.190:2379 \
     --initial-cluster-token etcd-cluster-pg \
     --initial-cluster etcd01=http://192.168.10.190:2380,etcd02=http://192.168.10.191:2380,etcd03=http://192.168.10.192:2380 \
     --initial-cluster-state new \
     --enable-v2

pg2 主机创建 main.sh启动脚本,如下:

cd /ups/app/etcd
./etcd --name etcd02 \
     --initial-advertise-peer-urls http://192.168.10.191:2380 \
     --listen-peer-urls http://192.168.10.191:2380 \
     --listen-client-urls http://192.168.10.191:2379,http://127.0.0.1:2379 \
     --advertise-client-urls http://192.168.10.191:2379 \
     --initial-cluster-token etcd-cluster-pg \
     --initial-cluster etcd01=http://192.168.10.190:2380,etcd02=http://192.168.10.191:2380,etcd03=http://192.168.10.192:2380 \
     --initial-cluster-state new \
     --enable-v2

pg3 主机创建 main.sh 启动脚本,如下:

cd /ups/app/etcd
./etcd --name etcd03 \
     --initial-advertise-peer-urls http://192.168.10.192:2380 \
     --listen-peer-urls http://192.168.10.192:2380 \
     --listen-client-urls http://192.168.10.192:2379,http://127.0.0.1:2379 \
     --advertise-client-urls http://192.168.10.192:2379 \
     --initial-cluster-token etcd-cluster-pg \
     --initial-cluster etcd01=http://192.168.10.190:2380,etcd02=http://192.168.10.191:2380,etcd03=http://192.168.10.192:2380 \
     --initial-cluster-state new \
     --enable-v2

使用证书

etcd --name etcd01 \
     --initial-advertise-peer-urls https://192.168.10.190:2380 \
     --listen-peer-urls https://192.168.10.190:2380 \
     --listen-client-urls https://192.168.10.190:2379,http://127.0.0.1:2379 \
     --advertise-client-urls https://192.168.10.190:2379 \
     --initial-cluster-token etcd-cluster-pg \
     --initial-cluster etcd01=https://192.168.10.190:2380,etcd02=https://192.168.10.191:2380,etcd03=https://192.168.10.192:2380 \
     --initial-cluster-state new \
     --cert-file=/ups/app/etcd/ssl/etcd.pem \
     --key-file=/ups/app/etcd/ssl/etcd-key.pem \
     --peer-cert-file=/ups/app/etcd/ssl/etcd.pem \
     --peer-key-file=/ups/app/etcd/ssl/etcd-key.pem \
     --trusted-ca-file=/ups/app/etcd/ssl/ca.pem \
     --peer-trusted-ca-file=/ups/app/etcd/ssl/ca.pem 


etcd --name etcd02 \
     --initial-advertise-peer-urls https://192.168.10.191:2380 \
     --listen-peer-urls https://192.168.10.191:2380 \
     --listen-client-urls https://192.168.10.191:2379,http://127.0.0.1:2379 \
     --advertise-client-urls https://192.168.10.191:2379 \
     --initial-cluster-token etcd-cluster-pg \
     --initial-cluster etcd01=https://192.168.10.190:2380,etcd02=https://192.168.10.191:2380,etcd03=https://192.168.10.192:2380 \
     --initial-cluster-state new \
     --cert-file=/ups/app/etcd/ssl/etcd.pem \
     --key-file=/ups/app/etcd/ssl/etcd-key.pem \
     --peer-cert-file=/ups/app/etcd/ssl/etcd.pem \
     --peer-key-file=/ups/app/etcd/ssl/etcd-key.pem \
     --trusted-ca-file=/ups/app/etcd/ssl/ca.pem \
     --peer-trusted-ca-file=/ups/app/etcd/ssl/ca.pem 

etcd --name etcd03 \
     --initial-advertise-peer-urls https://192.168.10.192:2380 \
     --listen-peer-urls https://192.168.10.192:2380 \
     --listen-client-urls https://192.168.10.192:2379,http://127.0.0.1:2379 \
     --advertise-client-urls https://192.168.10.192:2379 \
     --initial-cluster-token etcd-cluster-pg \
     --initial-cluster etcd01=https://192.168.10.190:2380,etcd02=https://192.168.10.191:2380,etcd03=https://192.168.10.192:2380 \
     --initial-cluster-state new \
     --cert-file=/ups/app/etcd/ssl/etcd.pem \
     --key-file=/ups/app/etcd/ssl/etcd-key.pem \
     --peer-cert-file=/ups/app/etcd/ssl/etcd.pem \
     --peer-key-file=/ups/app/etcd/ssl/etcd-key.pem \
     --trusted-ca-file=/ups/app/etcd/ssl/ca.pem \
     --peer-trusted-ca-file=/ups/app/etcd/ssl/ca.pem 

启动etcd,如下:

# cat start.sh
#!/bin/sh

cd /ups/app/etcd
sh ./main.sh > etcd.log 2>&1 &

# 执行启动脚本
sh start.sh

验证

# 查看成员
etcdctl \
--ca-file=/ups/app/etcd/ssl/ca.pem \
--cert-file=/ups/app/etcd/ssl/etcd.pem \
--key-file=/ups/app/etcd/ssl/etcd-key.pem \
--endpoints=https://192.168.10.190:2379  member list

# 查看集群状态
etcdctl \
  --endpoints=https://192.168.10.190:2379 \
  --cert-file=/ups/app/etcd/ssl/etcd.pem \
  --ca-file=/ups/app/etcd/ssl/ca.pem \
  --key-file=/ups/app/etcd/ssl/etcd-key.pem \
  cluster-health

ETCDCTL_API=3 etcdctl --endpoints=http://192.168.10.190:2379,http://192.168.10.191:2379,http://192.168.10.192:2379 endpoint health

ETCDCTL_API=2  etcdctl --endpoints "http://192.168.10.190:2379,http://192.168.10.191:2379,http://192.168.10.192:2379" member list

# -- etcd3.4
/ups/app/etcd/bin/etcd --version
/ups/app/etcd/bin/etcdctl endpoint health
/ups/app/etcd/bin/etcdctl endpoint status
/ups/app/etcd/bin/etcdctl member list

image-20200801213547803

etcdctl管理工具

etcdctl 是一个命令行客户端,它能提供一些简洁的命令,供用户直接跟etcd服务打交道,而无需基于 HTTP API 方式。

各个版本不同命令选项

export ETCDCTL_API=2
export ETCDCTL_API=3
语法
NAME:
   etcdctl - A simple command line client for etcd.

WARNING:
   Environment variable ETCDCTL_API is not set; defaults to etcdctl v2.
   Set environment variable ETCDCTL_API=3 to use v3 API or ETCDCTL_API=2 to use v2 API.

USAGE:
   etcdctl [global options] command [command options] [arguments...]
   
VERSION:
   3.3.22
   
COMMANDS:
     backup          backup an etcd directory
     cluster-health  check the health of the etcd cluster
     mk              make a new key with a given value
     mkdir           make a new directory
     rm              remove a key or a directory
     rmdir           removes the key if it is an empty directory or a key-value pair
     get             retrieve the value of a key
     ls              retrieve a directory
     set             set the value of a key
     setdir          create a new directory or update an existing directory TTL
     update          update an existing key with a given value
     updatedir       update an existing directory
     watch           watch a key for changes
     exec-watch      watch a key for changes and exec an executable
     member          member add, remove and list subcommands
     user            user add, grant and revoke subcommands
     role            role add, grant and revoke subcommands
     auth            overall auth controls
     help, h         Shows a list of commands or help for one command

GLOBAL OPTIONS:
   --debug                          output cURL commands which can be used to reproduce the request
   --no-sync                        don't synchronize cluster information before sending request
   --output simple, -o simple       output response in the given format (simple, `extended` or `json`) (default: "simple")
   --discovery-srv value, -D value  domain name to query for SRV records describing cluster endpoints
   --insecure-discovery             accept insecure SRV records describing cluster endpoints
   --peers value, -C value          DEPRECATED - "--endpoints" should be used instead
   --endpoint value                 DEPRECATED - "--endpoints" should be used instead
   --endpoints value                a comma-delimited list of machine addresses in the cluster (default: "http://127.0.0.1:2379,http://127.0.0.1:4001")
   --cert-file value                identify HTTPS client using this SSL certificate file
   --key-file value                 identify HTTPS client using this SSL key file
   --ca-file value                  verify certificates of HTTPS-enabled servers using this CA bundle
   --username value, -u value       provide username[:password] and prompt if password is not supplied.
   --timeout value                  connection timeout per request (default: 2s)
   --total-timeout value            timeout for the command execution (except watch) (default: 5s)
   --help, -h                       show help
   --version, -v                    print the version
   


[root@pg3 ~]# etcdctl ls -h
NAME:
   etcdctl ls - retrieve a directory

USAGE:
   etcdctl ls [command options] [key]

OPTIONS:
   --sort           returns result in sorted order
   --recursive, -r  returns all key names recursively for the given path
   -p               append slash (/) to directories
   --quorum, -q     require quorum for get request
   
[root@pg3 ~]#


export ETCDCTL_API=3
[root@pg1 ~]# etcdctl --help
NAME:
	etcdctl - A simple command line client for etcd3.

USAGE:
	etcdctl

VERSION:
	3.3.22

API VERSION:
	3.3


COMMANDS:
	get			Gets the key or a range of keys
	put			Puts the given key into the store
	del			Removes the specified key or range of keys [key, range_end)
	txn			Txn processes all the requests in one transaction
	compaction		Compacts the event history in etcd
	alarm disarm		Disarms all alarms
	alarm list		Lists all alarms
	defrag			Defragments the storage of the etcd members with given endpoints
	endpoint health		Checks the healthiness of endpoints specified in `--endpoints` flag
	endpoint status		Prints out the status of endpoints specified in `--endpoints` flag
	endpoint hashkv		Prints the KV history hash for each endpoint in --endpoints
	move-leader		Transfers leadership to another etcd cluster member.
	watch			Watches events stream on keys or prefixes
	version			Prints the version of etcdctl
	lease grant		Creates leases
	lease revoke		Revokes leases
	lease timetolive	Get lease information
	lease list		List all active leases
	lease keep-alive	Keeps leases alive (renew)
	member add		Adds a member into the cluster
	member remove		Removes a member from the cluster
	member update		Updates a member in the cluster
	member list		Lists all members in the cluster
	snapshot save		Stores an etcd node backend snapshot to a given file
	snapshot restore	Restores an etcd member snapshot to an etcd directory
	snapshot status		Gets backend snapshot status of a given file
	make-mirror		Makes a mirror at the destination etcd cluster
	migrate			Migrates keys in a v2 store to a mvcc store
	lock			Acquires a named lock
	elect			Observes and participates in leader election
	auth enable		Enables authentication
	auth disable		Disables authentication
	user add		Adds a new user
	user delete		Deletes a user
	user get		Gets detailed information of a user
	user list		Lists all users
	user passwd		Changes password of user
	user grant-role		Grants a role to a user
	user revoke-role	Revokes a role from a user
	role add		Adds a new role
	role delete		Deletes a role
	role get		Gets detailed information of a role
	role list		Lists all roles
	role grant-permission	Grants a key to a role
	role revoke-permission	Revokes a key from a role
	check perf		Check the performance of the etcd cluster
	help			Help about any command

OPTIONS:
      --cacert=""				verify certificates of TLS-enabled secure servers using this CA bundle
      --cert=""					identify secure client using this TLS certificate file
      --command-timeout=5s			timeout for short running command (excluding dial timeout)
      --debug[=false]				enable client-side debug logging
      --dial-timeout=2s				dial timeout for client connections
  -d, --discovery-srv=""			domain name to query for SRV records describing cluster endpoints
      --endpoints=[127.0.0.1:2379]		gRPC endpoints
      --hex[=false]				print byte strings as hex encoded strings
      --insecure-discovery[=true]		accept insecure SRV records describing cluster endpoints
      --insecure-skip-tls-verify[=false]	skip server certificate verification
      --insecure-transport[=true]		disable transport security for client connections
      --keepalive-time=2s			keepalive time for client connections
      --keepalive-timeout=6s			keepalive timeout for client connections
      --key=""					identify secure client using this TLS key file
      --user=""					username[:password] for authentication (prompt if password is not supplied)
  -w, --write-out="simple"			set the output format (fields, json, protobuf, simple, table)

[root@pg1 ~]# 

示例
# 查看所有数据
etcdctl ls -recursive
etcdctl --endpoints=http://127.0.0.1:2379 ls -recursive

# 删除 key名称 /service

[root@pg1 ~]# etcdctl ls -recursive
/service
/service/etcd-cluster-pg
/service/etcd-cluster-pg/config
/service/etcd-cluster-pg/optime
/service/etcd-cluster-pg/optime/leader
/service/etcd-cluster-pg/history
/service/etcd-cluster-pg/members
/service/etcd-cluster-pg/initialize
[root@pg1 ~]# etcdctl rm /service
Error:  102: Not a file (/service) [6680]
[root@pg1 ~]# etcdctl rm -recursive /service
[root@pg1 ~]# etcdctl ls -recursive
[root@pg1 ~]# 

zookeeper安装

安装配置ZK

# 安装zookeeper
tar -xf apache-zookeeper-3.6.0-bin.tar.gz -C /ups/app/zookeeper/
cd /ups/app/zookeeper/apache-zookeeper-3.6.0-bin
cp conf/zoo_sample.cfg conf/zoo.cfg
编辑配置文件
# vi conf/zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial 
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between 
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
dataDir=/ups/app/zookeeper/data
dataLogDir=/ups/app/zookeeper/log
# the port at which the clients will connect
clientPort=2181
# 配置Zookeeper集群信息
# server.[服务器编号]=[服务器地址]:[LF通信端口]:[选举端口]
# 服务器编号:必须与data/myid文件中的id一致
# LF通信端口: 服务器与集群中的leader交换信息的端口,一般选用相同的端口
# 选举端口: 选举新leader时服务器间相互通信的端口,一般选用相同的端口
server.1=192.168.10.190:2888:3888
server.2=192.168.10.191:2888:3888
server.3=192.168.10.192:2888:3888
配置创建myid文件

3台服务器分别设置myid

# 192.168.10.190 (myid 必须位于 zoo.cfg 中 dataDir 指定的目录下)
echo "1" > /ups/app/zookeeper/data/myid

# 192.168.10.191
echo "2" > /ups/app/zookeeper/data/myid

# 192.168.10.192
echo "3" > /ups/app/zookeeper/data/myid

启动

# 依次启动服务
nohup sh ./bin/zkServer.sh start >/dev/null 2>&1 &

验证

# 检查状态
sh ./bin/zkServer.sh status

# 测试连接
./bin/zkCli.sh -server localhost:2181

Patroni 安装

export LD_LIBRARY_PATH=/ups/app/python3/lib:${LD_LIBRARY_PATH}
cd /ups/app/python3/bin

./python3 -m pip install --upgrade setuptools
./python3 -m pip install --upgrade pip
./python3 -m pip install psycopg2_binary
./python3 -m pip install patroni[etcd,consul]
# -- 或者
/ups/app/python3/bin/pip3 install psycopg2-binary -i https://mirrors.aliyun.com/pypi/simple/
/ups/app/python3/bin/pip3 install patroni -i https://mirrors.aliyun.com/pypi/simple/
# OR
/ups/app/python3/bin/pip3 install patroni[etcd,consul,zookeeper] -i https://mirrors.aliyun.com/pypi/simple/

过程

[root@pg1 bin]# cd /ups/app/python3/bin
[root@pg1 bin]# ./pip3 install patroni[etcd] -i https://mirrors.aliyun.com/pypi/simple/
Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
Collecting patroni[etcd]
  Downloading https://mirrors.aliyun.com/pypi/packages/25/01/e4656c541ac648a530fc1b6094324969f9f2ed8d7005ad0fa2598cbf1199/patroni-1.6.5-py3-none-any.whl (178kB)
     |████████████████████████████████| 184kB 425kB/s 
Collecting psutil>=2.0.0 (from patroni[etcd])
  Downloading https://mirrors.aliyun.com/pypi/packages/aa/3e/d18f2c04cf2b528e18515999b0c8e698c136db78f62df34eee89cee205f1/psutil-5.7.2.tar.gz (460kB)
     |████████████████████████████████| 460kB 1.4MB/s 
Collecting six>=1.7 (from patroni[etcd])
  Downloading https://mirrors.aliyun.com/pypi/packages/ee/ff/48bde5c0f013094d729fe4b0316ba2a24774b3ff1c52d924a8a4cb04078a/six-1.15.0-py2.py3-none-any.whl
Collecting prettytable>=0.7 (from patroni[etcd])
  Downloading https://mirrors.aliyun.com/pypi/packages/ef/30/4b0746848746ed5941f052479e7c23d2b56d174b82f4fd34a25e389831f5/prettytable-0.7.2.tar.bz2
Collecting click>=4.1 (from patroni[etcd])
  Downloading https://mirrors.aliyun.com/pypi/packages/d2/3d/fa76db83bf75c4f8d338c2fd15c8d33fdd7ad23a9b5e57eb6c5de26b430e/click-7.1.2-py2.py3-none-any.whl (82kB)
     |████████████████████████████████| 92kB 1.3MB/s 
Collecting urllib3!=1.21,>=1.19.1 (from patroni[etcd])
  Downloading https://mirrors.aliyun.com/pypi/packages/9f/f0/a391d1463ebb1b233795cabfc0ef38d3db4442339de68f847026199e69d7/urllib3-1.25.10-py2.py3-none-any.whl (127kB)
     |████████████████████████████████| 133kB 323kB/s 
Collecting PyYAML (from patroni[etcd])
  Downloading https://mirrors.aliyun.com/pypi/packages/64/c2/b80047c7ac2478f9501676c988a5411ed5572f35d1beff9cae07d321512c/PyYAML-5.3.1.tar.gz (269kB)
     |████████████████████████████████| 276kB 383kB/s 
Collecting cdiff (from patroni[etcd])
  Downloading https://mirrors.aliyun.com/pypi/packages/69/6c/301876940e760a8b46c1caacf08c298f511f517c70eec32e43f38e9cc6f5/cdiff-1.0.tar.gz
Collecting python-dateutil (from patroni[etcd])
  Downloading https://mirrors.aliyun.com/pypi/packages/d4/70/d60450c3dd48ef87586924207ae8907090de0b306af2bce5d134d78615cb/python_dateutil-2.8.1-py2.py3-none-any.whl (227kB)
     |████████████████████████████████| 235kB 943kB/s 
Collecting python-etcd<0.5,>=0.4.3; extra == "etcd" (from patroni[etcd])
  Downloading https://mirrors.aliyun.com/pypi/packages/a1/da/616a4d073642da5dd432e5289b7c1cb0963cc5dde23d1ecb8d726821ab41/python-etcd-0.4.5.tar.gz
Collecting dnspython>=1.13.0 (from python-etcd<0.5,>=0.4.3; extra == "etcd"->patroni[etcd])
  Downloading https://mirrors.aliyun.com/pypi/packages/90/49/cb426577c28ca3e35332815b795a99e467523843fc83cc85ca0d6be2515a/dnspython-2.0.0-py3-none-any.whl (208kB)
     |████████████████████████████████| 215kB 2.8MB/s 
Installing collected packages: psutil, six, prettytable, click, urllib3, PyYAML, cdiff, python-dateutil, dnspython, python-etcd, patroni
  Running setup.py install for psutil ... done
  Running setup.py install for prettytable ... done
  Running setup.py install for PyYAML ... done
  Running setup.py install for cdiff ... done
  Running setup.py install for python-etcd ... done
Successfully installed PyYAML-5.3.1 cdiff-1.0 click-7.1.2 dnspython-2.0.0 patroni-1.6.5 prettytable-0.7.2 psutil-5.7.2 python-dateutil-2.8.1 python-etcd-0.4.5 six-1.15.0 urllib3-1.25.10
WARNING: You are using pip version 19.2.3, however version 20.1.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.

结果确认

[root@temp bin]# ./pip3 install patroni -i https://mirrors.aliyun.com/pypi/simple/
Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
Requirement already satisfied: patroni in /ups/app/python3/lib/python3.8/site-packages (1.6.5)
Requirement already satisfied: PyYAML in /ups/app/python3/lib/python3.8/site-packages (from patroni) (5.3.1)
Requirement already satisfied: click>=4.1 in /ups/app/python3/lib/python3.8/site-packages (from patroni) (7.1.2)
Requirement already satisfied: prettytable>=0.7 in /ups/app/python3/lib/python3.8/site-packages (from patroni) (0.7.2)
Requirement already satisfied: cdiff in /ups/app/python3/lib/python3.8/site-packages (from patroni) (1.0)
Requirement already satisfied: python-dateutil in /ups/app/python3/lib/python3.8/site-packages (from patroni) (2.8.1)
Requirement already satisfied: psutil>=2.0.0 in /ups/app/python3/lib/python3.8/site-packages (from patroni) (5.7.2)
Requirement already satisfied: urllib3!=1.21,>=1.19.1 in /ups/app/python3/lib/python3.8/site-packages (from patroni) (1.25.10)
Requirement already satisfied: six>=1.7 in /ups/app/python3/lib/python3.8/site-packages (from patroni) (1.15.0)
[root@temp bin]# 
find / -name 'patroni'
/ups/app/python3/bin/patroni
/ups/app/python3/lib/python3.8/site-packages/patroni

服务文件配置

vi /usr/lib/systemd/system/patroni.service

cat <<-EOF >/usr/lib/systemd/system/patroni.service
[Unit]
Description=Runners to orchestrate a high-availability PostgreSQL - patroni
Documentation=https://patroni.readthedocs.io/en/latest/index.html
After=syslog.target network.target etcd.service
Wants=network-online.target

[Service]
Type=simple

User=postgres
Group=postgres

ExecStart=/ups/app/python3/bin/patroni /etc/patroni/patroni.yml
ExecReload=/bin/kill -s HUP $MAINPID
KillMode=process
KillSignal=SIGINT

LimitNOFILE=65536
Restart=on-abnormal
RestartSec=30s
TimeoutSec=0

[Install]
WantedBy=multi-user.target

EOF

patroni参数配置

patroni读取PG参数顺序

  • postgresql.base.conf
  • postgresql.conf
  • postgresql.auto.conf
  • run-time parameter (即运行时alter命令设置的属性)

配置patroni文件

Patroni 使用 YAML 格式的配置文件,其格式(缩进、层级)要求非常严格。

mkdir -p /etc/patroni
chown -R postgres:postgres /etc/patroni


vi /etc/patroni/patroni.yml
grep -Ev "^[ \t]*(#|$)" /etc/patroni/patroni.yml

启动patroni服务时,将$PGDATA/postgresql.conf重命名为$PGDATA/postgresql.base.conf,然后将/etc/patroni/patroni.yml文件中配置的postgresql项内容覆盖写入到$PGDATA/postgresql.conf文件。

配置说明

​ 包括全局参数、restapi模块参数、etcd|zookeeper|consul模块参数、bootstrap启动参数、postgresql模块参数。

  • Global 设置/<namespace>/<scope>/config

    • name 节点名,在集群内必须唯一,每台机器有自己的名字
    • namespace 存储配置信息的区域路径(保持默认)
    • scope 集群的名字(同 postgresql.conf 的 cluster_name 参数)
  • log 的配置

    • level 设置日志的等级
    • format 设置日志的格式,默认为 %(asctime)s %(levelname)s: %(message)s
    • dateformat 设置时间格式
    • dir 写入程序日志的目录。该目录必须已存在,且运行 Patroni 的用户对其有写权限。设置该值后,应用程序默认保留 4 个 25MB 的日志文件。
    • file_num 要保留的日志的数量
    • file_size patroni.log的尺寸
    • loggers: 定义允许日志等级
  • 引导配置:

    • DCS: 集群的全局配置,更改这些参数需要在 DCS 中直接修改或通过 REST API 进行。
      • loop_wait 每次循环的休眠时间(秒),默认值:10
      • ttl: 获取 leader 锁的 TTL(秒)。可以将其视为启动自动故障转移过程之前的时间长度。默认值:30
      • retry_timeout: DCS 和 PostgreSQL 操作的重试超时时间(秒)。持续时间短于该值的 DCS 或网络故障不会导致 Patroni 将主库降级。默认值:10
      • maximum_lag_on_failover: 从库要参与 leader 选举时所允许落后主库的最大字节数。
      • master_start_timeout 触发故障转移之前允许主库从故障中恢复的时间(秒),默认值:300。主库故障时最坏情况下的切换时间为 loop_wait + master_start_timeout + loop_wait
      • synchronous_mode 开启同步复制模式。该模式下会选择一个从库作为同步从库,只有最新的主库和同步从库才能参与 leader 选举。
      • synchronous_mode_strict: 开启后,如果没有可用的同步从库,也不会关闭同步复制,此时主库上的所有写入都会被阻塞。

详见附录

Etcd方案
  • 主机1配置yml文件
scope: etcd-cluster-pg
namespace: /service/
name: pg1

restapi:
  listen: 192.168.10.190:8008
  connect_address: 192.168.10.190:8008

etcd:
  #Provide host to do the initial discovery of the cluster topology:
  # host: 192.168.10.190:2379
  hosts: 192.168.10.190:2379,192.168.10.191:2379,192.168.10.192:2379
  # protocol: https
  # cacert: /ups/app/etcd/ssl/ca.pem
  # cert: /ups/app/etcd/ssl/etcd.pem
  # key: /ups/app/etcd/ssl/etcd-key.pem

bootstrap:
  # this section will be written into Etcd:/<namespace>/<scope>/config after initializing new cluster
  # and all other cluster members will use it as a `global configuration`
  dcs:
    ttl: 30
    loop_wait: 10
    retry_timeout: 10
    maximum_lag_on_failover: 1048576
    master_start_timeout: 300
    synchronous_mode: false
    postgresql:
      use_pg_rewind: true
      use_slots: true
      parameters:
        port: 2020
        listen_addresses: "*"
        wal_level: logical
        hot_standby: "on"
        wal_keep_segments: 64
        max_wal_senders: 10
        max_replication_slots: 10
        wal_log_hints: "on"
        # archive_mode: "on"
        hot_standby: on
        # archive_timeout: 1800s

postgresql:
  listen: 0.0.0.0:2020
  connect_address: 192.168.10.190:2020
  data_dir: /ups/data/pgdata/12/pg_root
  bin_dir: /ups/app/postgresql/pgsql-12/bin
  # config_dir: /ups/data/pgdata/12/pg_root
  pgpass: /home/postgres/.pgpass
  authentication:
    replication:
      username: sync
      password: sync12345
    superuser:
      username: postgres
      password: postgres
    #rewind:  # Has no effect on postgres 10 and lower
      #username: pg_rewind
      #password: 

tags:
  nofailover: false
  noloadbalance: false
  clonefrom: false
  nosync: false

其它节点需修改全局参数name、restapi模块的listen和connect_address参数、etcd模块的host参数,以及postgresql模块的connect_address参数。

  • 主机2配置yml文件
scope: etcd-cluster-pg
namespace: /service/
name: pg2

restapi:
  listen: 192.168.10.191:8008
  connect_address: 192.168.10.191:8008

etcd:
  #Provide host to do the initial discovery of the cluster topology:
  hosts: 192.168.10.190:2379,192.168.10.191:2379,192.168.10.192:2379
  # protocol: https
  # cacert: /ups/app/etcd/ssl/ca.pem
  # cert: /ups/app/etcd/ssl/etcd.pem
  # key: /ups/app/etcd/ssl/etcd-key.pem

bootstrap:
  # this section will be written into Etcd:/<namespace>/<scope>/config after initializing new cluster
  # and all other cluster members will use it as a `global configuration`
  dcs:
    ttl: 30
    loop_wait: 10
    retry_timeout: 10
    maximum_lag_on_failover: 1048576
    master_start_timeout: 300
    synchronous_mode: false
    postgresql:
      use_pg_rewind: true
      use_slots: true
      parameters:
        wal_level: logical
        hot_standby: "on"
        wal_keep_segments: 64
        max_wal_senders: 10
        max_replication_slots: 10
        wal_log_hints: "on"
        archive_mode: "on"
        hot_standby: on
        archive_timeout: 1800s

postgresql:
  listen: 0.0.0.0:2020
  connect_address: 192.168.10.191:2020
  data_dir: /ups/data/pgdata/12/pg_root
  bin_dir: /ups/app/postgresql/pgsql-12/bin
  #config_dir: /ups/data/pgdata/12/pg_root
  pgpass: /home/postgres/.pgpass
  authentication:
    replication:
      username: sync
      password: sync12345
    superuser:
      username: postgres
      password: 
   # rewind:  # Has no effect on postgres 10 and lower
    #  username: pg_rewind
     # password: 

tags:
  nofailover: false
  noloadbalance: false
  clonefrom: false
  nosync: false
zookeeper方案

替换上面etcd项为ZooKeeper即可

zookeeper:
  hosts: ['192.168.10.190:2181','192.168.10.191:2181','192.168.10.192:2181']

启动服务

三台主机分别启动 patroni ,如下:

/ups/app/python3/bin/patroni /etc/patroni/patroni.yml > /tmp/pg_patroni.log 2>&1 &
/ups/app/python3/bin/patroni /etc/patroni/patroni.yml > /tmp/pg_patroni.log 2>&1 &
/ups/app/python3/bin/patroni /etc/patroni/patroni.yml > /tmp/pg_patroni.log 2>&1 &

# 服务方式启动
systemctl daemon-reload && systemctl start patroni
启动Patroni,在host1上
  • Patroni1 把本地PostgreSQL(postgresql1)的信息写入etcd.
  • Patroni1 监测到数据库目录(/ups/data/pgdata/12/pg_root)是空的,于是初始化数据库(initdb -D /ups/data/pgdata/12/pg_root)
  • Patroni1 配置本地数据库相关的配置文件,例如:postgresql.conf, pg_hba.conf
  • Patroni1 启动本地数据库(postgresql1): pg_ctl -D /ups/data/pgdata/12/pg_root start
  • Patroni1 把本地数据库(postgresql1)设定为主数据库(Primary)
启动Patroni,在host2/host3上
  • Patroni2/Patroni3 基于postgresql1做数据库备份(pg_basebackup),创建各自的本地数据库
  • Patroni2/Patroni3 配置本地数据库相关的配置文件,例如:postgresql.conf, pg_hba.conf
  • Patroni2 启动postgresql2,作为从库(Standby)
  • Patroni3 启动postgresql3,作为从库(Standby)

建议手工方式配置流复制,不建议通过patroni方式配置主从环境

Patronictl 基本操作

语法
[root@pg3 ~]# patronictl --help
Usage: patronictl [OPTIONS] COMMAND [ARGS]...

Options:
  -c, --config-file TEXT  Configuration file
  -d, --dcs TEXT          Use this DCS
  -k, --insecure          Allow connections to SSL sites without certs
  --help                  Show this message and exit.

Commands:
  configure    Create configuration file
  dsn          Generate a dsn for the provided member, defaults to a dsn of...
  edit-config  Edit cluster configuration
  failover     Failover to a replica
  flush        Discard scheduled events (restarts only currently)
  history      Show the history of failovers/switchovers
  list         List the Patroni members for a given Patroni
  pause        Disable auto failover
  query        Query a Patroni PostgreSQL member
  reinit       Reinitialize cluster member
  reload       Reload cluster member configuration
  remove       Remove cluster from DCS
  restart      Restart cluster member
  resume       Resume auto failover
  scaffold     Create a structure for the cluster in DCS
  show-config  Show cluster configuration
  switchover   Switchover to a replica
  version      Output version of patronictl command or a running Patroni...
[root@pg3 ~]# 

示例
patronictl -c /etc/patroni/patroni_postgresql.yml show-config
patronictl -c /etc/patroni/patroni_postgresql.yml list
patronictl -c /etc/patroni/patroni_postgresql.yml edit-config

# 删除属性
patronictl -c /etc/patroni/patroni_postgresql.yml edit-config -s postgresql.parameters.synchronous_standby_names=null

# 重启数据库 
patronictl -c /etc/patroni/patroni_postgresql.yml restart pgha
查看 patroni 集群
patronictl -c /etc/patroni/patroni.yml list
patronictl -c /etc/patroni/patroni.yml list etcd-cluster-pg

patronictl -d etcd://pg1:2379 list etcd-cluster-pg

# 输出结果
+ Cluster: etcd-cluster-pg (6854432365693308402) -+----+-----------+
| Member |         Host        |  Role  |  State  | TL | Lag in MB |
+--------+---------------------+--------+---------+----+-----------+
|  pg1   | 192.168.10.190:2020 | Leader | running |  1 |           |
|  pg2   | 192.168.10.191:2020 |        | running |  1 |         0 |
|  pg3   | 192.168.10.192:2020 |        | running |  1 |         0 |
+--------+---------------------+--------+---------+----+-----------+

[root@pg1 ~]# etcdctl ls --recursive --sort -p /service
/service/etcd-cluster-pg/
/service/etcd-cluster-pg/config
/service/etcd-cluster-pg/initialize
/service/etcd-cluster-pg/leader
/service/etcd-cluster-pg/members/
/service/etcd-cluster-pg/members/pg1
/service/etcd-cluster-pg/members/pg2
/service/etcd-cluster-pg/members/pg3
/service/etcd-cluster-pg/optime/
/service/etcd-cluster-pg/optime/leader
[root@pg1 ~]#

[root@pg1 ~]# etcdctl get /service/etcd-cluster-pg/members/pg1
{"conn_url":"postgres://192.168.10.190:2020/postgres","api_url":"http://192.168.10.190:8008/patroni","state":"running","role":"master","version":"1.6.5","xlog_location":201335568,"timeline":1}

查看 cluster 状态
curl -s "http://192.168.10.190:8008/cluster" | jq .
curl -s "http://192.168.10.191:8008/cluster" | jq .
curl -s "http://192.168.10.192:8008/cluster" | jq .

[postgres@pg1 ~]$ curl -s "http://192.168.10.190:8008/cluster" | jq .
{
  "members": [
    {
      "name": "pgsql12_pg1",
      "role": "leader",
      "state": "running",
      "api_url": "http://192.168.10.190:8008/patroni",
      "host": "192.168.10.190",
      "port": 2020,
      "timeline": 2
    },
    {
      "name": "pgsql12_pg2",
      "role": "replica",
      "state": "running",
      "api_url": "http://192.168.10.191:8008/patroni",
      "host": "192.168.10.191",
      "port": 2020,
      "timeline": 2,
      "lag": 0
    },
    {
      "name": "pgsql12_pg3",
      "role": "replica",
      "state": "running",
      "api_url": "http://192.168.10.192:8008/patroni",
      "host": "192.168.10.192",
      "port": 2020,
      "timeline": 2,
      "lag": 0
    }
  ]
}

image-20200731134240189

查看 patroni 节点状态
curl -s "http://192.168.10.190:8008/patroni" | jq .
curl -s "http://192.168.10.191:8008/patroni" | jq .
curl -s "http://192.168.10.192:8008/patroni" | jq .

[root@pg1 ~]# curl -s "http://192.168.10.190:8008/patroni" | jq .
{
  "state": "running",
  "postmaster_start_time": "2020-07-29 10:25:32.214 CST",
  "role": "master",
  "server_version": 120002,
  "cluster_unlocked": false,
  "xlog": {
    "location": 201335568
  },
  "timeline": 1,
  "replication": [
    {
      "usename": "sync",
      "application_name": "pg2",
      "client_addr": "192.168.10.191",
      "state": "streaming",
      "sync_state": "sync",
      "sync_priority": 1
    },
    {
      "usename": "sync",
      "application_name": "pg3",
      "client_addr": "192.168.10.192",
      "state": "streaming",
      "sync_state": "potential",
      "sync_priority": 1
    }
  ],
  "database_system_identifier": "6854432365693308402",
  "patroni": {
    "version": "1.6.5",
    "scope": "etcd-cluster-pg"
  }
}

更多查询命令参考 Patroni REST API

posted @ 2020-07-31 14:12  KuBee  阅读(2192)  评论(0编辑  收藏  举报