This document walks you through manually deploying a highly available ClickHouse cluster with a single shard and two replicas across 2 servers, using a standalone Apache ZooKeeper ensemble as the coordination service. The key difference from the embedded-Keeper variant is that the coordination service (ZooKeeper) and the data service (ClickHouse Server) are deployed and maintained independently.
1. Cluster Planning and Architecture 📊
- Cluster mode: single shard, dual replica (1 shard × 2 replicas)
- Servers (same as before):
  - 172.21.204.200 (ClickHouse replica 1 + ZooKeeper instances 1 and 3)
  - 172.21.204.201 (ClickHouse replica 2 + ZooKeeper instance 2)
- Coordination service: standalone Apache ZooKeeper ensemble (3 instances)
- Network access: listen on 0.0.0.0 so cross-node and remote connections work
- Key architectural change: each server runs clickhouse-server, and the 3 ZooKeeper processes are spread across the two servers to form an odd-sized ensemble. Note that this only protects against individual process failure: if 172.21.204.200 goes down, two of the three instances are lost and the ensemble loses quorum; full host-level fault tolerance requires a third server.
2. System Environment Preparation
2.1 Set Hostnames and /etc/hosts Resolution (both servers)
# Run on 172.21.204.200 and 172.21.204.201 respectively
# Set the hostname (if not already set)
sudo hostnamectl set-hostname zb-yunweitest-mysql-204-200 # on 200
sudo hostnamectl set-hostname zb-yunweitest-mysql-204-201 # on 201
# Edit /etc/hosts and add the same entries on both servers
sudo vim /etc/hosts
Add the following lines:
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
172.21.204.200 zb-yunweitest-mysql-204-200 zk1 ch1
172.21.204.201 zb-yunweitest-mysql-204-201 zk2 ch2
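To confirm the aliases resolve and the peer is reachable, a quick check (run on 200; use ch1 instead of ch2 when checking from 201):
getent hosts zk1 zk2 ch1 ch2   # should echo the /etc/hosts entries above
ping -c 1 ch2                  # the peer server should answer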
2.2 Disable SELinux (optional; configure for production as needed)
# Disable temporarily
sudo setenforce 0
# Disable permanently (requires a reboot)
sudo sed -i 's/^SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
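To verify the current SELinux state:
getenforce   # expect "Permissive" after setenforce 0, "Disabled" after a reboot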
2.3 Configure Firewall Rules
# Run on both servers; open all required ports
sudo firewall-cmd --permanent --add-port=9000/tcp # ClickHouse native TCP port
sudo firewall-cmd --permanent --add-port=8123/tcp # ClickHouse HTTP port
sudo firewall-cmd --permanent --add-port=9009/tcp # ClickHouse interserver replication
# ZooKeeper client ports (one per instance)
sudo firewall-cmd --permanent --add-port=2181/tcp # zk1 client port
sudo firewall-cmd --permanent --add-port=2182/tcp # zk3 client port
sudo firewall-cmd --permanent --add-port=2183/tcp # zk2 client port
# ZooKeeper quorum and election ports; each instance uses its own pair
# (matches the server.N lines in section 4, so 2888-2890 and 3888-3890 are all needed)
sudo firewall-cmd --permanent --add-port=2888-2890/tcp # quorum (peer) ports
sudo firewall-cmd --permanent --add-port=3888-3890/tcp # leader-election ports
sudo firewall-cmd --reload
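Confirm the rules are active after the reload:
sudo firewall-cmd --list-ports   # all of the ports added above should be listed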
2.4 System Parameter Tuning
# Tune kernel parameters
sudo tee -a /etc/sysctl.conf << EOF
# Raise the maximum number of file descriptors
fs.file-max = 1000000
# Raise the number of memory map areas; important for Java applications such as ZooKeeper
vm.max_map_count = 262144
# Network tuning
net.core.somaxconn = 65535
net.ipv4.tcp_max_syn_backlog = 65535
EOF
sudo sysctl -p
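Verify that the kernel picked up the new values:
sysctl fs.file-max vm.max_map_count net.core.somaxconn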
# Set per-user resource limits
sudo tee /etc/security/limits.d/clickhouse-zookeeper.conf << EOF
# Limits for the clickhouse user
clickhouse soft nofile 262144
clickhouse hard nofile 262144
clickhouse soft nproc 131072
clickhouse hard nproc 131072
# Limits for the zookeeper user
zookeeper soft nofile 65535
zookeeper hard nofile 65535
zookeeper soft nproc 65535
zookeeper hard nproc 65535
EOF
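Note that limits.d entries only apply to new login (PAM) sessions; services started by systemd take their limits from the unit file instead (the ZooKeeper units in section 4.2 set LimitNOFILE explicitly, and the packaged clickhouse-server unit does the same). As a sketch of how to spot-check the PAM limits once the clickhouse user exists (after section 3.1), assuming sudo on your distribution applies pam_limits:
sudo -u clickhouse bash -c 'ulimit -n'   # expect 262144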
3. Software Installation
3.1 Install ClickHouse (same as before)
# Run on both servers
cd /path/to/clickhouse-packages/
sudo rpm -ivh clickhouse-common-static-*.x86_64.rpm
sudo rpm -ivh clickhouse-server-*.x86_64.rpm
sudo rpm -ivh clickhouse-client-*.x86_64.rpm
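To confirm the packages installed cleanly:
rpm -qa | grep clickhouse
clickhouse-server --version
clickhouse-client --version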
3.2 Install the Java Runtime (ZooKeeper dependency)
# Install OpenJDK (ZooKeeper 3.8.x requires JDK 8 or 11)
sudo yum install -y java-11-openjdk-devel
# Verify the installation
java -version
3.3 Install ZooKeeper
# Run on both servers
# 1. Create a dedicated group and user
sudo groupadd -r zookeeper
sudo useradd -r -g zookeeper -s /sbin/nologin -d /var/lib/zookeeper zookeeper
# 2. Create the installation, data, and log directories
sudo mkdir -p /opt/zookeeper /var/lib/zookeeper /var/log/zookeeper
# Per-instance data and transaction-log directories
sudo mkdir -p /var/lib/zookeeper/{zk1,zk2,zk3}/{data,log}
# Per-instance application-log directories (used via ZOO_LOG_DIR in section 4.2)
sudo mkdir -p /var/log/zookeeper/{zk1,zk2,zk3}
sudo chown -R zookeeper:zookeeper /opt/zookeeper /var/lib/zookeeper /var/log/zookeeper
# 3. Download and unpack ZooKeeper (using the 3.8.4 stable release as an example)
cd /tmp
wget https://downloads.apache.org/zookeeper/zookeeper-3.8.4/apache-zookeeper-3.8.4-bin.tar.gz
sudo tar -xzf apache-zookeeper-3.8.4-bin.tar.gz -C /opt/zookeeper --strip-components=1
sudo chown -R zookeeper:zookeeper /opt/zookeeper
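A quick check that the distribution unpacked where the service units will expect it:
ls /opt/zookeeper/bin/zkServer.sh /opt/zookeeper/conf
/opt/zookeeper/bin/zkServer.sh version   # should report 3.8.4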
4. ZooKeeper Ensemble Configuration (3 instances on 2 servers)
4.1 Create a Configuration File per Instance
On 172.21.204.200 (runs zk1 and zk3):
# Instance zk1 (myid=1, clientPort=2181)
sudo tee /opt/zookeeper/conf/zoo1.cfg << EOF
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/var/lib/zookeeper/zk1/data
dataLogDir=/var/lib/zookeeper/zk1/log
clientPort=2181
maxClientCnxns=60
autopurge.snapRetainCount=3
autopurge.purgeInterval=24
server.1=172.21.204.200:2888:3888
server.2=172.21.204.201:2889:3889
server.3=172.21.204.200:2890:3890
EOF
# Instance zk3 (myid=3, clientPort=2182)
sudo tee /opt/zookeeper/conf/zoo3.cfg << EOF
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/var/lib/zookeeper/zk3/data
dataLogDir=/var/lib/zookeeper/zk3/log
clientPort=2182
maxClientCnxns=60
autopurge.snapRetainCount=3
autopurge.purgeInterval=24
server.1=172.21.204.200:2888:3888
server.2=172.21.204.201:2889:3889
server.3=172.21.204.200:2890:3890
EOF
# Create the myid files
echo 1 | sudo tee /var/lib/zookeeper/zk1/data/myid
echo 3 | sudo tee /var/lib/zookeeper/zk3/data/myid
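Each myid must match the N of the corresponding server.N line, otherwise the instance joins the ensemble as the wrong member. A quick cross-check on 200:
cat /var/lib/zookeeper/zk1/data/myid   # expect 1
cat /var/lib/zookeeper/zk3/data/myid   # expect 3
grep '^server\.' /opt/zookeeper/conf/zoo1.cfg /opt/zookeeper/conf/zoo3.cfg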
On 172.21.204.201 (runs zk2):
# Instance zk2 (myid=2, clientPort=2183)
sudo tee /opt/zookeeper/conf/zoo2.cfg << EOF
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/var/lib/zookeeper/zk2/data
dataLogDir=/var/lib/zookeeper/zk2/log
clientPort=2183
maxClientCnxns=60
autopurge.snapRetainCount=3
autopurge.purgeInterval=24
server.1=172.21.204.200:2888:3888
server.2=172.21.204.201:2889:3889
server.3=172.21.204.200:2890:3890
EOF
# Create the myid file
echo 2 | sudo tee /var/lib/zookeeper/zk2/data/myid
4.2 Configure systemd Services for ZooKeeper (one per instance)
Create the unit files on 172.21.204.200:
# Service for instance zk1
sudo tee /etc/systemd/system/zookeeper1.service << EOF
[Unit]
Description=Apache ZooKeeper (Instance 1)
After=network.target
[Service]
Type=forking
User=zookeeper
Group=zookeeper
Environment=JAVA_HOME=/usr/lib/jvm/java-11-openjdk
# Give each instance its own application-log directory (zkEnv.sh honors ZOO_LOG_DIR),
# so co-located instances do not write to the same log files
Environment=ZOO_LOG_DIR=/var/log/zookeeper/zk1
ExecStart=/opt/zookeeper/bin/zkServer.sh start /opt/zookeeper/conf/zoo1.cfg
ExecStop=/opt/zookeeper/bin/zkServer.sh stop /opt/zookeeper/conf/zoo1.cfg
ExecReload=/opt/zookeeper/bin/zkServer.sh restart /opt/zookeeper/conf/zoo1.cfg
Restart=on-abnormal
RestartSec=10s
LimitNOFILE=65535
[Install]
WantedBy=multi-user.target
EOF
# Service for instance zk3 (only the Description, config file, and log directory differ)
sudo tee /etc/systemd/system/zookeeper3.service << EOF
[Unit]
Description=Apache ZooKeeper (Instance 3)
After=network.target
[Service]
Type=forking
User=zookeeper
Group=zookeeper
Environment=JAVA_HOME=/usr/lib/jvm/java-11-openjdk
Environment=ZOO_LOG_DIR=/var/log/zookeeper/zk3
ExecStart=/opt/zookeeper/bin/zkServer.sh start /opt/zookeeper/conf/zoo3.cfg
ExecStop=/opt/zookeeper/bin/zkServer.sh stop /opt/zookeeper/conf/zoo3.cfg
ExecReload=/opt/zookeeper/bin/zkServer.sh restart /opt/zookeeper/conf/zoo3.cfg
Restart=on-abnormal
RestartSec=10s
LimitNOFILE=65535
[Install]
WantedBy=multi-user.target
EOF
Create the unit file on 172.21.204.201:
# Service for instance zk2
sudo tee /etc/systemd/system/zookeeper2.service << EOF
[Unit]
Description=Apache ZooKeeper (Instance 2)
After=network.target
[Service]
Type=forking
User=zookeeper
Group=zookeeper
Environment=JAVA_HOME=/usr/lib/jvm/java-11-openjdk
Environment=ZOO_LOG_DIR=/var/log/zookeeper/zk2
ExecStart=/opt/zookeeper/bin/zkServer.sh start /opt/zookeeper/conf/zoo2.cfg
ExecStop=/opt/zookeeper/bin/zkServer.sh stop /opt/zookeeper/conf/zoo2.cfg
ExecReload=/opt/zookeeper/bin/zkServer.sh restart /opt/zookeeper/conf/zoo2.cfg
Restart=on-abnormal
RestartSec=10s
LimitNOFILE=65535
[Install]
WantedBy=multi-user.target
EOF
Reload systemd:
# Run on both servers
sudo systemctl daemon-reload
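The instances can now be enabled and started; the ensemble forms once any two of the three are up. A minimal status check, relying on the srvr four-letter command, which ZooKeeper 3.5+ whitelists by default:
# On 172.21.204.200
sudo systemctl enable --now zookeeper1 zookeeper3
# On 172.21.204.201
sudo systemctl enable --now zookeeper2
# Expect one leader and two followers across the ensemble
/opt/zookeeper/bin/zkServer.sh status /opt/zookeeper/conf/zoo1.cfg
echo srvr | nc 172.21.204.200 2182 | grep Mode   # zk3 (requires nc)
echo srvr | nc 172.21.204.201 2183 | grep Mode   # zk2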
5. ClickHouse Server Configuration (key changes)
5.1 Adjust the Main Configuration File /etc/clickhouse-server/config.xml
<!-- Remove or comment out the embedded Keeper section -->
<!--
<keeper_server> ... </keeper_server>
-->
<!-- Make sure the listen configuration is correct. 0.0.0.0 listens on all IPv4
     interfaces; on a dual-stack host you may use <listen_host>::</listen_host>
     instead, but configuring both at once can fail to bind the ports. -->
<listen_host>0.0.0.0</listen_host>
<interserver_http_host>172.21.204.200</interserver_http_host>
<!-- Note: set this to 172.21.204.201 on the 201 server -->
<!-- Other settings (paths, memory limits, etc.) stay unchanged -->
<path>/data/clickhouse/</path>
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
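After editing, you can have ClickHouse resolve individual keys back to confirm the file parses; the clickhouse-extract-from-config helper ships with the server package:
sudo clickhouse-extract-from-config --config-file /etc/clickhouse-server/config.xml --key path
sudo clickhouse-extract-from-config --config-file /etc/clickhouse-server/config.xml --key interserver_http_host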
5.2 Cluster Configuration File /etc/clickhouse-server/config.d/metrika.xml
<?xml version="1.0"?>
<yandex>
    <!-- ========== Cluster definition (unchanged) ========== -->
    <remote_servers>
        <cluster_1s2r>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>172.21.204.200</host>
                    <port>9000</port>
                </replica>
                <replica>
                    <host>172.21.204.201</host>
                    <port>9000</port>
                </replica>
            </shard>
        </cluster_1s2r>
    </remote_servers>
    <!-- ========== ZooKeeper configuration (all 3 instances) ========== -->
    <zookeeper>
        <!-- Instance 1 (runs on 200, port 2181) -->
        <node index="1">
            <host>172.21.204.200</host>
            <port>2181</port>
        </node>
        <!-- Instance 2 (runs on 201, port 2183) -->
        <node index="2">
            <host>172.21.204.201</host>
            <port>2183</port>
        </node>
        <!-- Instance 3 (runs on 200, port 2182) -->
        <node index="3">
            <host>172.21.204.200</host>
            <port>2182</port>
        </node>
        <!-- Timeouts -->
        <session_timeout_ms>30000</session_timeout_ms>
        <operation_timeout_ms>10000</operation_timeout_ms>
    </zookeeper>
    <!-- ========== Macros ========== -->
    <macros>
        <cluster>cluster_1s2r</cluster>
        <shard>1</shard>
        <replica>replica_200</replica>
    </macros>
    <!-- On 172.21.204.201 use: <replica>replica_201</replica> -->