Production-Grade Kubernetes Cluster Deployment: Best Practices for Highly Available Master Nodes

Deploying a production, highly available Kubernetes 1.31.0 cluster on Ubuntu 22.04.

Host information

Hostname       IP               Spec
k8s-master01   192.168.10.101   4c4g
k8s-master02   192.168.10.102   4c4g
k8s-master03   192.168.10.103   4c4g
lb01           192.168.10.104   4c4g
lb02           192.168.10.105   4c4g
VIP            192.168.10.100

Architecture diagram

The VIP 192.168.10.100 is held by whichever of lb01/lb02 Keepalived elects as MASTER; HAProxy on the LB nodes forwards TCP 6443 to the kube-apiservers on the three master nodes.

Deploy the software load balancers

Set the hostname and a static IP

user@ubuntu2204-server:~$ hostnamectl hostname lb01
==== AUTHENTICATING FOR org.freedesktop.hostname1.set-static-hostname ===
Authentication is required to set the statically configured local hostname, as well as the pretty hostname.
Authenticating as: user
Password: 
==== AUTHENTICATION COMPLETE ===
user@ubuntu2204-server:~$ bash
user@lb01:~$ 

cat > /etc/netplan/01-network-manager-all.yaml  <<EOF
# This is the network config written by 'subiquity'
network:
  ethernets:
    ens33:
      dhcp4: no
      addresses:
        - 192.168.10.104/24
      routes:
        - to: default
          via: 192.168.10.2
      nameservers:
        addresses: [223.5.5.5,114.114.114.114]
  version: 2
  renderer: networkd
EOF
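Then apply the new configuration and confirm the address (run as root; the SSH session may drop if the address changes):

netplan apply
ip addr show ens33    # confirm 192.168.10.104 is configured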

Install Keepalived and HAProxy

apt install keepalived haproxy psmisc -y

Configure HAProxy

On the servers 192.168.10.104 and 192.168.10.105, configure HAProxy with the parameters below (the configuration is identical on both LB machines; just make sure the backend server addresses are correct).


Edit /etc/haproxy/haproxy.cfg, keeping the stock global and defaults sections and appending a TCP frontend/backend for the kube-apiserver at the end. The complete file after editing:

root@lb02:~# vim /etc/haproxy/haproxy.cfg
root@lb02:~# cat /etc/haproxy/haproxy.cfg
global
	log /dev/log	local0
	log /dev/log	local1 notice
	chroot /var/lib/haproxy
	stats socket /run/haproxy/admin.sock mode 660 level admin expose-fd listeners
	stats timeout 30s
	user haproxy
	group haproxy
	daemon

	# Default SSL material locations
	ca-base /etc/ssl/certs
	crt-base /etc/ssl/private

	# See: https://ssl-config.mozilla.org/#server=haproxy&server-version=2.0.3&config=intermediate
        ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
        ssl-default-bind-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256
        ssl-default-bind-options ssl-min-ver TLSv1.2 no-tls-tickets

defaults
	log	global
	mode	http
	option	httplog
	option	dontlognull
        timeout connect 5000
        timeout client  50000
        timeout server  50000
	errorfile 400 /etc/haproxy/errors/400.http
	errorfile 403 /etc/haproxy/errors/403.http
	errorfile 408 /etc/haproxy/errors/408.http
	errorfile 500 /etc/haproxy/errors/500.http
	errorfile 502 /etc/haproxy/errors/502.http
	errorfile 503 /etc/haproxy/errors/503.http
	errorfile 504 /etc/haproxy/errors/504.http
#---------------------------------------------------------------------
frontend  kube-apiserver
    bind *:6443
    mode tcp
    option tcplog
    default_backend kube-apiserver
#---------------------------------------------------------------------
# round robin balancing between the various backends
#---------------------------------------------------------------------
backend kube-apiserver
    mode tcp
    option tcplog
    balance     roundrobin
    default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
    server kube-apiserver-1 192.168.10.101:6443 check
    server kube-apiserver-2 192.168.10.102:6443 check
    server kube-apiserver-3 192.168.10.103:6443 check



Check the configuration syntax before starting:

 haproxy -f /etc/haproxy/haproxy.cfg -c
 
systemctl  restart  haproxy.service  && systemctl  enable  haproxy.service  && systemctl  status haproxy.service
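Optionally, confirm HAProxy is up and listening on the API server port:

ss -lntp | grep ':6443'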


Stop HAProxy

 systemctl stop haproxy.service 

Configure Keepalived

Primary (MASTER) LB node: lb01

192.168.10.104 (/etc/keepalived/keepalived.conf); adjust the interface name and IP addresses to match your environment.

cat > /etc/keepalived/keepalived.conf <<EOF
global_defs {
   notification_email {
   }
   smtp_connect_timeout 30          # SMTP connection timeout
   router_id LVS_DEVEL01            # a nickname for this server
   vrrp_skip_check_adv_addr
   vrrp_garp_interval 0
   vrrp_gna_interval 0
}
vrrp_script chk_haproxy {
   script "killall -0 haproxy"      # succeeds only while an haproxy process exists
   interval 2
   weight 20
}
vrrp_instance haproxy-vip {
   state MASTER                     # this node is the MASTER
   priority 100                     # the MASTER's priority must be higher than the BACKUP's
   interface ens33                  # interface the instance binds to
   virtual_router_id 60             # VRRP group id; both nodes must use the same value
   advert_int 1                     # advertise every second so the peer notices failures quickly
   authentication {
      auth_type PASS                # authentication type
      auth_pass 1111                # shared password between the peers
   }
   unicast_src_ip 192.168.10.104    # this machine's address
   unicast_peer {
      192.168.10.105                # the other peer's address
   }
   virtual_ipaddress {
      192.168.10.100/24             # the VIP
   }
   track_script {
      chk_haproxy
   }
}
EOF

Backup LB node: lb02

192.168.10.105 (/etc/keepalived/keepalived.conf)

cat > /etc/keepalived/keepalived.conf <<EOF
global_defs {
   notification_email {
   }
   router_id LVS_DEVEL02            # a nickname for this server
   vrrp_skip_check_adv_addr
   vrrp_garp_interval 0
   vrrp_gna_interval 0
}
vrrp_script chk_haproxy {
   script "killall -0 haproxy"
   interval 2
   weight 20
}
vrrp_instance haproxy-vip {
   state BACKUP                     # this node is the BACKUP
   priority 90                      # must be lower than the MASTER's priority
   interface ens33                  # interface the instance binds to
   virtual_router_id 60
   advert_int 1
   authentication {
      auth_type PASS
      auth_pass 1111
   }
   unicast_src_ip 192.168.10.105    # this machine's address
   unicast_peer {
      192.168.10.104                # the other peer's address
   }
   virtual_ipaddress {
      192.168.10.100/24             # the VIP (note the /24 prefix)
   }
   track_script {
      chk_haproxy
   }
}
EOF
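Recent Keepalived releases can validate the file before starting (an optional check; if your build lacks --config-test, just watch journalctl -u keepalived after starting):

keepalived --config-test -f /etc/keepalived/keepalived.conf && echo "keepalived config OK"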


Start the services

systemctl restart keepalived && systemctl enable keepalived && systemctl status keepalived


Test the VIP and failover
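One way to exercise the VIP and the Keepalived failover (a minimal sketch; interface ens33 as configured above):

# On lb01: the VIP should be bound here while HAProxy is healthy
ip addr show ens33 | grep 192.168.10.100

# Simulate a failure: stopping HAProxy makes chk_haproxy fail, so lb01's effective priority drops below lb02's
systemctl stop haproxy

# On lb02: within a few seconds the VIP should appear here
ip addr show ens33 | grep 192.168.10.100

# Restore lb01 and the VIP moves back (the MASTER has the higher base priority)
systemctl start haproxy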


Install the Kubernetes master nodes

Run the script on all three masters: 192.168.10.101, 192.168.10.102 and 192.168.10.103. It is the same script used earlier for a single-master install, with one key change: the cluster is initialized against the VIP (the --control-plane-endpoint in the init step points at 192.168.10.100:6443).

#!/bin/bash
# Make sure the script is run with bash
if [ -z "$BASH_VERSION" ]; then
    echo "Error: please run this script with bash"
    echo "Try: bash $0"
    exit 1
fi
# Kubernetes version to install
kubernetes_version="1.31.0"
k8s_version="v1.31"

# Node type from the first argument; defaults to "master" when missing or unrecognized
node_type=${1:-"master"}

# Usage notes
cat <<EOF
This script installs a Kubernetes cluster on Ubuntu.
Please confirm before running:
===================================================================
1. Install a cluster master node:  bash xxx-install.sh master
2. Install a worker node:          bash xxx-install.sh worker
3. You are running as the root user
4. The host has network access to the external mirror sources
5. The Kubernetes version is set at the top of the script
6. Flannel is the default CNI; switch to install_network_plugin_calico if preferred
7. Set the hostname before running
===================================================================
EOF

# Check that the current user is root
check_root_user() {
    if [[ $EUID -ne 0 ]]; then
        echo "Please run this script as root."
        exit 1
    fi
}
# Switch the apt sources to the Tsinghua (TUNA) mirror
configure_apt_sources() {
    echo "Configuring apt sources to use the TUNA mirror..."

    # Back up the existing sources file
    cp /etc/apt/sources.list /etc/apt/sources.list.bak

    # Detect the release codename
    codename=$(lsb_release -cs)

    # Write the TUNA mirror configuration
    cat > /etc/apt/sources.list <<EOF
# Source (deb-src) entries are commented out to speed up apt update; uncomment if needed
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ $codename main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ $codename main restricted universe multiverse
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ $codename-updates main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ $codename-updates main restricted universe multiverse
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ $codename-backports main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ $codename-backports main restricted universe multiverse
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ $codename-security main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ $codename-security main restricted universe multiverse

# Pre-release (proposed) sources, not recommended
# deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ $codename-proposed main restricted universe multiverse
# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ $codename-proposed main restricted universe multiverse
EOF

    echo "apt sources switched to the TUNA mirror."
}


# Use domestic mirrors (fixed to Alibaba Cloud)
select_mirror() {
    echo "Using domestic mirrors (Alibaba Cloud)."
    docker_image_repository="registry.aliyuncs.com/google_containers"
    apt_repository="https://mirrors.aliyun.com/kubernetes-new"
    flannel="https://gitee.com/mirrors/flannel/raw/master/Documentation/kube-flannel.yml"
    #flannel="https://github.com/flannel-io/flannel/releases/latest/download/kube-flannel.yml"
    calico="https://docs.projectcalico.org/v3.20/manifests/calico.yaml"   # note: kubectl has no --image-repository flag, so that option was dropped from this variable
}

# Check whether Kubernetes is already installed
check_kubernetes_installed() {
    if command -v kubeadm >/dev/null 2>&1; then
        echo "An existing Kubernetes installation was detected."
        read -p "Uninstall the existing Kubernetes? (y/n): " uninstall_choice
        if [[ $uninstall_choice = "y" || $uninstall_choice = "Y" ]]; then
            uninstall_kubernetes
        else
            echo "Installation cancelled."
            exit 0
        fi
    fi
}

# Uninstall Kubernetes
uninstall_kubernetes() {
    echo "Uninstalling Kubernetes..."
    uninstall_kubernetes_ubuntu
    echo "Kubernetes has been uninstalled."
}

# Uninstall Kubernetes (Ubuntu)
uninstall_kubernetes_ubuntu() {
    echo "Uninstalling Kubernetes..."
    if command -v kubeadm &>/dev/null; then
        kubeadm reset -f
    else
        echo "kubeadm not found; cannot reset. Please reset Kubernetes manually."
    fi
    if command -v kubectl &>/dev/null; then
        kubectl delete -f $flannel
        kubectl delete -f $calico
        apt remove -y kubeadm kubelet kubectl containerd
        rm -rf /etc/kubernetes /var/lib/etcd /var/lib/kubelet
    else
        echo "kubectl not found; cannot delete resources. Please clean up manually."
    fi
}

# Disable the firewall (Ubuntu)
disable_firewall() {
    echo "Disabling the firewall..."
    ufw disable
}

# Turn off and disable swap
disable_swap() {
    echo "Disabling swap..."
    swapoff -a
    sed -i '/swap/d' /etc/fstab
}

# Tune kernel parameters
optimize_kernel() {
    echo "Tuning kernel parameters..."
    sysctl_file="/etc/sysctl.d/kubernetes.conf"
    echo "net.bridge.bridge-nf-call-ip6tables = 1" >$sysctl_file
    echo "net.bridge.bridge-nf-call-iptables = 1" >>$sysctl_file
    echo "net.ipv4.ip_forward=1" >>$sysctl_file
    echo "vm.max_map_count=262144" >>$sysctl_file
    modprobe br_netfilter # load the module now
    # Make sure the module is loaded on boot
    echo "br_netfilter" | tee /etc/modules-load.d/br_netfilter.conf
    sysctl -p $sysctl_file
}

# Disable transparent huge pages (THP)
disable_transparent_hugepage() {
    echo "Disabling transparent huge pages..."
    thp_file="/etc/systemd/system/disable-thp.service"
    echo "[Unit]" >$thp_file
    echo "Description=Disable Transparent Huge Pages (THP)" >>$thp_file
    echo "DefaultDependencies=no" >>$thp_file
    echo "After=local-fs.target" >>$thp_file
    echo "Before=apparmor.service" >>$thp_file
    echo "" >>$thp_file
    echo "[Service]" >>$thp_file
    echo "Type=oneshot" >>$thp_file
    echo "ExecStart=/bin/sh -c 'echo never > /sys/kernel/mm/transparent_hugepage/enabled && echo never > /sys/kernel/mm/transparent_hugepage/defrag'" >>$thp_file
    echo "" >>$thp_file
    echo "[Install]" >>$thp_file
    echo "WantedBy=multi-user.target" >>$thp_file
    chmod 664 $thp_file
    systemctl daemon-reload
    systemctl enable disable-thp
    systemctl start disable-thp
}

# Install kubeadm, kubelet and kubectl
install_kubernetes() {
    echo "Installing kubeadm, kubelet and kubectl (version: $kubernetes_version)..."

    apt update
    apt install -y apt-transport-https ca-certificates curl bridge-utils
    modprobe br_netfilter # load the required kernel module

    # Add the Kubernetes GPG key and apt repository
    curl -fsSL $apt_repository/core/stable/$k8s_version/deb/Release.key | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
    echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] $apt_repository/core/stable/$k8s_version/deb/ /" | tee /etc/apt/sources.list.d/kubernetes.list

    apt update
    apt install -y kubeadm=$kubernetes_version-1.1 kubelet=$kubernetes_version-1.1 kubectl=$kubernetes_version-1.1

    # Install and configure shell completion
    apt install bash-completion -y
    source /usr/share/bash-completion/bash_completion
    source <(kubectl completion bash)
    echo "source <(kubectl completion bash)" >> ~/.bashrc
}

# Install containerd
install_containerd() {
    echo "Installing containerd..."
    apt install -y containerd

    echo "Generating the default config and adjusting it..."
    mkdir -p /etc/containerd
    containerd config default >/etc/containerd/config.toml

    # Point the registry config_path at /etc/containerd/certs.d
    sed -i 's#config_path = ""#config_path = "/etc/containerd/certs.d"#' /etc/containerd/config.toml
    # Use pause 3.10 as the sandbox image
    sed -i "s|sandbox_image = .*|sandbox_image = \"${docker_image_repository}/pause:3.10\"|g" /etc/containerd/config.toml
    # Use the systemd cgroup driver and the mirror image repository
    sed -i 's#SystemdCgroup = false#SystemdCgroup = true#' /etc/containerd/config.toml
    sed -i "s#registry.k8s.io#${docker_image_repository}#" /etc/containerd/config.toml

    echo "Configuring registry mirrors..."
    # Mirror configuration for docker.io
    mkdir -p /etc/containerd/certs.d/docker.io
    cat > /etc/containerd/certs.d/docker.io/hosts.toml <<EOF
server = "https://docker.io"
[host."https://dockerpull.com"]
  capabilities = ["pull", "resolve"]

[host."https://docker.m.daocloud.io"]
  capabilities = ["pull", "resolve"]

[host."https://hub.nat.tf"]
  capabilities = ["pull", "resolve"]
EOF

    # Mirror configuration for registry.k8s.io
    mkdir -p /etc/containerd/certs.d/registry.k8s.io
    cat > /etc/containerd/certs.d/registry.k8s.io/hosts.toml <<EOF
server = "https://registry.k8s.io"
[host."https://k8s.m.daocloud.io"]
  capabilities = ["pull", "resolve", "push"]
EOF

    # Private registry configuration (author's internal registry; adjust or remove as needed)
    mkdir -p /etc/containerd/certs.d/10.20.220.230
    cat > /etc/containerd/certs.d/10.20.220.230/hosts.toml <<EOF
server = "http://10.20.220.230"
[host."http://10.20.220.230"]
  capabilities = ["pull", "resolve", "push"]
  skip_verify = true
EOF

    echo "Restarting containerd..."
    systemctl restart containerd
    systemctl enable containerd
    echo "containerd configured."
}
# Run kubeadm init and copy the kubeconfig
initialize_kubernetes_cluster() {
    if command -v kubeadm &>/dev/null; then
        kubeadm reset -f
    else
        echo "kubeadm not found; cannot reset. Please reset Kubernetes manually."
        exit 1
    fi

    echo "Running kubeadm init..."
    kubeadm init --kubernetes-version=v${kubernetes_version} \
        --image-repository=${docker_image_repository} \
        --service-cidr=10.96.0.0/16 \
        --pod-network-cidr=10.244.0.0/16 \
        --control-plane-endpoint=192.168.10.100:6443 -v=5   # <- replace with your own VIP:PORT

    # --kubernetes-version        Kubernetes version to install
    # --image-repository          container image registry (default is registry.k8s.io)
    # --service-cidr              IP range for Kubernetes Services
    # --pod-network-cidr          IP range for Pods
    # --control-plane-endpoint    control-plane endpoint; in an HA cluster this is the load balancer (VIP) address
    echo "kubeadm init completed."
    local node_name=$(hostname)
    # Copy the kubeconfig file
    echo "Copying the kubeconfig file..."
    mkdir -p $HOME/.kube
    \cp /etc/kubernetes/admin.conf $HOME/.kube/config
    chown $(id -u):$(id -g) $HOME/.kube/config
    echo "kubeconfig copied to $HOME/.kube/config."
    # Note: control-plane taints are left in place, as expected for a production HA cluster
    # Wait for the API server to become reachable
    for i in {1..20}; do
        if kubectl get nodes &>/dev/null; then
            break
        fi
        echo "Waiting for the API server to become ready ($i/20)..."
        sleep 5
    done


}

# Install the network add-on (Flannel)
install_network_plugin_flannel() {
    echo "Installing the Flannel network add-on..."
    kubectl apply -f $flannel
}

# Install the network add-on (Calico)
install_network_plugin_calico() {
    echo "Installing the Calico network add-on..."
    kubectl create -f $calico
}

# Main
main() {
    select_mirror
    check_root_user
    configure_apt_sources
    check_kubernetes_installed
    disable_firewall
    disable_swap
    disable_transparent_hugepage
    install_kubernetes
    install_containerd
    optimize_kernel

    if [[ "$node_type" = "master" ]]; then
        initialize_kubernetes_cluster
        install_network_plugin_flannel
        # To use Calico instead, comment out the flannel line above and uncomment the line below
        # install_network_plugin_calico
    else
        echo "Worker node: skipping cluster initialization."
        echo "Join the cluster with the kubeadm join command."
    fi
}

# Run main
main

Steps on k8s-master01

On the first machine, simply run the script above:

root@k8s-master01:~# bash install_k8s.sh   # no argument: defaults to installing a master node


Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

  export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

You can now join any number of control-plane nodes by copying certificate authorities
and service account keys on each node and then running the following as root:

  kubeadm join 192.168.10.100:6443 --token pqf2kd.muipoy10ung4c6fm \
	--discovery-token-ca-cert-hash sha256:caa227a66409901e6333e6e838868ffc0cfc2ca1bf937d67016d8e76c7664975 \
	--control-plane 

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.10.100:6443 --token pqf2kd.muipoy10ung4c6fm \
	--discovery-token-ca-cert-hash sha256:caa227a66409901e6333e6e838868ffc0cfc2ca1bf937d67016d8e76c7664975 

Allow root to log in over SSH


Ubuntu does not allow root SSH logins by default. The other two masters need to copy certificates from master01 over scp, so root login has to be enabled on master01.

vim /etc/ssh/sshd_config

PermitRootLogin yes


root@k8s-master01:~# passwd  root
New password: 
Retype new password: 
passwd: password updated successfully
root@k8s-master01:~# systemctl  restart sshd
root@k8s-master01:~# 

Steps on k8s-master02 and k8s-master03

Run the installation script

The procedure is identical on both machines:

root@k8s-master02:~# bash install_k8s.sh node   # install in worker (node) mode, i.e. without cluster init; the node is joined as a control-plane member afterwards
root@k8s-master03:~# bash install_k8s.sh node   # same as on k8s-master02


Copy the cluster certificates to the other master nodes

These certificates are the "identity papers" that let control-plane (master) nodes trust each other, let the API server talk to etcd, and let components authenticate.
Copying the same CA set and keys to a new master is what allows it to join the cluster and be trusted without running kubeadm init again.

What each certificate/key is for

File                         Purpose
ca.crt / ca.key              Root CA that signs every other certificate; all nodes must share it, otherwise they will not trust each other.
sa.key / sa.pub              ServiceAccount signing key pair used to issue JWT tokens for Pods; all masters must use the same pair or the controller-manager cannot validate tokens.
front-proxy-ca.crt / .key    CA for the aggregation layer (metrics-server, custom API servers); likewise shared by all masters.
etcd/ca.crt / ca.key         Dedicated etcd CA, so the new master's etcd member can join the existing etcd cluster and be trusted by kube-apiserver.

What happens if you don't copy them?

  • kubeadm join --control-plane would regenerate certificates → the new CA does not match the existing cluster → the node cannot join, or etcd communication fails.
  • ServiceAccount token validation fails → the controller-manager cannot manage Pods.
  • The TLS handshake between the API server and etcd fails → the control plane errors out on startup.

In one sentence

"Copy the same CA / SA / front-proxy-CA / etcd-CA set over, and the new master reports for duty with the cluster's official ID card instead of forging its own."

root@k8s-master02:~# cd /root && mkdir -p /etc/kubernetes/pki/etcd &&mkdir -p ~/.kube/
root@k8s-master03:~# cd /root && mkdir -p /etc/kubernetes/pki/etcd &&mkdir -p ~/.kube/

root@k8s-master02:~# scp -rp root@192.168.10.101:/etc/kubernetes/pki/{ca.*,sa.*,front-proxy-ca.*} /etc/kubernetes/pki/  # -rp: recursive copy, preserving permissions

root@k8s-master02:~# scp -rp root@192.168.10.101:/etc/kubernetes/pki/etcd/ca.* /etc/kubernetes/pki/etcd/

root@k8s-master03:/etc/kubernetes/pki# scp -rp root@192.168.10.101:/etc/kubernetes/pki/{ca.*,sa.*,front-proxy-ca.*} /etc/kubernetes/pki/  # -rp: recursive copy, preserving permissions
root@k8s-master03:/etc/kubernetes/pki# scp -rp root@192.168.10.101:/etc/kubernetes/pki/etcd/ca.* /etc/kubernetes/pki/etcd/
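Optionally verify the copies by comparing checksums on master01 and the new masters; the shared CA material must be byte-identical on every control-plane node:

md5sum /etc/kubernetes/pki/ca.crt /etc/kubernetes/pki/sa.pub \
  /etc/kubernetes/pki/front-proxy-ca.crt /etc/kubernetes/pki/etcd/ca.crt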


Join the other master nodes

Run this on both of the other machines:

kubeadm join 192.168.10.100:6443 --token pqf2kd.muipoy10ung4c6fm \
	--discovery-token-ca-cert-hash sha256:caa227a66409901e6333e6e838868ffc0cfc2ca1bf937d67016d8e76c7664975 \
	--control-plane 


	mkdir -p $HOME/.kube
	sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
	sudo chown $(id -u):$(id -g) $HOME/.kube/config


Worker node join command

Not demonstrated here. If you have worker nodes, install Kubernetes on them with the script and then run this command to join them to the cluster:

kubeadm join 192.168.10.100:6443 --token pqf2kd.muipoy10ung4c6fm \
	--discovery-token-ca-cert-hash sha256:caa227a66409901e6333e6e838868ffc0cfc2ca1bf937d67016d8e76c7664975 

Install kubectl command completion

# there are a lot of kubectl commands, so completion is well worth configuring

apt install bash-completion -y
source /usr/share/bash-completion/bash_completion
source <(kubectl completion bash)
echo "source <(kubectl completion bash)" >> ~/.bashrc

Inspect the etcd cluster

With 3 members, etcd needs a quorum of floor(3/2)+1 = 2, so at most one member may fail.

apt  install etcd-client -y

root@k8s-master01:~# ETCDCTL_API=3  etcdctl  -w table --endpoints=https://127.0.0.1:2379 --cacert="/etc/kubernetes/pki/etcd/ca.crt" --cert="/etc/kubernetes/pki/etcd/healthcheck-client.crt" --key="/etc/kubernetes/pki/etcd/healthcheck-client.key" member list
+------------------+---------+--------------+-----------------------------+-----------------------------+
|        ID        | STATUS  |     NAME     |         PEER ADDRS          |        CLIENT ADDRS         |
+------------------+---------+--------------+-----------------------------+-----------------------------+
|  5759d94160392a7 | started | k8s-master02 | https://192.168.10.102:2380 | https://192.168.10.102:2379 |
| 7d48497ef5c85dee | started | k8s-master03 | https://192.168.10.103:2380 | https://192.168.10.103:2379 |
| 8f0b2f8f5a943bdf | started | k8s-master01 | https://192.168.10.101:2380 | https://192.168.10.101:2379 |
+------------------+---------+--------------+-----------------------------+-----------------------------+





root@k8s-master01:~# ETCDCTL_API=3  etcdctl  --endpoints=https://192.168.10.101:2379,https://192.168.10.102:2379,https://192.168.10.103:2379 --cacert="/etc/kubernetes/pki/etcd/ca.crt" --cert="/etc/kubernetes/pki/etcd/healthcheck-client.crt" --key="/etc/kubernetes/pki/etcd/healthcheck-client.key"   endpoint status --write-out=table
+-----------------------------+------------------+---------+---------+-----------+-----------+------------+
|          ENDPOINT           |        ID        | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+-----------------------------+------------------+---------+---------+-----------+-----------+------------+
| https://192.168.10.101:2379 | 8f0b2f8f5a943bdf |  3.5.15 |  2.1 MB |      true |         2 |       7362 |
| https://192.168.10.102:2379 |  5759d94160392a7 |  3.5.15 |  2.1 MB |     false |         2 |       7362 |
| https://192.168.10.103:2379 | 7d48497ef5c85dee |  3.5.15 |  2.0 MB |     false |         2 |       7362 |
+-----------------------------+------------------+---------+---------+-----------+-----------+------------+
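A quick health check across all three members (same certificate flags as above) confirms quorum:

ETCDCTL_API=3 etcdctl --endpoints=https://192.168.10.101:2379,https://192.168.10.102:2379,https://192.168.10.103:2379 \
  --cacert=/etc/kubernetes/pki/etcd/ca.crt \
  --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt \
  --key=/etc/kubernetes/pki/etcd/healthcheck-client.key \
  endpoint health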


Simulate a master node failure
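A typical drill (a minimal sketch): take one master offline, then confirm the cluster stays reachable through the VIP and that etcd keeps quorum with the remaining two members.

# On k8s-master01: simulate a failure by shutting the node down
poweroff

# On k8s-master02 or k8s-master03: the API keeps answering through the VIP
kubectl get nodes                     # k8s-master01 eventually turns NotReady

# etcd keeps quorum with 2 of 3 members
ETCDCTL_API=3 etcdctl --endpoints=https://192.168.10.102:2379,https://192.168.10.103:2379 \
  --cacert=/etc/kubernetes/pki/etcd/ca.crt \
  --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt \
  --key=/etc/kubernetes/pki/etcd/healthcheck-client.key \
  endpoint health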

