部署高可用kubernetes集群

部署高可用kubernetes集群

视频教程: https://www.bilibili.com/video/bv15z4y1r7Kw

#k8s二进制安装及运维教程博客

https://www.oiox.cn/index.php/155.html

# Install and enable kubectl command-line tab completion

yum install bash-completion -y

# Load the completion framework into the current shell
source /usr/share/bash-completion/bash_completion

# Load kubectl completions now, and persist for future logins via ~/.bashrc
source <(kubectl completion bash)

echo "source <(kubectl completion bash)" >> ~/.bashrc

#查看集群状态

[root@master01 ~]# kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME STATUS MESSAGE ERROR
scheduler Unhealthy Get "http://127.0.0.1:10251/healthz": dial tcp 127.0.0.1:10251: connect: connection refused
controller-manager Unhealthy Get "http://127.0.0.1:10252/healthz": dial tcp 127.0.0.1:10252: connect: connection refused
etcd-0 Healthy {"health":"true"}

准备开始

  • 三台满足 kubeadm 最低要求的机器,作为主(控制平面)节点
  • 若干台满足 kubeadm 最低要求的机器,作为工作节点
  • 在集群中,所有计算机之间的完全网络连接(公网或私网)
  • 所有机器上的 sudo 权限
  • 每台设备对系统中所有节点的 SSH 访问
  • 在所有机器上安装 kubeadm 和 kubelet,kubectl 是可选的。

  安装前准备

1、准备4台机器:2G或更大内存、2核或以上CPU、30G以上硬盘(物理机、云主机或虚拟机均可)
2、系统:CentOS 7.x

环境准备

# Run locally: push this machine's SSH public key to all hosts for password-less login
bash ./local_copy_ssh_to_host.sh

# Set each hostname per the plan (run the matching line on each of the 4 machines)
hostnamectl set-hostname master01
hostnamectl set-hostname master02
hostnamectl set-hostname master03
hostnamectl set-hostname worker01

# Run on every machine: add cluster name resolution to /etc/hosts
# (k8svip is the virtual IP served by keepalived/haproxy)
cat >> /etc/hosts << EOF
192.168.0.199 k8svip
192.168.0.200 master01
192.168.0.201 master02
192.168.0.202 master03
192.168.0.210 worker01
EOF

# Set up password-less SSH from this node to every other node
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa &> /dev/null
ssh-copy-id root@master01
ssh-copy-id root@master02
ssh-copy-id root@master03
ssh-copy-id root@worker01

# Disable the firewall (run on ALL 4 machines)
systemctl stop firewalld && systemctl disable firewalld

# Disable SELinux (run on ALL 4 machines).
# Anchor the pattern to the SELINUX= line: /etc/selinux/config also contains
# the word "enforcing" inside its comment lines, which the original unanchored
# sed would mangle as well.
sed -i 's/^SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config && setenforce 0

# Disable swap now and comment out swap entries in fstab (run on ALL 4 machines)
swapoff -a && sed -ri 's/.*swap.*/#&/' /etc/fstab

# Time sync (run on ALL 4 machines)
yum install ntpdate -y && ntpdate time.windows.com

#升级系统内核,默认3.10.0-514.el7.x86_64版本内核与docker,k8s有不稳定因素问题,最好升级到4.4.182-1.el7.elrepo

  #查看内核版本
   [root@master01 ~]# uname -a

   Linux master01 3.10.0-514.el7.x86_64 #1 SMP Tue Nov 22 16:42:41 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux

# Install the ELRepo repository (HTTPS)
rpm -Uvh https://www.elrepo.org/elrepo-release-7.0-3.el7.elrepo.noarch.rpm
# After installing, check that the new kernel's menuentry in /boot/grub2/grub.cfg
# contains an initrd16 line; if it does not, install again.
yum --enablerepo=elrepo-kernel install -y kernel-lt

# Boot from the new kernel by default. The title must match the menuentry in
# grub.cfg exactly (per the target version 4.4.182-1.el7.elrepo stated above);
# verify with: awk -F\' '/^menuentry/{print $2}' /boot/grub2/grub.cfg
grub2-set-default 'CentOS Linux (4.4.182-1.el7.elrepo.x86_64) 7 (Core)' && reboot
 
  #查看内核版本
  [root@master01 ~]# uname -a

  Linux master01 4.4.182-1.el7.elrepo.x86_64 #1 SMP Tue Jun 4 09:50:25 EDT 2019 x86_64 x86_64 x86_64 GNU/Linux

安装Docker
# Step 1: install required system tools
sudo yum install -y yum-utils device-mapper-persistent-data lvm2
# Step 2: add the Aliyun Docker CE repository
sudo yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# Step 3: refresh metadata and install a pinned Docker-CE version
sudo yum makecache fast
sudo yum -y install docker-ce-20.10.0
# Step 4: start Docker and enable it at boot
# (the original omitted sudo on the second systemctl, which fails for non-root)
sudo systemctl start docker && sudo systemctl enable docker

# 注意:
# 官方软件源默认启用了最新的软件,您可以通过编辑软件源的方式获取各个版本的软件包。例如官方并没有将测试版本的软件源置为可用,您可以通过以下方式开启。同理可以开启各种测试版本等。
# vim /etc/yum.repos.d/docker-ce.repo
#   将[docker-ce-test]下方的enabled=0修改为enabled=1
#
# 安装指定版本的Docker-CE:
# Step 1: 查找Docker-CE的版本:
# yum list docker-ce.x86_64 --showduplicates | sort -r
#   Loading mirror speeds from cached hostfile
#   Loaded plugins: branch, fastestmirror, langpacks
#   docker-ce.x86_64            17.03.1.ce-1.el7.centos            docker-ce-stable
#   docker-ce.x86_64            17.03.1.ce-1.el7.centos            @docker-ce-stable
#   docker-ce.x86_64            17.03.0.ce-1.el7.centos            docker-ce-stable
#   Available Packages
# Step2: 安装指定版本的Docker-CE: (VERSION例如上面的17.03.0.ce.1-1.el7.centos)
# sudo yum -y install docker-ce-[VERSION]

# Docker registry mirror. Replace "https://s2q9fn53.mirror.aliyuncs.com" with the
# accelerator address from your own Aliyun Container Registry console.
# The accelerator is configured via /etc/docker/daemon.json
sudo mkdir -p /etc/docker
sudo tee /etc/docker/daemon.json <<-'EOF'
{
  "registry-mirrors": ["https://s2q9fn53.mirror.aliyuncs.com"]
}
EOF
sudo systemctl daemon-reload && sudo systemctl restart docker

安装kubelet、kubeadm、kubectl

  • kubeadm:用来初始化集群的指令。
  • kubelet:在集群中的每个节点上用来启动 pod 和容器等。
  • kubectl:用来与集群通信的命令行工具。

kubeadm 不能 帮您安装或者管理 kubelet 或 kubectl,所以您需要确保它们与通过 kubeadm 安装的控制平面的版本相匹配。 如果不这样做,则存在发生版本偏差的风险,可能会导致一些预料之外的错误和问题。 然而,控制平面与 kubelet 间的相差一个次要版本不一致是支持的,但 kubelet 的版本不可以超过 API 服务器的版本。 例如,1.7.0 版本的 kubelet 可以完全兼容 1.8.0 版本的 API 服务器,反之则不可以。

# Add the Aliyun Kubernetes YUM repository
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF

# Install versions pinned to the control-plane release (v1.19.4) and enable kubelet
yum install -y kubelet-1.19.4 kubeadm-1.19.4 kubectl-1.19.4 && systemctl enable kubelet && systemctl start kubelet

部署Kubernetes Master

在192.168.0.200(Master)执行

# NOTE: before `kubeadm init`, pre-pull the container images Kubernetes needs.
# List the images required by this kubeadm version:
kubeadm config images list

# Generate a helper script that pulls each image from the Aliyun mirror,
# re-tags it with the k8s.gcr.io name kubeadm expects, then drops the mirror tag.
# Use '>' (truncate) instead of '>>' so re-running does not append a duplicate
# copy of the script, and quote the heredoc delimiter so no \$ escaping is needed.
cat > alik8simages.sh << 'EOF'
#!/bin/bash
list='kube-apiserver:v1.19.4
kube-controller-manager:v1.19.4
kube-scheduler:v1.19.4
kube-proxy:v1.19.4
pause:3.2
etcd:3.4.13-0
coredns:1.7.0'
# $list is deliberately unquoted so it word-splits into one image per line
for item in $list
  do
    docker pull registry.aliyuncs.com/google_containers/$item && docker tag registry.aliyuncs.com/google_containers/$item k8s.gcr.io/$item && docker rmi registry.aliyuncs.com/google_containers/$item
  done
EOF
# Run the script to download the images
bash alik8simages.sh

keepalived + haproxy 搭建高可用集群

#https://github.com/kubernetes/kubeadm/blob/master/docs/ha-considerations.md#options-for-software-load-balancing
# Run on each master: install the load-balancer pair and back up default configs
yum install haproxy keepalived -y
mv /etc/keepalived/keepalived.conf /etc/keepalived/keepalived.conf.bak
mv /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy.cfg.bak

# Copy the prepared haproxy/keepalived configs from the local machine to every
# master host (same three files per host)
for host in 192.168.0.200 192.168.0.201 192.168.0.202; do
  scp ./etc/haproxy/haproxy.cfg root@${host}:/etc/haproxy/haproxy.cfg
  scp ./etc/keepalived/check_apiserver.sh root@${host}:/etc/keepalived/check_apiserver.sh
  scp ./etc/keepalived/keepalived.conf root@${host}:/etc/keepalived/keepalived.conf
done

# Run on the master: start keepalived/haproxy now and enable them at boot
systemctl enable keepalived --now
systemctl enable haproxy --now
# Initialize the cluster through the HA VIP (haproxy front end on port 8443);
# --upload-certs lets additional control-plane nodes fetch the shared certs
kubeadm init \
--control-plane-endpoint k8svip:8443 \
--kubernetes-version=v1.19.4 \
--service-cidr=10.96.0.0/12 \
--pod-network-cidr=10.244.0.0/16 \
--upload-certs

#提示initialized successfully!表示初始化成功
#To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

# You should now deploy a pod network to the cluster.
# Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
#   https://kubernetes.io/docs/concepts/cluster-administration/addons/

# You can now join any number of the control-plane node running the following command on each as root:
#复制你屏幕上显示的
  kubeadm join k8svip:8443 --token s8nl1g.samn73s2wzmpvl1x \
    --discovery-token-ca-cert-hash sha256:xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx \
    --control-plane --certificate-key xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

# Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
# As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
# "kubeadm init phase upload-certs --upload-certs" to reload certs afterward.

# Then you can join any number of worker nodes by running the following on each as root:
#复制你屏幕上显示的
kubeadm join k8svip:8443 --token s8nl1g.samn73s2wzmpvl1x \
    --discovery-token-ca-cert-hash sha256:xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

部署CNI网络插件

# Download the flannel CNI manifest. Run on the master only: the manifest uses a
# DaemonSet controller, which schedules a flannel pod onto every node.
# (Kubernetes supports several network plugins: flannel, calico, canal, ...)

wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
# The default image registry may be unreachable; rewrite it with sed BEFORE
# applying the manifest (the original applied the unpatched file first).
# NOTE(review): confirm the registry string actually present in the downloaded
# yml matches the sed pattern below before relying on it.
sed -i 's/quay.azk8s.cn/quay-mirror.qiniu.com/g' kube-flannel.yml

kubectl apply -f kube-flannel.yml
#查看下集群节点状态,可以看到是notready状态,执行下网络文件就可以 ,二进制安装是kubelet和docker服务没启动
[root@master ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master NotReady master 3m16s v1.17.4
work1 NotReady <none> 17s v1.17.4
work2 NotReady <none> 32s v1.17.4
work3 NotReady <none> 32s v1.17.4

[root@master ~]# kubectl apply -f kube-flannel.yml
namespace/kube-flannel unchanged
clusterrole.rbac.authorization.k8s.io/flannel unchanged
clusterrolebinding.rbac.authorization.k8s.io/flannel unchanged
serviceaccount/flannel unchanged
configmap/kube-flannel-cfg unchanged
daemonset.apps/kube-flannel-ds unchanged

[root@master ~]# kubectl get nodes
NAME     STATUS     ROLES    AGE     VERSION
master   Ready      master   7m22s   v1.17.4
work1    Ready      worker   4m23s   v1.17.4
work2    Ready      worker   4m38s   v1.17.4
work3    Ready      worker   4m38s   v1.17.4

#新加入的node节点显示NotReady
这种情况是因为有某些关键的 pod 没有运行起来,首先使用如下命令来看一下kube-system的 pod 状态:

[root@master01 ~]# kubectl get pod -n kube-system

kube-proxy-qtg5b 1/1 Running 5 5d20h
kube-proxy-s9vhm 0/1 ContainerCreating 0 17s
kube-proxy-vgp2g 1/1 Running 6 5d21h

[root@master01 ~]# kubectl describe pod kube-proxy-gwtv7 -n kube-system

Warning FailedCreatePodSandBox 24s kubelet Failed to create pod sandbox: rpc error: code = Unknown desc = failed pulling image "k8s.gcr.io/pause:3.2": Error response from daemon: Get https://k8s.gcr.io/v2/: dial tcp 142.251.8.82:443: connect: connection timed out

[root@worker02 scripts]# docker pull registry.aliyuncs.com/google_containers/kube-proxy:v1.19.4

[root@worker02 scripts]# docker pull registry.aliyuncs.com/google_containers/pause:3.2

[root@worker02 scripts]# docker tag registry.aliyuncs.com/google_containers/kube-proxy:v1.19.4  k8s.gcr.io/kube-proxy:v1.19.4

[root@worker02 scripts]# docker tag registry.aliyuncs.com/google_containers/pause:3.2  k8s.gcr.io/pause:3.2

#安装参考
https://www.cnblogs.com/zhaohongting/p/16395198.html
问题解决记录

1、解决k8s集群在节点运行kubectl出现的错误: The connection to the server localhost:8080 was refused - did you specify the right host or port? 出现这个问题的原因是kubectl命令需要使用kubernetes-admin来运行 在master主机上复制此文件到worker主机上:/etc/kubernetes/admin.conf

# Run on the master: copy the admin kubeconfig to the worker
scp /etc/kubernetes/admin.conf root@worker01:/etc/kubernetes/
# Run on worker01: point kubectl at the copied kubeconfig permanently
echo "export KUBECONFIG=/etc/kubernetes/admin.conf" >> ~/.bash_profile
source ~/.bash_profile

2.nodes节点没有标签 https://blog.csdn.net/Lingoesforstudy/article/details/116484624
[root@master ~]# kubectl get nodes
NAME     STATUS     ROLES    AGE     VERSION
master   Ready      master   7m22s   v1.17.4
work1    Ready               4m23s   v1.17.4
work2    Ready               4m38s   v1.17.4

[root@master01 tools]# kubectl label no worker01 kubernetes.io/role=worker

3.报错

[WARNING SystemVerification]: this Docker version is not on the list of validated versions: 20.10.0. Latest validated version: 19.03
error execution phase preflight: [preflight] Some fatal errors occurred:
[ERROR FileContent--proc-sys-net-bridge-bridge-nf-call-iptables]: /proc/sys/net/bridge/bridge-nf-call-iptables contents are not set to 1
[preflight] If you know what you are doing, you can make a check non-fatal with `--ignore-preflight-errors=...`
To see the stack trace of this error execute with --v=5 or higher

# Fix: let bridged traffic pass through iptables.
# The echo takes effect immediately but does NOT survive a reboot, so also
# write a sysctl drop-in and reload.

echo 1 > /proc/sys/net/bridge/bridge-nf-call-iptables
echo 1 > /proc/sys/net/bridge/bridge-nf-call-ip6tables
cat > /etc/sysctl.d/k8s.conf << 'EOF'
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF
sysctl --system

 4.alik8ssimages.sh脚本

[root@master01 /]# cat /server/scripts/alik8simages.sh
#!/bin/bash
# Pull each required Kubernetes image from the Aliyun mirror, re-tag it with
# the k8s.gcr.io name kubeadm expects, then delete the mirror-named tag.
list='kube-apiserver:v1.19.4
kube-controller-manager:v1.19.4
kube-scheduler:v1.19.4
kube-proxy:v1.19.4
pause:3.2
etcd:3.4.13-0
coredns:1.7.0'
# $list is deliberately unquoted so it word-splits into one image per line
for item in $list
do

docker pull registry.aliyuncs.com/google_containers/$item && docker tag registry.aliyuncs.com/google_containers/$item k8s.gcr.io/$item && docker rmi registry.aliyuncs.com/google_containers/$item

done

5.新节点worker02加入集群

当k8s集群资源不够用,则需要往集群中加node节点,而默认的token有效期为24h,当过期之后便不可用,以下内容将说明master节点重新生成token信息,同时node节点正确加入集群资源

https://www.cnblogs.com/kazihuo/p/12661238.html

# master上操作

# 生成token

[root@master1 ~]# kubeadm token create

k2dkax.gtz3odrb47owoum9

# 查看token

[root@master1 ~]# kubeadm token list

# 获取ca证书sha256编码hash值

[root@master1 ~]# openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'

eb92768acb748d722ef7d97bc60751a375b67b12a46c7a7232c54cdb378d2e61

# 需新增node节点上操作

# 配置完docker环境后安装kube相关组件(组件版本最好和master一致)

[root@node4 ~]# yum install -y kubelet-1.19.4 kubeadm-1.19.4 kubectl-1.19.4

# node节点加入集群

[root@node4 ~]# kubeadm join k8svip:8443 --token k2dkax.gtz3odrb47owoum9 --discovery-token-ca-cert-hash sha256:eb92768acb748d722ef7d97bc60751a375b67b12a46c7a7232c54cdb378d2e61

说明:--token 和 --discovery-token-ca-cert-hash 后分别填写在master上生成的信息! 

#master节点加入集群

https://www.cnblogs.com/qianyuliang/p/17044626.html

1.生成certificate-key

kubeadm init phase upload-certs --upload-certs

2.生成token

kubeadm token create --print-join-command

3.新节点加入集群

kubeadm join k8svip:6443 --token xxx --discovery-token-ca-cert-hash xxx --control-plane --certificate-key xxx
注意事项:
  1. 不要使用 --experimental-control-plane,会报错
  2. 要加上--control-plane --certificate-key ,不然就会添加为node节点而不是master
  3. join的时候节点上不要部署,如果部署了kubeadm reset后再join

 #查看kube-controller-manager和kube-scheduler运行状态和运行节点

[root@master01 scripts]# kubectl get endpoints kube-controller-manager -n kube-system -o yaml

    control-plane.alpha.kubernetes.io/leader: '{"holderIdentity":"master03_2736bcdf-60fd-460d-a25c-6b50a6dc36fe","leaseDurationSeconds":15,"acquireTime":"2023-02-15T02:57:24Z","renewTime":"2023-02-15T08:15:24Z","leaderTransitions":15}'

[root@master01 scripts]# kubectl get endpoints kube-scheduler -n kube-system -o yaml
    control-plane.alpha.kubernetes.io/leader: '{"holderIdentity":"master01_f92909da-87ba-40af-a064-7383a7e9bc9f","leaseDurationSeconds":15,"acquireTime":"2023-02-15T02:56:47Z","renewTime":"2023-02-15T08:18:09Z","leaderTransitions":14}'

#重置集群

1.所有节点 :

# Typo fixed: 'resrt' -> 'reset'; -f skips the confirmation prompt
kubeadm reset -f

# ${HOME:?} aborts if HOME is unset/empty instead of deleting the wrong path
rm -rf "${HOME:?}/.kube/config"

2.重新初始化

[root@master01 ~]# which kubeadm
/usr/bin/kubeadm

# Re-initialize the cluster. Note: the original had blank lines between the
# backslash continuations, which breaks the command — a '\' must be the last
# character before the next (non-blank) line.
kubeadm init --control-plane-endpoint k8svip:8443 \
  --kubernetes-version=v1.19.4 \
  --service-cidr=10.96.0.0/12 \
  --pod-network-cidr=10.244.0.0/16 \
  --upload-certs

3.初始化失败,重置kubeadm ,使用/usr/bin/kubeadm加入master节点 ,/usr/local/bin/kubeadm会报错

[root@master03 ~]# which kubeadm
/usr/local/bin/kubeadm
[root@master03 ~]# /usr/bin/kubeadm join k8svip:8443 --token jralh9.danq8t0cqyurszu6 \
> --discovery-token-ca-cert-hash sha256:147dce999161b1853c6cd8348dd04e2ba72d7aaded375445660cd0152aa10219 \
> --control-plane --certificate-key 09ddc6e545cb2cf6b5244c473e0a3627d356caf402c798f4718cd3f359d9d27d

 

posted @ 2023-02-07 17:31  跳出圈子  阅读(204)  评论(0)    收藏  举报