1. Deploying a Kubernetes Cluster
Environment preparation (this uses v1.33.4; for newer versions, consult the official documentation, which is very detailed)
Three nodes, OS: ubuntu-24.04.1-live-server-amd64, minimal install.
Install vim
sudo apt update
sudo apt install -y vim
Enable IPv4 forwarding
# Temporary (takes effect immediately)
sudo sysctl -w net.ipv4.ip_forward=1
# Permanent (persists across reboots)
echo "net.ipv4.ip_forward = 1" | sudo tee -a /etc/sysctl.conf
sudo sysctl -p
br_netfilter
Purpose: the br_netfilter module makes iptables rules apply to traffic on Linux bridges, so that bridged traffic is passed through the iptables chains.
sudo modprobe br_netfilter
echo "br_netfilter" | sudo tee /etc/modules-load.d/br_netfilter.conf
sudo modprobe overlay
sudo tee /etc/sysctl.d/k8s.conf<<EOF
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF
sudo sysctl --system
# Confirm that net.ipv4.ip_forward = 1 appears in the output
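A quick sanity check that the modules and sysctl settings are actually in effect (run after the commands above):
# Both modules should be listed
lsmod | grep -E 'br_netfilter|overlay'
# All three values should print as 1
sysctl net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables net.ipv4.ip_forward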
Edit the hosts file
sudo vim /etc/hosts
192.168.236.101 master
192.168.236.102 node1
192.168.236.103 node2
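A quick check that the names resolve as intended (hostnames and IPs as above):
ping -c 1 node1
ping -c 1 node2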
Disable SELinux (skip on Ubuntu)
sudo setenforce 0
# setenforce only changes the runtime mode; verify the persistent setting with:
cat /etc/selinux/config
SELINUX=disabled
Why disable it:
- SELinux is a complex security mechanism that can conflict with Kubernetes' own traffic handling, since Kubernetes programs its own rules into netfilter (i.e. iptables rules).
- Disabling it is also required so that containers can access the host filesystem, which things like Pod networking need in order to work correctly.
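On RHEL-family systems where SELinux is actually enforcing, setenforce 0 only lasts until reboot; a minimal sketch for making the change persistent (not needed on Ubuntu):
sudo setenforce 0
sudo sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config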
Disable the swap partition
swap: similar to virtual memory on Windows; when physical memory runs short, some memory pages are moved to disk to free RAM for active processes.
# Temporary
sudo swapoff -a
# Permanent
sudo vim /etc/fstab
# Comment out the /swap.img line (usually the last line)
sudo reboot
Why disable it:
- Under heavy load, swapping to disk causes excessive disk I/O, degrading overall Kubernetes performance and potentially crashing applications.
- By default the kubelet (failSwapOn=true) also refuses to start while swap is enabled.
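After the reboot, confirm that swap is really off:
free -h            # the Swap line should show 0B
swapon --show      # no output means no active swap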
Install containerd (the container runtime)
Installation
# Refresh the package index
sudo apt update
# Install prerequisites
sudo apt install -y apt-transport-https ca-certificates curl software-properties-common
# Add Docker's GPG key
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
# Add the Docker APT repository
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
# Update
sudo apt update
# Install
sudo apt-get install -y containerd.io
# Verify
containerd --version
# Start now and enable at boot
sudo systemctl restart containerd
sudo systemctl enable containerd
sudo systemctl status containerd
Configuration changes
# Generate the default config file
sudo mkdir -p /etc/containerd
containerd config default | sudo tee /etc/containerd/config.toml
sudo vim /etc/containerd/config.toml
# Find the runc options section and set SystemdCgroup to true
# [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
# [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
# SystemdCgroup = true
# ....
Why:
- On Linux distributions that use systemd as the init system, using systemd as the container cgroup driver keeps the node more stable under resource pressure, so it is recommended to set containerd's cgroup driver to systemd.
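Instead of editing the file by hand, the same change can be made with sed and then verified (a sketch; it assumes the default config.toml generated above, where the key starts out as SystemdCgroup = false):
sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml
grep -n 'SystemdCgroup' /etc/containerd/config.toml
sudo systemctl restart containerd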
Registry mirrors
sudo vim /etc/containerd/config.toml
Edit as follows (add mirror endpoints and change the sandbox image):
[plugins."io.containerd.grpc.v1.cri"]
...
# sandbox_image = "registry.k8s.io/pause:3.8"
sandbox_image = "registry.aliyuncs.com/k8sxio/pause:3.8"
...
[plugins."io.containerd.grpc.v1.cri".registry]
config_path = ""
[plugins."io.containerd.grpc.v1.cri".registry.auths]
[plugins."io.containerd.grpc.v1.cri".registry.configs]
[plugins."io.containerd.grpc.v1.cri".registry.headers]
[plugins."io.containerd.grpc.v1.cri".registry.mirrors]
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
endpoint = ["https://yls4sbo2.mirror.aliyuncs.com"]
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."k8s.gcr.io"]
endpoint = ["https://registry.aliyuncs.com/k8sxio"]
# Restart containerd
sudo systemctl daemon-reload
sudo systemctl restart containerd
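To confirm containerd picked up the changes, the merged configuration can be dumped and inspected (a quick check, assuming the edits above):
sudo containerd config dump | grep sandbox_image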
Initialize the cluster
Install kubeadm
# Refresh the package index
sudo apt-get update
# Install prerequisites
sudo apt-get install -y apt-transport-https ca-certificates curl
# Add the Kubernetes GPG key (create the keyring directory first if it does not exist)
sudo mkdir -p -m 755 /etc/apt/keyrings
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.33/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
# Add the Kubernetes apt repository
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.33/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list
# Refresh the package index
sudo apt-get update
# Install kubelet, kubeadm and kubectl, and pin their versions
sudo apt-get install -y kubelet kubeadm kubectl
sudo apt-mark hold kubelet kubeadm kubectl
# Verify
kubeadm version
kubectl version --client
# Enable at boot
sudo systemctl enable --now kubelet
Since containerd is the runtime here, crictl can be used on each node to see what is going on:
# List running containers
crictl ps
# List all containers (including exited ones)
crictl ps -a
# List images on the node
crictl images
# Pull an image
crictl pull <image>
# Remove an image
crictl rmi <image>
# Show detailed info about a container (use crictl inspectp for a Pod)
crictl inspect <container-id>
# Show container logs
crictl logs <container-id>
# Show container resource usage (CPU/memory)
crictl stats
# Stop a container
crictl stop <container-id>
# Remove a container
crictl rm <container-id>
# List Pods
crictl pods
# Show Pod-level resource usage
crictl statsp
# Run a Pod sandbox (requires a pod config file)
crictl runp <pod-config-file>
# Stop a Pod
crictl stopp <pod-id>
# Remove a Pod
crictl rmp <pod-id>
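If crictl complains that it cannot reach the runtime, point it at the containerd socket explicitly (a minimal sketch; the socket path matches the criSocket used later in kubeadm.yaml):
sudo tee /etc/crictl.yaml <<EOF
runtime-endpoint: unix:///var/run/containerd/containerd.sock
image-endpoint: unix:///var/run/containerd/containerd.sock
EOF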
Clone the VM image
At this point the common node preparation is done; you can clone the VM image for the other nodes and give each one a static IP.
Change the IP address
sudo vim /etc/netplan/50-cloud-init.yaml
# Edit as follows
network:
  version: 2
  renderer: networkd
  ethernets:
    ens33:
      dhcp4: no
      addresses:
        - 192.168.236.102/24
      routes:
        - to: default
          via: 192.168.236.2
      nameservers:
        addresses:
          - 8.8.8.8
          - 114.114.114.114
# Apply the changes
sudo netplan apply
sudo systemctl restart systemd-networkd
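Confirm the static address and default route are active (interface name as above):
ip -4 addr show ens33
ip route show default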
Initialize the master
Run this on the master node to print the default init configuration:
kubeadm config print init-defaults --component-configs KubeletConfiguration > kubeadm.yaml
Then adjust the configuration to our needs.
apiVersion: kubeadm.k8s.io/v1beta4
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  # API address this node advertises (usually the host IP)
  advertiseAddress: 192.168.236.101
  bindPort: 6443
nodeRegistration:
  criSocket: unix:///var/run/containerd/containerd.sock
  imagePullPolicy: IfNotPresent
  imagePullSerial: true
  # Node name
  name: master
  # Keep regular workloads off the master
  taints:
  - key: "node-role.kubernetes.io/control-plane"
    effect: "NoSchedule"
timeouts:
  controlPlaneComponentHealthCheck: 4m0s
  discovery: 5m0s
  etcdAPICall: 2m0s
  kubeletHealthCheck: 4m0s
  kubernetesAPICall: 1m0s
  tlsBootstrap: 5m0s
  upgradeManifests: 5m0s
---
apiServer: {}
apiVersion: kubeadm.k8s.io/v1beta4
caCertificateValidityPeriod: 87600h0m0s
certificateValidityPeriod: 8760h0m0s
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
encryptionAlgorithm: RSA-2048
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.k8s.io
kind: ClusterConfiguration
kubernetesVersion: 1.33.0
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
  # Pod subnet used by Flannel
  podSubnet: 10.244.0.0/16
proxy: {}
scheduler: {}
---
apiVersion: kubelet.config.k8s.io/v1beta1
authentication:
  anonymous:
    enabled: false
  webhook:
    cacheTTL: 0s
    enabled: true
  x509:
    clientCAFile: /etc/kubernetes/pki/ca.crt
authorization:
  mode: Webhook
  webhook:
    cacheAuthorizedTTL: 0s
    cacheUnauthorizedTTL: 0s
cgroupDriver: systemd
clusterDNS:
- 10.96.0.10
clusterDomain: cluster.local
containerRuntimeEndpoint: ""
cpuManagerReconcilePeriod: 0s
crashLoopBackOff: {}
evictionPressureTransitionPeriod: 0s
fileCheckFrequency: 0s
healthzBindAddress: 127.0.0.1
healthzPort: 10248
httpCheckFrequency: 0s
imageMaximumGCAge: 0s
imageMinimumGCAge: 0s
kind: KubeletConfiguration
logging:
  flushFrequency: 0
  options:
    json:
      infoBufferSize: "0"
    text:
      infoBufferSize: "0"
  verbosity: 0
memorySwap: {}
nodeStatusReportFrequency: 0s
nodeStatusUpdateFrequency: 0s
rotateCertificates: true
runtimeRequestTimeout: 0s
shutdownGracePeriod: 0s
shutdownGracePeriodCriticalPods: 0s
staticPodPath: /etc/kubernetes/manifests
streamingConnectionIdleTimeout: 0s
syncFrequency: 0s
volumeStatsAggPeriod: 0s
Annotated version:
# ------------------- kubeadm InitConfiguration -------------------
apiVersion: kubeadm.k8s.io/v1beta4          # kubeadm API version
kind: InitConfiguration                     # per-node init configuration
bootstrapTokens:                            # bootstrap tokens used by kubeadm join
- groups:
  - system:bootstrappers:kubeadm:default-node-token   # group the token belongs to
  token: abcdef.0123456789abcdef            # token value (generate a random one in production)
  ttl: 24h0m0s                              # token valid for 24 hours
  usages:                                   # what the token may be used for
  - signing                                 # signing
  - authentication                          # authentication
localAPIEndpoint:                           # address the kube-apiserver listens on
  advertiseAddress: 1.2.3.4                 # API address this node advertises (usually the host IP)
  bindPort: 6443                            # kube-apiserver port (default 6443)
nodeRegistration:                           # node registration settings
  criSocket: unix:///var/run/containerd/containerd.sock   # CRI socket (containerd)
  imagePullPolicy: IfNotPresent             # image pull policy (skip if present locally)
  imagePullSerial: true                     # pull images sequentially
  name: node                                # node name
  taints: null                              # node taints (null = no taints)
timeouts:                                   # kubeadm internal timeouts
  controlPlaneComponentHealthCheck: 4m0s    # control-plane component health check timeout
  discovery: 5m0s                           # node discovery timeout
  etcdAPICall: 2m0s                         # etcd API call timeout
  kubeletHealthCheck: 4m0s                  # kubelet health check timeout
  kubernetesAPICall: 1m0s                   # kube-apiserver call timeout
  tlsBootstrap: 5m0s                        # TLS bootstrap timeout
  upgradeManifests: 5m0s                    # static Pod manifest upgrade timeout
---
# ------------------- kubeadm ClusterConfiguration -------------------
apiVersion: kubeadm.k8s.io/v1beta4          # kubeadm API version
kind: ClusterConfiguration                  # cluster-wide configuration
clusterName: kubernetes                     # cluster name
kubernetesVersion: 1.33.0                   # cluster version (kubeadm pulls matching component images)
certificatesDir: /etc/kubernetes/pki        # certificate directory
caCertificateValidityPeriod: 87600h0m0s     # root CA certificate validity (10 years)
certificateValidityPeriod: 8760h0m0s        # leaf certificate validity (1 year)
imageRepository: registry.k8s.io            # default registry for k8s component images
encryptionAlgorithm: RSA-2048               # key algorithm
etcd:                                       # etcd settings
  local:                                    # use a local etcd
    dataDir: /var/lib/etcd                  # etcd data directory
networking:                                 # networking settings
  dnsDomain: cluster.local                  # cluster DNS domain
  serviceSubnet: 10.96.0.0/12               # Service virtual IP range
apiServer: {}                               # kube-apiserver settings (empty = defaults)
controllerManager: {}                       # kube-controller-manager settings
scheduler: {}                               # kube-scheduler settings
dns: {}                                     # CoreDNS settings
proxy: {}                                   # kube-proxy settings
---
# ------------------- KubeletConfiguration -------------------
apiVersion: kubelet.config.k8s.io/v1beta1   # kubelet API version
kind: KubeletConfiguration                  # kubelet configuration
authentication:                             # kubelet authentication
  anonymous:
    enabled: false                          # disallow anonymous access to the kubelet
  webhook:
    cacheTTL: 0s                            # webhook cache TTL (0 = disabled)
    enabled: true                           # enable webhook authentication
  x509:
    clientCAFile: /etc/kubernetes/pki/ca.crt   # client CA certificate
authorization:                              # kubelet authorization
  mode: Webhook                             # authorization mode: Webhook
  webhook:
    cacheAuthorizedTTL: 0s                  # authorized cache (disabled)
    cacheUnauthorizedTTL: 0s                # unauthorized cache (disabled)
cgroupDriver: systemd                       # use systemd to manage cgroups
clusterDNS:
- 10.96.0.10                                # in-cluster DNS address (CoreDNS Service IP)
clusterDomain: cluster.local                # cluster DNS domain
containerRuntimeEndpoint: ""                # container runtime endpoint (empty = default)
cpuManagerReconcilePeriod: 0s               # CPU manager reconcile period
crashLoopBackOff: {}                        # container crash-loop backoff settings
evictionPressureTransitionPeriod: 0s        # eviction pressure transition period
fileCheckFrequency: 0s                      # file check frequency
healthzBindAddress: 127.0.0.1               # kubelet healthz bind address
healthzPort: 10248                          # kubelet healthz port
httpCheckFrequency: 0s                      # HTTP check frequency
imageMaximumGCAge: 0s                       # maximum image age before GC
imageMinimumGCAge: 0s                       # minimum image age before GC
logging:                                    # logging settings
  flushFrequency: 0                         # flush frequency
  options:
    json:
      infoBufferSize: "0"                   # JSON log buffer size
    text:
      infoBufferSize: "0"                   # text log buffer size
  verbosity: 0                              # log verbosity (0 = normal)
memorySwap: {}                              # memory swap settings
nodeStatusReportFrequency: 0s               # node status report frequency
nodeStatusUpdateFrequency: 0s               # node status update frequency
rotateCertificates: true                    # rotate certificates automatically
runtimeRequestTimeout: 0s                   # runtime request timeout
shutdownGracePeriod: 0s                     # graceful node shutdown period
shutdownGracePeriodCriticalPods: 0s         # graceful shutdown period for critical Pods
staticPodPath: /etc/kubernetes/manifests    # static Pod path
streamingConnectionIdleTimeout: 0s          # streaming connection idle timeout
syncFrequency: 0s                           # sync frequency
volumeStatsAggPeriod: 0s                    # volume stats aggregation period
Once the config is ready, you can pre-pull the images to test it:
ubuntu@ubuntu:~$ sudo kubeadm config images pull --config kubeadm.yaml
[config/images] Pulled registry.k8s.io/kube-apiserver:v1.33.0
[config/images] Pulled registry.k8s.io/kube-controller-manager:v1.33.0
[config/images] Pulled registry.k8s.io/kube-scheduler:v1.33.0
[config/images] Pulled registry.k8s.io/kube-proxy:v1.33.0
[config/images] Pulled registry.k8s.io/coredns/coredns:v1.12.0
[config/images] Pulled registry.k8s.io/pause:3.10
[config/images] Pulled registry.k8s.io/etcd:3.5.21-0
ubuntu@ubuntu:~$
Initialize
sudo kubeadm init --config kubeadm.yaml
# Save the join command printed at the end of the output
kubeadm join 192.168.236.101:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:dec1d4e189cb54f7c4a6a77e3d9a590f31e089d8e5d6f92a6a42124b1b2229c9
# Copy the kubeconfig file
sudo mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
# Verify
kubectl get nodes
NAME STATUS ROLES AGE VERSION
master NotReady control-plane 2m59s v1.33.4
Configure the network plugin (Flannel)
Nodes stay NotReady until a CNI plugin is installed. This only needs to be run from one node (wherever kubectl is configured).
kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
# Wait for the pods to become ready
kubectl get pods -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-flannel kube-flannel-ds-66rxl 1/1 Running 1 (2s ago) 5s
kube-flannel kube-flannel-ds-khjhj 1/1 Running 1 (2s ago) 5s
kube-flannel kube-flannel-ds-mh2n4 1/1 Running 1 (3s ago) 5s
kube-system coredns-674b8bbfcf-8xqdw 0/1 ContainerCreating 0 140m
kube-system coredns-674b8bbfcf-gd68t 0/1 ContainerCreating 0 140m
kube-system etcd-master 1/1 Running 0 140m
kube-system kube-apiserver-master 1/1 Running 0 140m
kube-system kube-controller-manager-master 1/1 Running 0 140m
kube-system kube-proxy-bt64q 1/1 Running 0 140m
kube-system kube-proxy-j7lgt 1/1 Running 0 15m
kube-system kube-proxy-q5mtp 1/1 Running 0 15m
kube-system kube-scheduler-master 1/1 Running 0 140m
# Inspect node networking
kubectl describe node master
kubectl describe node node1
kubectl describe node node2
# Wait a bit, then check node and pod status
kubectl get nodes
kubectl get pods -A
# Watch continuously
watch kubectl get pods -A
watch kubectl get nodes
# Follow the kubelet logs
journalctl -u kubelet -f
# Check a specific pod's errors
kubectl logs -n <namespace> <pod-name>
kubectl logs -n kubernetes-dashboard kubernetes-dashboard-kong-648658d45f-jz4bh
Cleanup (if Flannel needs to be removed and reinstalled)
sudo rm -rf /etc/cni/net.d/*
kubectl delete -f https://github.com/flannel-io/flannel/releases/latest/download/kube-flannel.yml
# Restart to reload
sudo systemctl restart kubelet
kubectl delete daemonset -n kube-flannel kube-flannel-ds
Join the worker nodes
kubeadm join 192.168.236.101:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:dec1d4e189cb54f7c4a6a77e3d9a590f31e089d8e5d6f92a6a42124b1b2229c9 \
--node-name node1
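If the original token has expired (its TTL is 24h in the config above), a fresh join command can be generated on the master:
kubeadm token create --print-join-command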
Cleanup (to reset a node before re-joining)
sudo kubeadm reset -f
sudo systemctl stop kubelet
sudo rm -rf /etc/kubernetes /var/lib/kubelet /var/lib/cni /var/run/kubernetes
sudo systemctl restart kubelet
Install a dashboard (may be replaced with Kuboard or Rancher later)
Install Helm (the officially recommended method)
curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm version
Install the dashboard
# Add the kubernetes-dashboard repo
helm repo add kubernetes-dashboard https://kubernetes.github.io/dashboard/
# Deploy a Helm release named `kubernetes-dashboard` from the kubernetes-dashboard chart
helm upgrade --install kubernetes-dashboard kubernetes-dashboard/kubernetes-dashboard --create-namespace --namespace kubernetes-dashboard
# By default the Dashboard Services are ClusterIP and only reachable inside the cluster.
# Expose it via NodePort
kubectl -n kubernetes-dashboard edit svc kubernetes-dashboard-web
# Change type: ClusterIP to type: NodePort, or just run the patch command below
# kubectl -n kubernetes-dashboard patch svc kubernetes-dashboard-web -p '{"spec": {"type": "NodePort"}}'
kubectl -n kubernetes-dashboard patch svc kubernetes-dashboard-web -p '{"spec": {"type": "NodePort"}}'
# Check the IP and port
kubectl get svc -n kubernetes-dashboard
ubuntu@ubuntu:~$ kubectl get svc -n kubernetes-dashboard
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kubernetes-dashboard-api ClusterIP 10.110.248.210 <none> 8000/TCP 14m
kubernetes-dashboard-auth ClusterIP 10.107.79.232 <none> 8000/TCP 14m
kubernetes-dashboard-kong-proxy ClusterIP 10.110.94.152 <none> 443/TCP 14m
kubernetes-dashboard-metrics-scraper ClusterIP 10.108.193.38 <none> 8000/TCP 14m
kubernetes-dashboard-web NodePort 10.96.248.181 <none> 8000:32297/TCP 14m
# Open http://192.168.236.101:32297/#/login
# Create an admin ServiceAccount
kubectl create serviceaccount admin -n kubernetes-dashboard
# Bind the admin ServiceAccount to the cluster-admin ClusterRole
kubectl create clusterrolebinding admin --clusterrole=cluster-admin --serviceaccount=kubernetes-dashboard:admin
# Get a login token
kubectl -n kubernetes-dashboard create token admin
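The token from kubectl create token is short-lived by default; if needed, a longer-lived one can be requested with the standard --duration flag (the API server may cap the maximum):
kubectl -n kubernetes-dashboard create token admin --duration=24h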
Certificate issues
# Temporary workaround: port-forward the kong proxy and access it over https://<node-ip>:8443
kubectl -n kubernetes-dashboard port-forward --address 0.0.0.0 svc/kubernetes-dashboard-kong-proxy 8443:443
# Restart the kong replicas
kubectl delete pod -n kubernetes-dashboard -l app.kubernetes.io/name=kong
Uninstall
helm uninstall kubernetes-dashboard --namespace kubernetes-dashboard
kubectl delete namespace kubernetes-dashboard
Tear down the cluster
On the master node
kubectl delete -f https://github.com/flannel-io/flannel/releases/latest/download/kube-flannel.yml
kubectl delete daemonset -n kube-flannel kube-flannel-ds
sudo kubeadm reset -f
sudo rm -rf /etc/cni/
sudo rm -rf /var/lib/etcd
sudo rm -rf $HOME/.kube
sudo systemctl restart kubelet
sudo ip link delete flannel.1
sudo ip link delete cni0
On the worker nodes
sudo kubeadm reset -f
sudo rm -rf /etc/cni/net.d
sudo systemctl restart kubelet
sudo ip link delete flannel.1
sudo ip link delete cni0
