Kubernetes Monitoring: Deploying and Installing Prometheus
Deploy the monitoring Pods (node-exporter DaemonSet). Manifest: https://github.com/ruidongchenxi/k8s-ack/blob/main/node-export.yaml
[root@k8s-master cka]# cat node-export.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitor-sa
  labels:
    name: node-exporter
spec:
  selector:
    matchLabels:
      name: node-exporter
  template:
    metadata:
      labels:
        name: node-exporter
    spec:
      hostPID: true
      hostIPC: true
      hostNetwork: true
      containers:
      - name: node-exporter
        image: prom/node-exporter:v0.16.0
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 9100
        resources:
          requests:
            cpu: 0.15
        securityContext:
          privileged: true
        args:
        - --path.procfs
        - /host/proc
        - --path.sysfs
        - /host/sys
        - --collector.filesystem.ignored-mount-points
        - '^/(sys|proc|dev|host|etc)($|/)'
        volumeMounts:
        - name: dev
          mountPath: /host/dev
        - name: proc
          mountPath: /host/proc
        - name: sys
          mountPath: /host/sys
        - name: rootfs
          mountPath: /rootfs
      tolerations:
      - key: "node-role.kubernetes.io/master"
        operator: "Exists"
        effect: "NoSchedule"
      volumes:
      - name: proc
        hostPath:
          path: /proc
      - name: dev
        hostPath:
          path: /dev
      - name: sys
        hostPath:
          path: /sys
      - name: rootfs
        hostPath:
          path: /
[root@k8s-master cka]# kubectl create ns monitor-sa
namespace/monitor-sa created
[root@k8s-master cka]# kubectl apply -f node-export.yaml
daemonset.apps/node-exporter created
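To confirm that the DaemonSet placed one Pod on every node (the master included, thanks to the toleration above), a quick check such as the following can be run; it only relies on the label defined in the manifest:

# One node-exporter Pod per node is expected, shown together with the node it runs on.
kubectl get pods -n monitor-sa -l name=node-exporter -o wide
# The DESIRED/READY counters of the DaemonSet should match the node count.
kubectl get daemonset node-exporter -n monitor-sa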
Check the CPU metrics exposed by node-exporter
[root@k8s-master cka]# curl http://192.168.10.50:9100/metrics | grep cpu
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 63228 100 63228 0 0 5437k 0 --:--:-- --:--:-- --:--:-- 5613k
# HELP go_memstats_gc_cpu_fraction The fraction of this program's available CPU time used by the GC since the program started.
# TYPE go_memstats_gc_cpu_fraction gauge
go_memstats_gc_cpu_fraction 4.159327529117454e-06
# HELP node_cpu_guest_seconds_total Seconds the cpus spent in guests (VMs) for each mode.
# TYPE node_cpu_guest_seconds_total counter
node_cpu_guest_seconds_total{cpu="0",mode="nice"} 0
node_cpu_guest_seconds_total{cpu="0",mode="user"} 0
node_cpu_guest_seconds_total{cpu="1",mode="nice"} 0
node_cpu_guest_seconds_total{cpu="1",mode="user"} 0
node_cpu_guest_seconds_total{cpu="2",mode="nice"} 0
node_cpu_guest_seconds_total{cpu="2",mode="user"} 0
node_cpu_guest_seconds_total{cpu="3",mode="nice"} 0
node_cpu_guest_seconds_total{cpu="3",mode="user"} 0
# HELP node_cpu_seconds_total Seconds the cpus spent in each mode.
# TYPE node_cpu_seconds_total counter
node_cpu_seconds_total{cpu="0",mode="idle"} 82923.84
node_cpu_seconds_total{cpu="0",mode="iowait"} 46.33
node_cpu_seconds_total{cpu="0",mode="irq"} 0
node_cpu_seconds_total{cpu="0",mode="nice"} 0.04
node_cpu_seconds_total{cpu="0",mode="softirq"} 7.79
node_cpu_seconds_total{cpu="0",mode="steal"} 0
node_cpu_seconds_total{cpu="0",mode="system"} 51.35
node_cpu_seconds_total{cpu="0",mode="user"} 58.6
node_cpu_seconds_total{cpu="1",mode="idle"} 82673.51
node_cpu_seconds_total{cpu="1",mode="iowait"} 1.07
node_cpu_seconds_total{cpu="1",mode="irq"} 0
node_cpu_seconds_total{cpu="1",mode="nice"} 0.04
node_cpu_seconds_total{cpu="1",mode="softirq"} 7.89
node_cpu_seconds_total{cpu="1",mode="steal"} 0
node_cpu_seconds_total{cpu="1",mode="system"} 70.05
node_cpu_seconds_total{cpu="1",mode="user"} 80.02
node_cpu_seconds_total{cpu="2",mode="idle"} 82655.69
node_cpu_seconds_total{cpu="2",mode="iowait"} 1.05
node_cpu_seconds_total{cpu="2",mode="irq"} 0
node_cpu_seconds_total{cpu="2",mode="nice"} 0.11
node_cpu_seconds_total{cpu="2",mode="softirq"} 7.9
node_cpu_seconds_total{cpu="2",mode="steal"} 0
node_cpu_seconds_total{cpu="2",mode="system"} 72.63
node_cpu_seconds_total{cpu="2",mode="user"} 87.56
node_cpu_seconds_total{cpu="3",mode="idle"} 82495.3
node_cpu_seconds_total{cpu="3",mode="iowait"} 0.94
node_cpu_seconds_total{cpu="3",mode="irq"} 0
node_cpu_seconds_total{cpu="3",mode="nice"} 0.04
node_cpu_seconds_total{cpu="3",mode="softirq"} 11.34
node_cpu_seconds_total{cpu="3",mode="steal"} 0
node_cpu_seconds_total{cpu="3",mode="system"} 77.3
node_cpu_seconds_total{cpu="3",mode="user"} 80.99
# HELP node_memory_Percpu_bytes Memory information field Percpu_bytes.
# TYPE node_memory_Percpu_bytes gauge
node_memory_Percpu_bytes 3.407872e+07
node_scrape_collector_duration_seconds{collector="cpu"} 0.000650834
node_scrape_collector_success{collector="cpu"} 1
# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total 0.22
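node_cpu_seconds_total is a counter, so a utilisation figure only emerges after taking a rate over two samples. As a rough sanity check without Prometheus, the cpu0 idle counter can be sampled twice against the same endpoint used above (a sketch, assuming the 5-second gap is accurate enough for a ballpark number):

# Sample the cpu0 idle counter twice, 5 seconds apart, and print the idle fraction over that window.
a=$(curl -s http://192.168.10.50:9100/metrics | grep '^node_cpu_seconds_total{cpu="0",mode="idle"}' | awk '{print $2}')
sleep 5
b=$(curl -s http://192.168.10.50:9100/metrics | grep '^node_cpu_seconds_total{cpu="0",mode="idle"}' | awk '{print $2}')
awk -v a="$a" -v b="$b" 'BEGIN{printf "cpu0 idle fraction over 5s: %.2f\n", (b-a)/5}'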
Create the ServiceAccount that the Prometheus Pod will run as
[root@k8s-master cka]# kubectl create serviceaccount monitor -n monitor-sa
serviceaccount/monitor created
[root@k8s-master cka]# kubectl create clusterrolebinding monitor-clusterrolebinding -n monitor-sa --clusterrole=cluster-admin --serviceaccount=monitor-sa:monitor
clusterrolebinding.rbac.authorization.k8s.io/monitor-clusterrolebinding created
[root@k8s-master cka]# kubectl create clusterrolebinding monitor-clusterrolebinding-1 --clusterrole=cluster-admin --user=system:serviceaccount:monitor-sa:monitor -n monitor-sa
clusterrolebinding.rbac.authorization.k8s.io/monitor-clusterrolebinding-1 created
[root@k8s-master cka]# kubectl get clusterrolebinding | grep clusterrolebinding-1
monitor-clusterrolebinding-1   ClusterRole/cluster-admin   17s
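Whether the binding actually grants the expected permissions can be checked with kubectl auth can-i, impersonating the ServiceAccount; note the subject format is system:serviceaccount:<namespace>:<name>:

# Should print "yes" once the cluster-admin binding is in place.
kubectl auth can-i list pods --all-namespaces --as=system:serviceaccount:monitor-sa:monitor
# Inspect the binding to double-check the namespace/name order of the subject.
kubectl get clusterrolebinding monitor-clusterrolebinding -o yaml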
Create the data directory on the worker nodes
[root@k8s-node1 ~]# mkdir /data
[root@k8s-node1 ~]# chmod 777 /data
[root@k8s-node2 ~]# mkdir /data && chmod 777 /data
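The world-writable /data directory is the hostPath that the Prometheus Deployment below mounts for its TSDB, so it must exist on the node the Pod is pinned to; a quick verification on that node:

# Confirm the hostPath directory exists with the expected mode and owner.
ls -ld /data
stat -c '%a %U:%G %n' /data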
Create the Prometheus configuration via a ConfigMap (prometheus-cfg.yaml)
---
kind: ConfigMap
apiVersion: v1
metadata:
  labels:
    app: prometheus
  name: prometheus-config
  namespace: monitor-sa
data:
  prometheus.yml: |                  # the actual Prometheus configuration file
    global:                          # global settings
      scrape_interval: 15s           # how often targets are scraped
      scrape_timeout: 10s            # scrape timeout
      evaluation_interval: 1m        # how often rules are evaluated
    scrape_configs:
    - job_name: 'kubernetes-node'    # data source: cluster nodes
      kubernetes_sd_configs:
      - role: node
      relabel_configs:
      - source_labels: [__address__]
        regex: '(.*):10250'
        replacement: '${1}:9100'
        target_label: __address__
        action: replace
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
    - job_name: 'kubernetes-node-cadvisor'
      kubernetes_sd_configs:
      - role: node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - target_label: __address__
        replacement: kubernetes.default.svc:443
      - source_labels: [__meta_kubernetes_node_name]
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
    - job_name: 'kubernetes-apiserver'
      kubernetes_sd_configs:
      - role: endpoints
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: default;kubernetes;https
    - job_name: 'kubernetes-service-endpoints'
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
        action: replace
        target_label: __scheme__
        regex: (https?)
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
        action: replace
        target_label: __address__
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: kubernetes_name
[root@k8s-master cka]# kubectl apply -f prometheus-cfg.yaml
configmap/prometheus-config created
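Before the ConfigMap is mounted into the Deployment, the rendered prometheus.yml can be dumped back out to catch indentation mistakes early (the dot in the key has to be escaped in the jsonpath expression):

# Print the beginning of the prometheus.yml stored in the ConfigMap.
kubectl get configmap prometheus-config -n monitor-sa -o jsonpath='{.data.prometheus\.yml}' | head -n 20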
Deploy Prometheus via a Deployment
[root@k8s-master cka]# cat prometheus-deploy.yaml
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-server
  namespace: monitor-sa
  labels:
    app: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
      component: server
    #matchExpressions:
    #- {key: app, operator: In, values: [prometheus]}
    #- {key: component, operator: In, values: [server]}
  template:
    metadata:
      labels:
        app: prometheus
        component: server
      annotations:
        prometheus.io/scrape: 'false'
    spec:
      nodeName: k8s-node1            # pin the Pod to this node
      serviceAccountName: monitor
      containers:
      - name: prometheus
        image: prom/prometheus:v2.2.1
        imagePullPolicy: IfNotPresent
        command:
        - prometheus
        - --config.file=/etc/prometheus/prometheus.yml
        - --storage.tsdb.path=/prometheus
        - --storage.tsdb.retention=720h
        - --web.enable-lifecycle
        ports:
        - containerPort: 9090
          protocol: TCP
        volumeMounts:
        - mountPath: /etc/prometheus
          name: prometheus-config
        - mountPath: /prometheus/
          name: prometheus-storage-volume
      volumes:
      - name: prometheus-config
        configMap:
          name: prometheus-config
      - name: prometheus-storage-volume
        hostPath:
          path: /data
          type: Directory
[root@k8s-master cka]# kubectl apply -f prometheus-deploy.yaml
deployment.apps/prometheus-server created
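If the Pod does not come up, the usual suspects here are the missing /data hostPath on k8s-node1 or a malformed configuration; the rollout status and the container log narrow it down quickly:

# Wait until the Deployment reports an available replica.
kubectl rollout status deployment/prometheus-server -n monitor-sa
# Tail the Prometheus log; configuration parse errors show up here.
kubectl logs -n monitor-sa deploy/prometheus-server --tail=20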
Deploy the Service
[root@k8s-master cka]# cat prometheus-svc.yaml
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitor-sa
  labels:
    app: prometheus
spec:
  type: NodePort
  ports:
  - port: 9090
    targetPort: 9090
    protocol: TCP
  selector:
    app: prometheus
    component: server
[root@k8s-master cka]# kubectl apply -f prometheus-svc.yaml
service/prometheus created
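The NodePort is allocated automatically by the cluster; it can be read back directly with jsonpath instead of parsing the kubectl get svc table:

# Print just the allocated NodePort (30194 in the run below).
kubectl get svc prometheus -n monitor-sa -o jsonpath='{.spec.ports[0].nodePort}{"\n"}'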
Check the Pods and the Service
[root@k8s-master cka]# kubectl get svc -n monitor-sa
NAME         TYPE       CLUSTER-IP      EXTERNAL-IP   PORT(S)          AGE
prometheus   NodePort   10.110.21.221   <none>        9090:30194/TCP   94s
[root@k8s-master cka]# kubectl get pod -n monitor-sa
NAME                                 READY   STATUS    RESTARTS   AGE
node-exporter-8q9vb                  1/1     Running   0          86m
node-exporter-fv8n8                  1/1     Running   0          86m
node-exporter-xjzdc                  1/1     Running   0          86m
prometheus-server-5b5bb44bb5-7xcn7   1/1     Running   0          7m54s
Open http://192.168.10.50:30194/targets in a browser to view the scrape targets.
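The same NodePort also exposes the Prometheus HTTP API, so PromQL can be run without the UI; a sketch querying per-node CPU usage built on the node_cpu_seconds_total metric scraped above (IP and port taken from this environment):

# Per-instance CPU utilisation over the last 5 minutes, via the query API.
curl -sG 'http://192.168.10.50:30194/api/v1/query' --data-urlencode 'query=100 - avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100'
# List all discovered scrape targets and their health.
curl -s 'http://192.168.10.50:30194/api/v1/targets'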

Adding the following annotations to a Service makes Prometheus scrape it (via the kubernetes-service-endpoints job), using kube-dns as an example:
apiVersion: v1
kind: Service
metadata:
  annotations:                       # add the two annotations below
    prometheus.io/port: "9153"
    prometheus.io/scrape: "true"
  creationTimestamp: "2023-10-16T19:47:18Z"
  labels:
    k8s-app: kube-dns
    kubernetes.io/cluster-service: "true"
    kubernetes.io/name: CoreDNS
  name: kube-dns
  namespace: kube-system
  resourceVersion: "236"
  uid: 7162ceef-a1a2-4da8-a4da-d387e619170d
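The two annotations can also be added non-interactively instead of editing the Service by hand (the kubectl edit route is shown in the hot-reload step below):

# Add the scrape annotations to the kube-dns Service; --overwrite makes it idempotent.
kubectl annotate svc kube-dns -n kube-system prometheus.io/scrape="true" prometheus.io/port="9153" --overwrite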
Hot reload
kube-dns   ClusterIP   10.96.0.10   <none>   53/UDP,53/TCP,9153/TCP   23h
[root@k8s-master cka]# kubectl edit svc -n kube-system kube-dns
# Please edit the object below. Lines beginning with a '#' will be ignored,
# and an empty file will abort the edit. If an error occurs while saving this file will be
# reopened with the relevant failures.
#
apiVersion: v1
kind: Service
metadata:
  annotations:
    prometheus.io/port: "9153"
    prometheus.io/scrape: "true"
  creationTimestamp: "2023-10-16T19:47:18Z"
  labels:
service/kube-dns edited
Reload the configuration
[root@k8s-master cka]# kubectl get pod -n monitor-sa -owide
NAME                                 READY   STATUS    RESTARTS   AGE   IP              NODE         NOMINATED NODE   READINESS GATES
node-exporter-8q9vb                  1/1     Running   0          22h   192.168.10.51   k8s-node1    <none>           <none>
node-exporter-fv8n8                  1/1     Running   0          22h   192.168.10.50   k8s-master   <none>           <none>
node-exporter-xjzdc                  1/1     Running   0          22h   192.168.10.52   k8s-node2    <none>           <none>
prometheus-server-5b5bb44bb5-7xcn7   1/1     Running   0          21h   10.244.36.65    k8s-node1    <none>           <none>
[root@k8s-master cka]# curl -X POST http://10.244.36.65:9090/-/reload
[root@k8s-master cka]# kubectl edit svc -n kube-system kube-dns
service/kube-dns edited
[root@k8s-master cka]# curl -X POST http://10.244.36.65:9090/-/reload
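The /-/reload endpoint only works because the Deployment passes --web.enable-lifecycle; whether the newly annotated kube-dns endpoints were actually picked up can be confirmed through the targets API (Pod IP taken from the listing above):

# Look for the kube-dns metrics endpoints (port 9153) among the active targets.
curl -s http://10.244.36.65:9090/api/v1/targets | grep -o '[0-9.]*:9153' | sort -u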
