k8s搭建prometheus监控集群
一、搭建prometheus
1、存储采用localpv的方式
创建prometheus-pv.yaml
apiVersion: v1
kind: PersistentVolume
metadata:
name: prometheus-pv
namespace: monitoring
labels:
type: local
spec:
capacity:
storage: 20Gi
accessModes:
- ReadWriteOnce
persistentVolumeReclaimPolicy: Retain
storageClassName: local-storage
local:
path: "/data"
nodeAffinity:
required:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: In
values:
- k8s-node1
创建StorageClass.yaml
kind: StorageClass apiVersion: storage.k8s.io/v1 metadata: name: local-storage namespace: monitoring provisioner: kubernetes.io/no-provisioner volumeBindingMode: WaitForFirstConsumer reclaimPolicy: Retain
创建prometheus-pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: prometheus-pvc
namespace: monitoring
labels:
app: prometheus-pvc
spec:
accessModes:
- ReadWriteOnce #此处需要和pv对应才能匹配
resources:
requests:
storage: 20Gi
storageClassName: local-storage #此处需要和StorageClass.yaml匹配
创建prometheus-configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-config
namespace: monitoring
data:
prometheus.yml: |
global:
scrape_interval: 60s
scrape_timeout: 60s
scrape_configs:
- job_name: kubernetes-cadvisor
honor_timestamps: true
scrape_interval: 1m
scrape_timeout: 1m
metrics_path: /metrics
scheme: https
kubernetes_sd_configs:
- role: node
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: false
relabel_configs:
- separator: ;
regex: __meta_kubernetes_node_label_(kubernetes_io_hostname)
replacement: $1
action: labelmap
- separator: ;
regex: (.*)
target_label: __address__
replacement: kubernetes.default.svc:443
action: replace
- source_labels: [__meta_kubernetes_node_name]
separator: ;
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
action: replace
- job_name: kubernetes-service-endpoints
honor_timestamps: true
scrape_interval: 1m
scrape_timeout: 1m
metrics_path: /metrics
scheme: http
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
separator: ;
regex: "true"
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
separator: ;
regex: (https?)
target_label: __scheme__
replacement: $1
action: replace
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
separator: ;
regex: (.+)
target_label: __metrics_path__
replacement: $1
action: replace
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
separator: ;
regex: ([^:]+)(?::\d+)?;(\d+)
target_label: __address__
replacement: $1:$2
action: replace
- separator: ;
regex: __meta_(kubernetes_namespace|kubernetes_pod_name|kubernetes_pod_host_ip)
replacement: $1
action: labelmap
- separator: ;
regex: __meta_kubernetes_pod_label_(app_kubernetes_io_name)
replacement: $1
action: labelmap
- separator: ;
regex: __meta_kubernetes_service_label_(k8s_app)
replacement: $1
action: labelmap
- source_labels: [kubernetes_pod_name]
separator: ;
regex: .*kube-state-metrics.*
target_label: kubernetes_pod_name
action: replace
- source_labels: [__meta_kubernetes_pod_container_name]
separator: ;
regex: (.*)
target_label: kubernetes_name
replacement: $1
action: replace
- source_labels: [kubernetes_name]
separator: ;
regex: .*kube-state-metrics.*
target_label: kubernetes_name
action: replace
- job_name: kubernetes-pods
honor_timestamps: true
scrape_interval: 1m
scrape_timeout: 1m
metrics_path: /metrics
scheme: http
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
separator: ;
regex: "true"
replacement: $1
action: keep
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
separator: ;
regex: (.+)
target_label: __metrics_path__
replacement: $1
action: replace
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
separator: ;
regex: ([^:]+)(?::\d+)?;(\d+)
target_label: __address__
replacement: $1:$2
action: replace
- separator: ;
regex: __meta_kubernetes_pod_label_(k8s_app|dolphin_part_of_instance|app_kubernetes_io_component|app_kubernetes_io_instance|dolphin_client_id|app_kubernetes_io_part_of|app_kubernetes_io_name|name|release)
replacement: $1
action: labelmap
- separator: ;
regex: __meta_(kubernetes_namespace|kubernetes_pod_name|kubernetes_pod_host_ip)
replacement: $1
action: labelmap
- source_labels: [kubernetes_pod_name]
separator: ;
regex: (node-exporter.*|process-exporter.*)
target_label: kubernetes_pod_name
action: replace
- source_labels: [name]
separator: ;
regex: (.*node-exporter|.*process-exporter)
target_label: name
action: replace
- source_labels: [__meta_kubernetes_pod_host_ip]
separator: ;
regex: (.*)
target_label: node
replacement: k8s-$1
action: replace
创建prometheus.yaml,pod的配置
apiVersion: apps/v1
kind: Deployment
metadata:
name: prometheus
namespace: monitoring
labels:
app: prometheus
spec:
selector:
matchLabels:
app: prometheus
template:
metadata:
labels:
app: prometheus
spec:
serviceAccountName: prometheus
containers:
- image: prom/prometheus:v2.29.1
name: prometheus
command:
- "/bin/prometheus"
args:
- "--config.file=/etc/prometheus/prometheus.yml"
- "--storage.tsdb.path=/prometheus"
- "--storage.tsdb.retention=14d"
- "--web.enable-admin-api" # 控制对admin HTTP API的访问,其中包括删除时间序列等功能
- "--web.enable-lifecycle" # 支持热更新,直接执行localhost:9090/-/reload立即生效
ports:
- containerPort: 9090
protocol: TCP
name: http
volumeMounts:
- mountPath: "/prometheus"
subPath: prometheus
name: data
- mountPath: "/etc/prometheus"
name: config-volume
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
cpu: 500m
memory: 1024Mi
securityContext:
runAsUser: 0
volumes:
- name: data
persistentVolumeClaim:
claimName: prometheus-pvc
- configMap:
name: prometheus-config
name: config-volume
二、搭建exporter采集器(node-exporter、kube-state-metrics)
1、搭建node-exporter
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: node-exporter
namespace: monitoring
spec:
selector:
matchLabels:
app: node-exporter
template:
metadata:
annotations:
prometheus.io/path: /metrics
prometheus.io/port: "9100"
prometheus.io/scrape: "true"
labels:
app: node-exporter
spec:
hostNetwork: true
containers:
- name: node-exporter
image: prom/node-exporter:v1.2.2
ports:
- containerPort: 9100
imagePullPolicy: IfNotPresent
command:
- /bin/node_exporter
- --path.procfs
- /host/proc
- --path.sysfs
- /host/sys
- --collector.filesystem.ignored-mount-points
- ^/(sys|proc|dev|host|etc)($|/)
volumeMounts:
- name: proc
mountPath: /host/proc
- name: sys
mountPath: /host/sys
- name: root
mountPath: /rootfs
volumes:
- name: proc
hostPath:
path: /proc
- name: sys
hostPath:
path: /sys
- name: root
hostPath:
path: /
2、创建kube-state-metrics
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
namespace: kube-system
name: kube-state-metrics
rules:
- apiGroups: [""]
resources:
- pods
verbs: ["get"]
- apiGroups: ["apps"]
resources:
- deployments
resourceNames: ["kube-state-metrics"]
verbs: ["get", "update"]
- apiGroups: ["extensions"]
resources:
- deployments
resourceNames: ["kube-state-metrics"]
verbs: ["get", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: kube-state-metrics
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: kube-state-metrics
subjects:
- kind: ServiceAccount
name: kube-state-metrics
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: kube-state-metrics
rules:
- apiGroups: [""]
resources:
- configmaps
- secrets
- nodes
- pods
- services
- resourcequotas
- replicationcontrollers
- limitranges
- persistentvolumeclaims
- persistentvolumes
- namespaces
- endpoints
verbs: ["list", "watch"]
- apiGroups: ["extensions"]
resources:
- daemonsets
- deployments
- replicasets
- ingresses
verbs: ["list", "watch"]
- apiGroups: ["apps"]
resources:
- daemonsets
- deployments
- replicasets
- statefulsets
verbs: ["list", "watch"]
- apiGroups: ["batch"]
resources:
- cronjobs
- jobs
verbs: ["list", "watch"]
- apiGroups: ["autoscaling"]
resources:
- horizontalpodautoscalers
verbs: ["list", "watch"]
- apiGroups: ["policy"]
resources:
- poddisruptionbudgets
verbs: ["list", "watch"]
- apiGroups: ["certificates.k8s.io"]
resources:
- certificatesigningrequests
verbs: ["list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: kube-state-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: kube-state-metrics
subjects:
- kind: ServiceAccount
name: kube-state-metrics
namespace: kube-system
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: kube-state-metrics
namespace: kube-system
---
apiVersion: v1
kind: Service
metadata:
name: kube-state-metrics
namespace: kube-system
labels:
k8s-app: kube-state-metrics
annotations:
prometheus.io/scrape: 'true'
spec:
ports:
- name: http-metrics
port: 8080
targetPort: http-metrics
protocol: TCP
- name: telemetry
port: 8081
targetPort: telemetry
protocol: TCP
selector:
k8s-app: kube-state-metrics
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: kube-state-metrics
namespace: kube-system
spec:
selector:
matchLabels:
k8s-app: kube-state-metrics
replicas: 1
template:
metadata:
labels:
k8s-app: kube-state-metrics
spec:
serviceAccountName: kube-state-metrics
containers:
- name: kube-state-metrics
image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.2.0
ports:
- name: http-metrics
containerPort: 8080
- name: telemetry
containerPort: 8081
readinessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 5
timeoutSeconds: 5
三、展示
