k8s搭建prometheus监控集群

一、搭建prometheus

1、存储采用localpv的方式

创建prometheus-pv.yaml

apiVersion: v1
kind: PersistentVolume
metadata:
  name: prometheus-pv
  namespace: monitoring
  labels:
    type: local
spec:
  capacity:
    storage: 20Gi
  accessModes:
  - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: local-storage
  local:
    path: "/data"
  nodeAffinity:
    required:
      nodeSelectorTerms:
      - matchExpressions:
        - key:  kubernetes.io/hostname
          operator: In
          values:
          - k8s-node1

  创建StorageClass.yaml

kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: local-storage
  namespace: monitoring
provisioner: kubernetes.io/no-provisioner
volumeBindingMode: WaitForFirstConsumer
reclaimPolicy: Retain

  创建prometheus-pvc.yaml

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: prometheus-pvc
  namespace: monitoring
  labels:
    app: prometheus-pvc
spec:
  accessModes:
  - ReadWriteOnce #此处需要和pv对应才能匹配
  resources:
    requests:
      storage: 20Gi
  storageClassName: local-storage #此处需要和StorageClass.yaml匹配

  创建prometheus-configmap.yaml

apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitoring
data:
  prometheus.yml: |
    global:
      scrape_interval: 60s
      scrape_timeout: 60s
    scrape_configs:
    - job_name: kubernetes-cadvisor
      honor_timestamps: true
      scrape_interval: 1m
      scrape_timeout: 1m
      metrics_path: /metrics
      scheme: https
      kubernetes_sd_configs:
      - role: node
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        insecure_skip_verify: false
      relabel_configs:
      - separator: ;
        regex: __meta_kubernetes_node_label_(kubernetes_io_hostname)
        replacement: $1
        action: labelmap
      - separator: ;
        regex: (.*)
        target_label: __address__
        replacement: kubernetes.default.svc:443
        action: replace
      - source_labels: [__meta_kubernetes_node_name]
        separator: ;
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
        action: replace
    - job_name: kubernetes-service-endpoints
      honor_timestamps: true
      scrape_interval: 1m
      scrape_timeout: 1m
      metrics_path: /metrics
      scheme: http
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
        separator: ;
        regex: "true"
        replacement: $1
        action: keep
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
        separator: ;
        regex: (https?)
        target_label: __scheme__
        replacement: $1
        action: replace
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
        separator: ;
        regex: (.+)
        target_label: __metrics_path__
        replacement: $1
        action: replace
      - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
        separator: ;
        regex: ([^:]+)(?::\d+)?;(\d+)
        target_label: __address__
        replacement: $1:$2
        action: replace
      - separator: ;
        regex: __meta_(kubernetes_namespace|kubernetes_pod_name|kubernetes_pod_host_ip)
        replacement: $1
        action: labelmap
      - separator: ;
        regex: __meta_kubernetes_pod_label_(app_kubernetes_io_name)
        replacement: $1
        action: labelmap
      - separator: ;
        regex: __meta_kubernetes_service_label_(k8s_app)
        replacement: $1
        action: labelmap
      - source_labels: [kubernetes_pod_name]
        separator: ;
        regex: .*kube-state-metrics.*
        target_label: kubernetes_pod_name
        action: replace
      - source_labels: [__meta_kubernetes_pod_container_name]
        separator: ;
        regex: (.*)
        target_label: kubernetes_name
        replacement: $1
        action: replace
      - source_labels: [kubernetes_name]
        separator: ;
        regex: .*kube-state-metrics.*
        target_label: kubernetes_name
        action: replace
    - job_name: kubernetes-pods
      honor_timestamps: true
      scrape_interval: 1m
      scrape_timeout: 1m
      metrics_path: /metrics
      scheme: http
      kubernetes_sd_configs:
      - role: pod
      relabel_configs:
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        separator: ;
        regex: "true"
        replacement: $1
        action: keep
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
        separator: ;
        regex: (.+)
        target_label: __metrics_path__
        replacement: $1
        action: replace
      - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
        separator: ;
        regex: ([^:]+)(?::\d+)?;(\d+)
        target_label: __address__
        replacement: $1:$2
        action: replace
      - separator: ;
        regex: __meta_kubernetes_pod_label_(k8s_app|dolphin_part_of_instance|app_kubernetes_io_component|app_kubernetes_io_instance|dolphin_client_id|app_kubernetes_io_part_of|app_kubernetes_io_name|name|release)
        replacement: $1
        action: labelmap
      - separator: ;
        regex: __meta_(kubernetes_namespace|kubernetes_pod_name|kubernetes_pod_host_ip)
        replacement: $1
        action: labelmap
      - source_labels: [kubernetes_pod_name]
        separator: ;
        regex: (node-exporter.*|process-exporter.*)
        target_label: kubernetes_pod_name
        action: replace
      - source_labels: [name]
        separator: ;
        regex: (.*node-exporter|.*process-exporter)
        target_label: name
        action: replace
      - source_labels: [__meta_kubernetes_pod_host_ip]
        separator: ;
        regex: (.*)
        target_label: node
        replacement: k8s-$1
        action: replace

  创建prometheus.yaml,pod的配置

apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
  namespace: monitoring
  labels:
    app: prometheus
spec:
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      serviceAccountName: prometheus
      containers:
      - image: prom/prometheus:v2.29.1
        name: prometheus
        command:
        - "/bin/prometheus"
        args:
        - "--config.file=/etc/prometheus/prometheus.yml"
        - "--storage.tsdb.path=/prometheus"
        - "--storage.tsdb.retention=14d"
        - "--web.enable-admin-api"  # 控制对admin HTTP API的访问,其中包括删除时间序列等功能
        - "--web.enable-lifecycle"  # 支持热更新,直接执行localhost:9090/-/reload立即生效
        ports:
        - containerPort: 9090
          protocol: TCP
          name: http
        volumeMounts:
        - mountPath: "/prometheus"
          subPath: prometheus
          name: data
        - mountPath: "/etc/prometheus"
          name: config-volume
        resources:
          requests:
            cpu: 100m
            memory: 512Mi
          limits:
            cpu: 500m
            memory: 1024Mi
      securityContext:
        runAsUser: 0
      volumes:
      - name: data
        persistentVolumeClaim:
          claimName: prometheus-pvc
      - configMap:
          name: prometheus-config
        name: config-volume

  二、搭建exporter采集器(node-exporter、kube-state-metrics)

1、搭建node-exporter

apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app: node-exporter
  template:
    metadata:
      annotations:
        prometheus.io/path: /metrics
        prometheus.io/port: "9100"
        prometheus.io/scrape: "true"
      labels:
        app: node-exporter
    spec:
      hostNetwork: true
      containers:
      - name: node-exporter
        image: prom/node-exporter:v1.2.2
        ports:
          - containerPort: 9100
        imagePullPolicy: IfNotPresent
        command:
        - /bin/node_exporter
        - --path.procfs
        - /host/proc
        - --path.sysfs
        - /host/sys
        - --collector.filesystem.ignored-mount-points
        - ^/(sys|proc|dev|host|etc)($|/)
        volumeMounts:
        - name: proc
          mountPath: /host/proc
        - name: sys
          mountPath: /host/sys
        - name: root
          mountPath: /rootfs
      volumes:
      - name: proc
        hostPath:
          path: /proc
      - name: sys
        hostPath:
          path: /sys
      - name: root
        hostPath:
          path: /

 2、创建kube-state-metrics

---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  namespace: kube-system
  name: kube-state-metrics
rules:
- apiGroups: [""]
  resources:
  - pods
  verbs: ["get"]
- apiGroups: ["apps"]
  resources:
  - deployments
  resourceNames: ["kube-state-metrics"]
  verbs: ["get", "update"]
- apiGroups: ["extensions"]
  resources:
  - deployments
  resourceNames: ["kube-state-metrics"]
  verbs: ["get", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: kube-state-metrics
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: kube-state-metrics
subjects:
- kind: ServiceAccount
  name: kube-state-metrics
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kube-state-metrics
rules:
- apiGroups: [""]
  resources:
  - configmaps
  - secrets
  - nodes
  - pods
  - services
  - resourcequotas
  - replicationcontrollers
  - limitranges
  - persistentvolumeclaims
  - persistentvolumes
  - namespaces
  - endpoints
  verbs: ["list", "watch"]
- apiGroups: ["extensions"]
  resources:
  - daemonsets
  - deployments
  - replicasets
  - ingresses
  verbs: ["list", "watch"]
- apiGroups: ["apps"]
  resources:
  - daemonsets
  - deployments
  - replicasets
  - statefulsets
  verbs: ["list", "watch"]
- apiGroups: ["batch"]
  resources:
  - cronjobs
  - jobs
  verbs: ["list", "watch"]
- apiGroups: ["autoscaling"]
  resources:
  - horizontalpodautoscalers
  verbs: ["list", "watch"]
- apiGroups: ["policy"]
  resources:
  - poddisruptionbudgets
  verbs: ["list", "watch"]
- apiGroups: ["certificates.k8s.io"]
  resources:
  - certificatesigningrequests
  verbs: ["list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
subjects:
- kind: ServiceAccount
  name: kube-state-metrics
  namespace: kube-system
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kube-state-metrics
  namespace: kube-system
---
apiVersion: v1
kind: Service
metadata:
  name: kube-state-metrics
  namespace: kube-system
  labels:
    k8s-app: kube-state-metrics
  annotations:
    prometheus.io/scrape: 'true'
spec:
  ports:
  - name: http-metrics
    port: 8080
    targetPort: http-metrics
    protocol: TCP
  - name: telemetry
    port: 8081
    targetPort: telemetry
    protocol: TCP
  selector:
    k8s-app: kube-state-metrics
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kube-state-metrics
  namespace: kube-system
spec:
  selector:
    matchLabels:
      k8s-app: kube-state-metrics
  replicas: 1
  template:
    metadata:
      labels:
        k8s-app: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      containers:
      - name: kube-state-metrics
        image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.2.0
        ports:
        - name: http-metrics
          containerPort: 8080
        - name: telemetry
          containerPort: 8081
        readinessProbe:
          httpGet:
            path: /healthz
            port: 8080
          initialDelaySeconds: 5
          timeoutSeconds: 5

  三、展示

 

posted @ 2021-09-17 15:42  Lenwen  阅读(381)  评论(0)    收藏  举报