学习K8S之路.9---使用Prometheus和Grafana监控kubernetes集群

一:部署kube-state-metrics

  kube-state-metrics官方地址:https://quay.io/repository/coreos/kube-state-metrics?tab=info

1.1:准备kube-state-metrics镜像

[root@k8s-6-96 ~]# docker pull quay.io/coreos/kube-state-metrics:v1.7.0
[root@k8s-6-96 ~]# docker tag b7a474fd3864 harbor.auth.com/k8s/kube-state-metrics:v1.7.0
[root@k8s-6-96 ~]# docker push harbor.auth.com/k8s/kube-state-metrics:v1.7.0

1.2:准备资源配置清单

[root@k8s-6-96 ~]# mkdir /data/k8s-yaml/kube-state-metrics/
[root@k8s-6-96 ~]# cd /data/k8s-yaml/kube-state-metrics/
[root@k8s-6-96 kube-state-metrics]# cat rbac.yaml 
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/cluster-service: "true"
  name: kube-state-metrics
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/cluster-service: "true"
  name: kube-state-metrics
rules:
- apiGroups:
  - ""
  resources:
  - configmaps
  - secrets
  - nodes
  - pods
  - services
  - resourcequotas
  - replicationcontrollers
  - limitranges
  - persistentvolumeclaims
  - persistentvolumes
  - namespaces
  - endpoints
  verbs:
  - list
  - watch
- apiGroups:
  - extensions
  resources:
  - daemonsets
  - deployments
  - replicasets
  verbs:
  - list
  - watch
- apiGroups:
  - apps
  resources:
  - statefulsets
  verbs:
  - list
  - watch
- apiGroups:
  - batch
  resources:
  - cronjobs
  - jobs
  verbs:
  - list
  - watch
- apiGroups:
  - autoscaling
  resources:
  - horizontalpodautoscalers
  verbs:
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/cluster-service: "true"
  name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
subjects:
- kind: ServiceAccount
  name: kube-state-metrics
  namespace: kube-system
[root@k8s-6-96 kube-state-metrics]# cat dp.yaml 
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  annotations:
    deployment.kubernetes.io/revision: "2"
  labels:
    grafanak8sapp: "true"
    app: kube-state-metrics
  name: kube-state-metrics
  namespace: kube-system
spec:
  selector:
    matchLabels:
      grafanak8sapp: "true"
      app: kube-state-metrics
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      creationTimestamp: null
      labels:
        grafanak8sapp: "true"
        app: kube-state-metrics
    spec:
      containers:
      - image: harbor.auth.com/k8s/kube-state-metrics:v1.7.0
        name: kube-state-metrics
        ports:
        - containerPort: 8080
          name: http-metrics
          protocol: TCP
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /healthz
            port: 8080
            scheme: HTTP
          initialDelaySeconds: 5
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 5
        imagePullPolicy: IfNotPresent
      imagePullSecrets:
      - name: harbor
      restartPolicy: Always
      serviceAccount: kube-state-metrics
      serviceAccountName: kube-state-metrics

1.3:在任意运算节点应用资源配置清单

[root@k8s-6-94 ~]# kubectl apply -f http://k8s-yaml.auth.com/kube-state-metrics/rbac.yaml
[root@k8s-6-94 ~]# kubectl apply -f http://k8s-yaml.auth.com/kube-state-metrics/dp.yaml

1.4:检查启动情况

[root@k8s-6-94 ~]# kubectl get pods -n kube-system -o wide |grep kube-state-metrics
kube-state-metrics-579cf49ffb-xzfsq     1/1     Running   0          4m15s   172.6.94.6   k8s-6-94.host.com   <none>           <none>
[root@k8s-6-94 ~]# curl http://172.6.94.6:8080/healthz
OK

二:部署node-exporter

  dockerhub官方地址:https://hub.docker.com/r/prom/node-exporter
  github官方地址:https://github.com/prometheus/node_exporter
2.1:准备node-exporter镜像

[root@k8s-6-96 ~]# docker pull prom/node-exporter:v0.16.0
[root@k8s-6-96 ~]# docker tag 188af75e2de0 harbor.auth.com/k8s/node-exporter:v0.16.0
[root@k8s-6-96 ~]# docker push harbor.auth.com/k8s/node-exporter:v0.16.0

2.2:准备资源配置清单

[root@k8s-6-96 ~]# mkdir /data/k8s-yaml/node-exporter/
[root@k8s-6-96 ~]# cd /data/k8s-yaml/node-exporter/
[root@k8s-6-96 node-exporter]# cat node-exporter-ds.yaml 
kind: DaemonSet
apiVersion: extensions/v1beta1
metadata:
  name: node-exporter
  namespace: kube-system
  labels:
    daemon: "node-exporter"
    grafanak8sapp: "true"
spec:
  selector:
    matchLabels:
      daemon: "node-exporter"
      grafanak8sapp: "true"
  template:
    metadata:
      name: node-exporter
      labels:
        daemon: "node-exporter"
        grafanak8sapp: "true"
    spec:
      volumes:
      - name: proc
        hostPath: 
          path: /proc
          type: ""
      - name: sys
        hostPath:
          path: /sys
          type: ""
      containers:
      - name: node-exporter
        image: harbor.auth.com/k8s/node-exporter:v0.16.0
        args:
        - --path.procfs=/host_proc
        - --path.sysfs=/host_sys
        ports:
        - name: node-exporter
          hostPort: 9100
          containerPort: 9100
          protocol: TCP
        volumeMounts:
        - name: sys
          readOnly: true
          mountPath: /host_sys
        - name: proc
          readOnly: true
          mountPath: /host_proc
      hostNetwork: true

2.3:在任意运算节点应用资源配置清单

[root@k8s-6-94 ~]# kubectl apply -f http://k8s-yaml.auth.com/node-exporter/node-exporter-ds.yaml

2.4:检查启动情况:node-exporter 配置了 hostNetwork,与宿主机共享网络,所以直接查看运算节点的9100端口

[root@k8s-6-94 ~]# netstat -nlput | grep 9100
tcp6       0      0 :::9100                 :::*                    LISTEN      3021/node_exporter

三:部署cadvisor
  dockerhub官方地址:https://hub.docker.com/r/google/cadvisor
  github官方地址:https://github.com/google/cadvisor
3.1:准备cadvisor镜像

[root@k8s-6-96 ~]# docker pull google/cadvisor:v0.29.2
[root@k8s-6-96 ~]# docker images | grep cadvisor
[root@k8s-6-96 ~]# docker tag 80e8c78a19c0 harbor.auth.com/k8s/cadvisor:v0.29.2
[root@k8s-6-96 ~]# docker push harbor.auth.com/k8s/cadvisor:v0.29.2

3.2:准备资源配置清单

[root@k8s-6-96 ~]# mkdir /data/k8s-yaml/cadvisor/
[root@k8s-6-96 ~]# cd /data/k8s-yaml/cadvisor/
[root@k8s-6-96 cadvisor]# cat dp.yaml 
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: cadvisor
  namespace: kube-system
  labels:
    app: cadvisor
spec:
  selector:
    matchLabels:
      name: cadvisor
  template:
    metadata:
      labels:
        name: cadvisor
    spec:
      hostNetwork: true
      tolerations:
      - key: node-role.kubernetes.io/master
        effect: NoSchedule
      containers:
      - name: cadvisor
        image: harbor.auth.com/k8s/cadvisor:v0.29.2
        imagePullPolicy: IfNotPresent
        volumeMounts:
        - name: rootfs
          mountPath: /rootfs
        - name: var-run
          mountPath: /var/run
        - name: sys
          mountPath: /sys
        - name: docker
          mountPath: /var/lib/docker
        ports:
          - name: http
            containerPort: 4194
            protocol: TCP
        readinessProbe:
          tcpSocket:
            port: 4194
          initialDelaySeconds: 5
          periodSeconds: 10
        args:
          - --housekeeping_interval=10s
          - --port=4194
      imagePullSecrets:
      - name: harbor
      terminationGracePeriodSeconds: 30
      volumes:
      - name: rootfs
        hostPath:
          path: /
      - name: var-run
        hostPath:
          path: /var/run
      - name: sys
        hostPath:
          path: /sys
      - name: docker
        hostPath:
          path: /data/docker

3.3:修改所有运算节点软连接

[root@k8s-6-94 ~]# mount -o remount,rw /sys/fs/cgroup/
[root@k8s-6-94 ~]# ln -s /sys/fs/cgroup/cpu,cpuacct/ /sys/fs/cgroup/cpuacct,cpu

3.4:在任意运算节点应用资源配置清单

[root@k8s-6-94 ~]# kubectl apply -f http://k8s-yaml.auth.com/cadvisor/dp.yaml

3.5:检查启动情况:cadvisor 配置了 hostNetwork,与宿主机共享网络,所以直接查看运算节点的4194端口

[root@k8s-6-94 ~]# netstat -luntp|grep 4194
tcp6       0      0 :::4194                 :::*                    LISTEN      11996/cadvisor   

四:部署blackbox-exporter
  dockerhub官方地址:https://hub.docker.com/r/prom/blackbox-exporter
  github官方地址:https://github.com/prometheus/blackbox_exporter
4.1:准备blackbox-exporter镜像

[root@k8s-6-96 ~]# docker pull prom/blackbox-exporter:v0.15.0
[root@k8s-6-96 ~]# docker tag 3a694f0e8f6b harbor.auth.com/k8s/blackbox-exporter:v0.15.0
[root@k8s-6-96 ~]# docker push harbor.auth.com/k8s/blackbox-exporter:v0.15.0

4.2:准备资源配置清单

[root@k8s-6-96 ~]# mkdir /data/k8s-yaml/blackbox-exporter/
[root@k8s-6-96 ~]# cd /data/k8s-yaml/blackbox-exporter/
[root@k8s-6-96 blackbox-exporter]# cat configmap.yaml 
apiVersion: v1
kind: ConfigMap
metadata:
  labels:
    app: blackbox-exporter
  name: blackbox-exporter
  namespace: kube-system
data:
  blackbox.yml: |-
    modules:
      http_2xx:
        prober: http
        timeout: 2s
        http:
          valid_http_versions: ["HTTP/1.1", "HTTP/2"]
          valid_status_codes: [200,301,302]
          method: GET
          preferred_ip_protocol: "ip4"
      tcp_connect:
        prober: tcp
        timeout: 2s
[root@k8s-6-96 blackbox-exporter]# cat dp.yaml 
kind: Deployment
apiVersion: extensions/v1beta1
metadata:
  name: blackbox-exporter
  namespace: kube-system
  labels:
    app: blackbox-exporter
  annotations:
    deployment.kubernetes.io/revision: "1"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: blackbox-exporter
  template:
    metadata:
      labels:
        app: blackbox-exporter
    spec:
      volumes:
      - name: config
        configMap:
          name: blackbox-exporter
          defaultMode: 420
      containers:
      - name: blackbox-exporter
        image: harbor.auth.com/k8s/blackbox-exporter:v0.15.0
        args:
        - --config.file=/etc/blackbox_exporter/blackbox.yml
        - --log.level=debug
        - --web.listen-address=:9115
        ports:
        - name: blackbox-port
          containerPort: 9115
          protocol: TCP
        resources:
          limits:
            cpu: 200m
            memory: 256Mi
          requests:
            cpu: 100m
            memory: 50Mi
        volumeMounts:
        - name: config
          mountPath: /etc/blackbox_exporter
        readinessProbe:
          tcpSocket:
            port: 9115
          initialDelaySeconds: 5
          timeoutSeconds: 5
          periodSeconds: 10
          successThreshold: 1
          failureThreshold: 3
[root@k8s-6-96 blackbox-exporter]# cat service.yaml 
kind: Service
apiVersion: v1
metadata:
  name: blackbox-exporter
  namespace: kube-system
spec:
  selector:
    app: blackbox-exporter
  ports:
    - protocol: TCP
      port: 9115
      name: http
[root@k8s-6-96 blackbox-exporter]# cat ingress.yaml 
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: blackbox-exporter
  namespace: kube-system
spec:
  rules:
  - host: blackbox.auth.com
    http:
      paths:
      - backend:
          serviceName: blackbox-exporter
          servicePort: 9115

4.3:在任意运算节点应用资源配置清单

[root@k8s-6-94 ~]# kubectl apply -f http://k8s-yaml.auth.com/blackbox-exporter/configmap.yaml
[root@k8s-6-94 ~]# kubectl apply -f http://k8s-yaml.auth.com/blackbox-exporter/dp.yaml
[root@k8s-6-94 ~]# kubectl apply -f http://k8s-yaml.auth.com/blackbox-exporter/ingress.yaml
[root@k8s-6-94 ~]# kubectl apply -f http://k8s-yaml.auth.com/blackbox-exporter/service.yaml

4.4:解析域名

在DNS服务器上解析域名
[root@k8s-6-92 ~]# vi /var/named/auth.com.zone
blackbox           A    192.168.6.89
注:serial  值进行+1

[root@k8s-6-92 ~]# systemctl restart named

4.5:浏览器访问:

  http://blackbox.auth.com

五:部署prometheus

  dockerhub官方地址:https://hub.docker.com/r/prom/prometheus/tags

  github官方地址:https://github.com/prometheus/prometheus

5.1:准备prometheus镜像

[root@k8s-6-96 ~]# docker pull prom/prometheus:v2.15.0
[root@k8s-6-96 ~]# docker images | grep prometheus
[root@k8s-6-96 ~]# docker tag c4d5198f680a harbor.auth.com/k8s/prometheus:v2.15.0
[root@k8s-6-96 ~]# docker push harbor.auth.com/k8s/prometheus:v2.15.0

5.2:准备资源配置清单

[root@k8s-6-96 ~]# mkdir /data/k8s-yaml/prometheus && cd /data/k8s-yaml/prometheus
[root@k8s-6-96 prometheus]# cat rbac.yaml 
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/cluster-service: "true"
  name: prometheus
  namespace: infra
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/cluster-service: "true"
  name: prometheus
rules:
- apiGroups:
  - ""
  resources:
  - nodes
  - nodes/metrics
  - services
  - endpoints
  - pods
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - configmaps
  verbs:
  - get
- nonResourceURLs:
  - /metrics
  verbs:
  - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/cluster-service: "true"
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
- kind: ServiceAccount
  name: prometheus
  namespace: infra
[root@k8s-6-96 prometheus]# cat deployment.yaml 
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  annotations:
    deployment.kubernetes.io/revision: "5"
  labels:
    name: prometheus
  name: prometheus
  namespace: infra
spec:
  progressDeadlineSeconds: 600
  replicas: 1
  revisionHistoryLimit: 7
  selector:
    matchLabels:
      app: prometheus
  strategy:
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 1
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      nodeName: k8s-6-94.host.com
      containers:
      - image: harbor.auth.com/k8s/prometheus:v2.15.0
        args:
        - --config.file=/data/etc/prometheus.yml
        - --storage.tsdb.path=/data/prom-db
        - --storage.tsdb.retention=72h
        - --storage.tsdb.min-block-duration=10m
        command:
        - /bin/prometheus
        name: prometheus
        ports:
        - containerPort: 9090
          protocol: TCP
        resources:
          requests:
            cpu: "1000m"
            memory: "1.5Gi"
          limits:
            cpu: "2000m"
            memory: "3Gi"
        volumeMounts:
        - mountPath: /data
          name: data
      imagePullSecrets:
      - name: harbor
      securityContext:
        runAsUser: 0
      serviceAccountName: prometheus
      volumes:
      - name: data
        nfs:
          server: 192.168.6.96
          path: /data/nfs-volume/prometheus

注:nodeName: 指定 Pod 调度到哪个运算节点上运行容器
--storage.tsdb.min-block-duration=10m    表示每10分钟将内存中的数据压缩落盘为一个块,生产中可以设置为默认的2h,根据情况而定
[root@k8s-6-96 prometheus]# cat service.yaml 
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: infra
spec:
  ports:
  - port: 9090
    protocol: TCP
    name: prometheus
  selector:
    app: prometheus
  type: ClusterIP
[root@k8s-6-96 prometheus]# cat ingress.yaml 
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  annotations:
    kubernetes.io/ingress.class: traefik
  name: prometheus
  namespace: infra
spec:
  rules:
  - host: prometheus.auth.com
    http:
      paths:
      - path: /
        backend:
          serviceName: prometheus
          servicePort: 9090

5.3:拷贝证书

[root@k8s-6-96 ~]# mkdir -pv /data/nfs-volume/prometheus/{etc,prom-db}
[root@k8s-6-96 ~]# cd /data/nfs-volume/prometheus/etc/
[root@k8s-6-96 etc]# cp /opt/certs/ca.pem .
[root@k8s-6-96 etc]# cp /opt/certs/client.pem .
[root@k8s-6-96 etc]# cp /opt/certs/client-key.pem .

5.4:准备prometheus的配置文件

[root@k8s-6-96 etc]# cat prometheus.yml 
global:
  scrape_interval:     15s
  evaluation_interval: 15s
scrape_configs:
- job_name: 'etcd'
  tls_config:
    ca_file: /data/etc/ca.pem
    cert_file: /data/etc/client.pem
    key_file: /data/etc/client-key.pem
  scheme: https
  static_configs:
  - targets:
    - '192.168.6.93:2379'
    - '192.168.6.94:2379'
    - '192.168.6.95:2379'
- job_name: 'kubernetes-apiservers'
  kubernetes_sd_configs:
  - role: endpoints
  scheme: https
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  relabel_configs:
  - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
    action: keep
    regex: default;kubernetes;https
- job_name: 'kubernetes-pods'
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
    action: keep
    regex: true
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
    action: replace
    target_label: __metrics_path__
    regex: (.+)
  - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
    action: replace
    regex: ([^:]+)(?::\d+)?;(\d+)
    replacement: $1:$2
    target_label: __address__
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_pod_name]
    action: replace
    target_label: kubernetes_pod_name
- job_name: 'kubernetes-kubelet'
  kubernetes_sd_configs:
  - role: node
  relabel_configs:
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __address__
    replacement: ${1}:10255
- job_name: 'kubernetes-cadvisor'
  kubernetes_sd_configs:
  - role: node
  relabel_configs:
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __address__
    replacement: ${1}:4194
- job_name: 'kubernetes-kube-state'
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_pod_name]
    action: replace
    target_label: kubernetes_pod_name
  - source_labels: [__meta_kubernetes_pod_label_grafanak8sapp]
    regex: .*true.*
    action: keep
  - source_labels: ['__meta_kubernetes_pod_label_daemon', '__meta_kubernetes_pod_node_name']
    regex: 'node-exporter;(.*)'
    action: replace
    target_label: nodename
- job_name: 'blackbox_http_pod_probe'
  metrics_path: /probe
  kubernetes_sd_configs:
  - role: pod
  params:
    module: [http_2xx]
  relabel_configs:
  - source_labels: [__meta_kubernetes_pod_annotation_blackbox_scheme]
    action: keep
    regex: http
  - source_labels: [__address__, __meta_kubernetes_pod_annotation_blackbox_port,  __meta_kubernetes_pod_annotation_blackbox_path]
    action: replace
    regex: ([^:]+)(?::\d+)?;(\d+);(.+)
    replacement: $1:$2$3
    target_label: __param_target
  - action: replace
    target_label: __address__
    replacement: blackbox-exporter.kube-system:9115
  - source_labels: [__param_target]
    target_label: instance
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_pod_name]
    action: replace
    target_label: kubernetes_pod_name
- job_name: 'blackbox_tcp_pod_probe'
  metrics_path: /probe
  kubernetes_sd_configs:
  - role: pod
  params:
    module: [tcp_connect]
  relabel_configs:
  - source_labels: [__meta_kubernetes_pod_annotation_blackbox_scheme]
    action: keep
    regex: tcp
  - source_labels: [__address__, __meta_kubernetes_pod_annotation_blackbox_port]
    action: replace
    regex: ([^:]+)(?::\d+)?;(\d+)
    replacement: $1:$2
    target_label: __param_target
  - action: replace
    target_label: __address__
    replacement: blackbox-exporter.kube-system:9115
  - source_labels: [__param_target]
    target_label: instance
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_pod_name]
    action: replace
    target_label: kubernetes_pod_name
- job_name: 'traefik'
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
    action: keep
    regex: traefik
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
    action: replace
    target_label: __metrics_path__
    regex: (.+)
  - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
    action: replace
    regex: ([^:]+)(?::\d+)?;(\d+)
    replacement: $1:$2
    target_label: __address__
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_pod_name]
    action: replace
    target_label: kubernetes_pod_name

5.5:在任意运算节点应用资源配置清单

[root@k8s-6-94 ~]# kubectl apply -f http://k8s-yaml.auth.com/prometheus/rbac.yaml
[root@k8s-6-94 ~]# kubectl apply -f http://k8s-yaml.auth.com/prometheus/deployment.yaml
[root@k8s-6-94 ~]# kubectl apply -f http://k8s-yaml.auth.com/prometheus/service.yaml
[root@k8s-6-94 ~]# kubectl apply -f http://k8s-yaml.auth.com/prometheus/ingress.yaml

5.6:在DNS服务器上解析域名

[root@k8s-6-92 ~]# vi /var/named/auth.com.zone
prometheus           A    192.168.6.89
注:serial  值进行+1

[root@k8s-6-92 ~]# systemctl restart named

5.7:浏览器访问http://prometheus.auth.com/

  Prometheus监控内容说明:

etcd监控etcd服务
kubernetes-apiserver监控apiserver服务
kubernetes-kubelet监控kubelet服务

kubernetes-kube-state监控基本信息:
    node-exporter监控Node节点信息
    kube-state-metrics监控pod信息

traefik监控traefik-ingress-controller
    注意:在traefik的pod控制器上加annotations,并重启pod,监控生效
    配置范例:

    "annotations": {
      "prometheus_io_scheme": "traefik",
      "prometheus_io_path": "/metrics",
      "prometheus_io_port": "8080"
    }

blackbox*监控服务是否存活:
    blackbox_tcp_pod_probe监控tcp协议服务是否存活:
        注意:在pod控制器上加annotations,并重启pod,监控生效
        配置范例:

        "annotations": {
          "blackbox_port": "20880",
          "blackbox_scheme": "tcp"
        }

    blackbox_http_pod_probe监控http协议服务是否存活:
        注意:在pod控制器上加annotations,并重启pod,监控生效
        配置范例:

        "annotations": {
          "blackbox_path": "/",
          "blackbox_port": "8080",
          "blackbox_scheme": "http"
        }

kubernetes-pods*监控JVM信息
    注意:在pod控制器上加annotations,并重启pod,监控生效
    配置范例:

    "annotations": {
      "prometheus_io_scrape": "true",
      "prometheus_io_port": "12346",
      "prometheus_io_path": "/"
    }

5.8:相应的服务服务接入prometheus监控

示例1:修改traefik服务接入prometheus监控
    kube-system名称空间->daemonset->traefik-ingress-controller->spec->template->metadata下,添加
    "annotations": {
      "prometheus_io_scheme": "traefik",
      "prometheus_io_path": "/metrics",
      "prometheus_io_port": "8080"
    }
    删除pod,重启traefik,观察监控


示例2:java程序监控jvm 和 TCP
    "annotations": {
      "prometheus_io_scrape": "true",
      "prometheus_io_path": "/",
      "prometheus_io_port": "12346",
      "blackbox_port": "20880",
      "blackbox_scheme": "tcp"
    }
    删除pod,观察监控


示例3:java程序监控jvm 和 http
    "annotations": {
      "prometheus_io_scrape": "true",
      "prometheus_io_path": "/",
      "prometheus_io_port": "12346",
      "blackbox_path": "/hello",
      "blackbox_port": "8080",
      "blackbox_scheme": "http"
    }
    删除pod,观察监控

 六:部署Grafana

  grafana官网:https://grafana.com

  dockerhub官方地址:https://hub.docker.com/r/grafana/grafana/tags

  github官方地址:https://github.com/grafana/grafana

6.1:准备grafana镜像

[root@k8s-6-96 ~]# docker pull grafana/grafana:6.1.6
[root@k8s-6-96 ~]# docker images | grep grafana
[root@k8s-6-96 ~]# docker tag f96bf1723e2a harbor.auth.com/k8s/grafana:6.1.6
[root@k8s-6-96 ~]# docker push harbor.auth.com/k8s/grafana:6.1.6

6.2:准备资源配置清单

[root@k8s-6-96 ~]# mkdir /data/k8s-yaml/grafana
[root@k8s-6-96 ~]# mkdir /data/nfs-volume/grafana
[root@k8s-6-96 ~]# cd /data/k8s-yaml/grafana/
[root@k8s-6-96 grafana]# cat rbac.yaml 
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/cluster-service: "true"
  name: grafana
rules:
- apiGroups:
  - "*"
  resources:
  - namespaces
  - deployments
  - pods
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/cluster-service: "true"
  name: grafana
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: grafana
subjects:
- kind: User
  name: k8s-node
[root@k8s-6-96 grafana]# cat dp.yaml 
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  labels:
    app: grafana
    name: grafana
  name: grafana
  namespace: infra
spec:
  progressDeadlineSeconds: 600
  replicas: 1
  revisionHistoryLimit: 7
  selector:
    matchLabels:
      name: grafana
  strategy:
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 1
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: grafana
        name: grafana
    spec:
      containers:
      - image: harbor.auth.com/k8s/grafana:6.1.6
        imagePullPolicy: IfNotPresent
        name: grafana
        ports:
        - containerPort: 3000
          protocol: TCP
        volumeMounts:
        - mountPath: /var/lib/grafana
          name: data
      imagePullSecrets:
      - name: harbor
      nodeName: k8s-6-95.host.com
      restartPolicy: Always
      securityContext:
        runAsUser: 0
      volumes:
      - nfs:
          server: 192.168.6.96
          path: /data/nfs-volume/grafana
        name: data
[root@k8s-6-96 grafana]# cat svc.yaml 
apiVersion: v1
kind: Service
metadata:
  name: grafana
  namespace: infra
spec:
  ports:
  - port: 3000
    protocol: TCP
  selector:
    app: grafana
  type: ClusterIP
[root@k8s-6-96 grafana]# cat ingress.yaml 
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: grafana
  namespace: infra
spec:
  rules:
  - host: grafana.auth.com
    http:
      paths:
      - path: /
        backend:
          serviceName: grafana
          servicePort: 3000

6.3:在任意运算节点上应用资源配置清单

[root@k8s-6-94 ~]# kubectl apply -f http://k8s-yaml.auth.com/grafana/rbac.yaml 
[root@k8s-6-94 ~]# kubectl apply -f http://k8s-yaml.auth.com/grafana/dp.yaml 
[root@k8s-6-94 ~]# kubectl apply -f http://k8s-yaml.auth.com/grafana/svc.yaml 
[root@k8s-6-94 ~]# kubectl apply -f http://k8s-yaml.auth.com/grafana/ingress.yaml 

6.4:在DNS服务器上解析域名

[root@k8s-6-92 ~]# vi /var/named/auth.com.zone
grafana           A    192.168.6.89
注:serial  值进行+1

[root@k8s-6-92 ~]# systemctl restart named

6.5:浏览器访问http://grafana.auth.com

Step1:默认用户名:admin   密码:admin   登录成功后,进行修改密码,admin123
Step2:配置grafana页面外观
    Configuration -> Preferences
        UI Theme -> Light
        Home Dashboard -> Default
        Timezone -> Local browser time
    save
Step3:安装插件
    安装Kubernetes App插件
        安装方法一:进入grafana容器中执行命令进行安装
        grafana-cli plugins install grafana-kubernetes-app
        安装方法二:
        进入到/data/nfs-volume/grafana/plugins目录下,进行下载,解压到当前目录下
    安装Clock Pannel插件
        grafana-cli plugins install grafana-clock-panel
    安装Pie Chart插件
        grafana-cli plugins install grafana-piechart-panel
    安装D3 Gauge插件
        grafana-cli plugins install briangann-gauge-panel
    安装Discrete插件
        grafana-cli plugins install natel-discrete-panel

    重启grafana的pod
Step4:配置grafana数据源
    Configuration -> Data Sources,选择prometheus,在Settings中进行配置:
        HTTP模块中配置:
            URL:http://prometheus.auth.com
            Access:Server(Default)
        Auth模块中配置:
            勾选:TLS Client Auth
            勾选:With CA Cert
            将ca.pem、client.pem和client-key.pem粘贴至文本框内
        HTTP Method 选择 GET
        点击"Save & Test"
Step5:配置Plugins/Kubernetes
    Configuration -> Plugins -> Kubernetes -> Config -> Enable -> Update
    此时左边菜单出现Kubernetes图标
Step6:配置Kubernetes
    点击kubernetes图标 -> New Cluster
        Name:myk8s
        HTTP模块中配置:
            URL:https://192.168.6.89:7443
            Access:Server(Default)
        Auth模块中配置:
            勾选:TLS Client Auth
            勾选:With CA Cert
            将ca.pem、client.pem和client-key.pem粘贴至文本框内
        Prometheus Read模块中配置:
            Datasource:Prometheus
        点击"Save"
Step7:点击 Configuration,选择刚刚添加的 myk8s,点击"Save & Test"
Step8:点击kubernetes图标,点击 Overview
Step9:配置自定义dashboard
    由于原有的dashboard存在异常,需要重新自定义:点击Home,依次删除存在的4个dashboard,添加自己的dashboard

至此监控已经部署安装完成。

posted @ 2020-08-04 17:27  为生活而努力  阅读(562)  评论(0)    收藏  举报