Prometheus Deployment部署 + 二进制部署的prometheus实现服务发现 + Grafana配置 (一)
Node-exporter 安装脚本
[root@xksnode1 prometheus]# cat node-exporter-1.3.1-onekey-install.sh
#!/bin/bash
# One-shot installer for node_exporter 1.3.1.
#
# Unpacks the release tarball into /apps, points /apps/node_exporter at the
# unpacked directory, installs the systemd unit and (re)starts the service.
# Run it from the directory that holds both the tarball and
# node-exporter.service; it needs permission to write /apps and
# /etc/systemd/system.
set -euo pipefail # stop at the first failing step instead of reporting success

PKG="node_exporter-1.3.1.linux-amd64.tar.gz"
S_DIR="node_exporter-1.3.1.linux-amd64"

mkdir -p /apps
tar xvf "${PKG}" -C /apps/

# -f and -n replace an existing /apps/node_exporter link in place. Without
# them a second run either fails or creates a new link *inside* the directory
# the old link points to.
ln -sfnv "/apps/${S_DIR}" /apps/node_exporter

# The leading backslash bypasses any `cp` alias (such as `cp -i`).
\cp ./node-exporter.service /etc/systemd/system/node-exporter.service

systemctl daemon-reload
systemctl restart node-exporter
systemctl enable node-exporter

echo "node-exporter install successful"
Prometheus安装脚本
[root@xksnode1 prometheus]# cat prometheus-install.sh
#!/bin/bash
# One-shot installer for the Prometheus server binary release.
#
# Unpacks prometheus-${VERSION}.linux-amd64.tar.gz into /apps, points
# /apps/prometheus at the unpacked directory, installs the systemd unit and
# (re)starts the service. Run it from the directory that holds both the
# tarball and prometheus.service; it needs permission to write /apps and
# /etc/systemd/system.
set -euo pipefail # stop at the first failing step instead of reporting success

VERSION="2.38.0"
PKG="prometheus-${VERSION}.linux-amd64.tar.gz"
S_DIR="prometheus-${VERSION}.linux-amd64"

mkdir -p /apps
tar xvf "${PKG}" -C /apps/

# -f and -n replace an existing /apps/prometheus link in place, so changing
# VERSION and re-running switches the link instead of nesting a new link
# inside the previously linked directory.
ln -sfnv "/apps/${S_DIR}" /apps/prometheus

# The leading backslash bypasses any `cp` alias (such as `cp -i`).
\cp ./prometheus.service /etc/systemd/system/prometheus.service

systemctl daemon-reload
systemctl restart prometheus
systemctl enable prometheus

echo "prometheus Server install successful"
Prometheus前端管理界面(http://192.168.19.180:9090)

Node-exporter 指标查看页面(http://192.168.19.180:9100/metrics)

Prometheus配置文件监控多节点
查看代码
[root@xksmaster1 prometheus]# vim /apps/prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]

  # Node monitoring targets are configured here: one node-exporter
  # endpoint (port 9100) per host.
  - job_name: "prometheus-node-xks"
    static_configs:
      - targets: ["192.168.19.180:9100","192.168.19.181:9100","192.168.19.182:9100"]
Grafana配置
Red Hat, CentOS, RHEL, and Fedora(64 Bit)SHA256: d83363cb10035789499553aa51e889455576ceecf9c56a9dc188b179a761c752
wget https://dl.grafana.com/enterprise/release/grafana-enterprise-9.4.7-1.x86_64.rpm
sudo yum install grafana-enterprise-9.4.7-1.x86_64.rpm
[root@xianchaonode2 prometheus]# systemctl start grafana-server
[root@xianchaonode2 prometheus]# systemctl status grafana-server
https://grafana.com/grafana/dashboards/ #导入模板ID 11072
/var/lib/grafana/plugins #插件位置

Prometheus Deployment 部署server
[root@xksmaster1 case]# kubectl create sa monitor -n monitoring
serviceaccount/monitor created
[root@xksmaster1 case]# kubectl create clusterrolebinding monitor-clusterrolebinding -n monitoring --clusterrole=cluster-admin --serviceaccount=monitoring:monitor
clusterrolebinding.rbac.authorization.k8s.io/monitor-clusterrolebinding created
[root@xksmaster1 case]# cat case3-1-prometheus-cfg.yaml
---
# ConfigMap holding the prometheus.yml used by the in-cluster Prometheus
# server. All targets are found through Kubernetes service discovery.
kind: ConfigMap
apiVersion: v1
metadata:
  labels:
    app: prometheus
  name: prometheus-config
  namespace: monitoring
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      scrape_timeout: 10s
      evaluation_interval: 1m
    scrape_configs:
    # Node metrics: discover every node, then rewrite the discovered
    # <host>:10250 address to <host>:9100 so the scrape hits port 9100.
    - job_name: 'kubernetes-node'
      kubernetes_sd_configs:
      - role: node
      relabel_configs:
      - source_labels: [__address__]
        regex: '(.*):10250'
        replacement: '${1}:9100'
        target_label: __address__
        action: replace
      # Copy every Kubernetes node label onto the scraped series.
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
    # cAdvisor metrics: scraped through the API server's node proxy, using
    # the pod's service-account CA and token.
    - job_name: 'kubernetes-node-cadvisor'
      kubernetes_sd_configs:
      - role: node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      # Send every request to the API server ...
      - target_label: __address__
        replacement: kubernetes.default.svc:443
      # ... and select the node through the proxy path.
      - source_labels: [__meta_kubernetes_node_name]
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
    # API server metrics: keep only the `https` port of the `kubernetes`
    # service in the `default` namespace.
    - job_name: 'kubernetes-apiserver'
      kubernetes_sd_configs:
      - role: endpoints
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: default;kubernetes;https
    # Annotation-driven scraping: only endpoints whose Service carries
    # prometheus.io/scrape: "true" are kept. The scheme, path and port
    # annotations override the scrape scheme, metrics path and port.
    - job_name: 'kubernetes-service-endpoints'
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
        action: replace
        target_label: __scheme__
        regex: (https?)
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      # Replace the discovered port with the one from the port annotation.
      - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
        action: replace
        target_label: __address__
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: kubernetes_service_name
[root@xksmaster1 case]# kubectl apply -f case3-1-prometheus-cfg.yaml
configmap/prometheus-config created
#将Prometheus部署在node2节点上
[root@xksnode2 ~]# mkdir -p /data/prometheusdata
[root@xksnode2 ~]# chmod 777 /data/prometheusdata/
#Prometheus 容器默认以 nobody 用户(UID/GID 65534)运行
[root@xksnode2 ~]# chown -R 65534.65534 /data/prometheusdata/
[root@xksmaster1 case]# cat case3-2-prometheus-deployment.yaml
---
# Single-replica Prometheus server pinned to node xksnode2. Configuration
# comes from the prometheus-config ConfigMap and the TSDB is stored on the
# node's /data/prometheusdata directory.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-server
  namespace: monitoring
  labels:
    app: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
      component: server
    #matchExpressions:
    #- {key: app, operator: In, values: [prometheus]}
    #- {key: component, operator: In, values: [server]}
  template:
    metadata:
      labels:
        app: prometheus
        component: server
      annotations:
        prometheus.io/scrape: 'false'
    spec:
      # nodeName bypasses the scheduler. The pod must run on this node
      # because the hostPath data directory below exists only there.
      nodeName: xksnode2
      serviceAccountName: monitor
      containers:
      - name: prometheus
        #image: registry.cn-hangzhou.aliyuncs.com/birkhoff/prometheus:v2.31.2
        image: docker.io/prom/prometheus:v2.31.2
        imagePullPolicy: IfNotPresent
        command:
        - prometheus
        - --config.file=/etc/prometheus/prometheus.yml
        - --storage.tsdb.path=/prometheus
        # --storage.tsdb.retention is deprecated; retention.time is the
        # current flag with the same meaning (720h = 30 days).
        - --storage.tsdb.retention.time=720h
        ports:
        - containerPort: 9090
          protocol: TCP
        volumeMounts:
        # NOTE: a subPath mount does not receive ConfigMap updates. After
        # changing the ConfigMap the pod has to be recreated.
        - mountPath: /etc/prometheus/prometheus.yml
          name: prometheus-config
          subPath: prometheus.yml
        - mountPath: /prometheus/
          name: prometheus-storage-volume
      volumes:
      - name: prometheus-config
        configMap:
          name: prometheus-config
          items:
          - key: prometheus.yml
            path: prometheus.yml
            mode: 0644
      - name: prometheus-storage-volume
        hostPath:
          path: /data/prometheusdata
          # type Directory requires the path to already exist on the node.
          type: Directory
[root@xksmaster1 case]# cat case3-3-prometheus-svc.yaml
---
# NodePort Service exposing the Prometheus web UI/API (container port 9090)
# on port 30090 of every node.
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitoring
  labels:
    app: prometheus
spec:
  type: NodePort
  ports:
  - port: 9090
    targetPort: 9090
    nodePort: 30090
    protocol: TCP
  # Must match the pod template labels of the prometheus-server Deployment.
  selector:
    app: prometheus
    component: server
[root@xksmaster1 case]# kubectl get svc -n monitoring
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
prometheus NodePort 10.96.241.118 <none> 9090:30090/TCP 25m
[root@xksmaster1 case]# kubectl get pods -n monitoring
NAME READY STATUS RESTARTS AGE
cadvisor-ckc52 1/1 Running 1 (70m ago) 4h10m
cadvisor-qqdj9 1/1 Running 1 (71m ago) 4h10m
prometheus-server-7965c5f565-69h9c 1/1 Running 1 (21m ago) 21m
[root@xksmaster1 case]# kubectl get ep -n monitoring
NAME ENDPOINTS AGE
prometheus 10.244.207.100:9090 26m
[root@xksmaster1 case]#
[root@xksmaster1 case]# kubectl get pods -n monitoring
NAME READY STATUS RESTARTS AGE
cadvisor-ckc52 1/1 Running 1 (72m ago) 4h12m
cadvisor-qqdj9 1/1 Running 1 (73m ago) 4h12m
prometheus-server-7965c5f565-69h9c 1/1 Running 1 (23m ago) 23m
[root@xksmaster1 case]# kubectl exec -it prometheus-server-7965c5f565-69h9c sh -n monitoring
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
/prometheus $ id
uid=65534(nobody) gid=65534(nobody)
在prometheus实现kubernetes-apiserver及coredns服务发现


浙公网安备 33010602011771号