20220306作业 prometheus1

1.二进制安装prometheus

mkdir /apps/
ln -sf  prometheus-2.33.4.linux-amd64 prometheus
检查配置文件
cd prometheus; ./promtool check config prometheus.yml
创建service文件
vim /etc/systemd/system/prometheus.service 
[Unit]
Description=Prometheus Server
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target

[Service]
Restart=on-failure
WorkingDirectory=/apps/prometheus/
ExecStart=/apps/prometheus/prometheus --config.file=/apps/prometheus/prometheus.yml

# Fixed: section header was "[Instal1]" (digit 1) — systemd would ignore the
# section and "systemctl enable prometheus" would have nothing to link.
[Install]
WantedBy=multi-user.target
启动prometheus并设置开机自启动
systemctl start prometheus ;systemctl enable prometheus

2.二进制安装node_exporter

mkdir /apps
ln -sf node_exporter-1.3.1.linux-amd64 node_exporter
创建service文件
vim /etc/systemd/system/node-exporter.service
[Unit]
Description=Prometheus Node Exporter
After=network.target

[Service]
# Restart on failure, consistent with the prometheus.service unit above.
Restart=on-failure
ExecStart=/apps/node_exporter/node_exporter

# Fixed: section header was "[Instal1]" (digit 1) — systemd would ignore the
# section and "systemctl enable node-exporter" would have nothing to link.
[Install]
WantedBy=multi-user.target
启动node-export 并设置开机自启动
systemctl start node-exporter ; systemctl enable node-exporter

3.添加node节点收集数据

vim /apps/prometheus/prometheus.yml
  # Static node-exporter target; tabs replaced with spaces (tabs are invalid
  # YAML indentation and would make prometheus.yml fail to parse).
  - job_name: "prometheus-node"
    static_configs:
      - targets: ["192.168.1.242:9100"]

4.安装grafana

wget https://dl.grafana.com/enterprise/release/grafana-enterprise-8.4.3-1.x86_64.rpm
sudo yum install grafana-enterprise-8.4.3-1.x86_64.rpm
启动grafana并设置开机自启动
systemctl restart grafana-server;systemctl enable grafana-server

5.daemon部署node-export

先创建命名空间
kubectl create namespace monitoring
根据yaml创建node-export
# DaemonSet: run one node-exporter pod on every node (including masters).
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitoring
  labels:
    k8s-app: node-exporter
spec:
  selector:
    matchLabels:
      k8s-app: node-exporter
  template:
    metadata:
      labels:
        k8s-app: node-exporter
    spec:
      # Tolerate the master taint so exporters also run on control-plane nodes.
      tolerations:
        - effect: NoSchedule
          key: node-role.kubernetes.io/master
      containers:
      - image: prom/node-exporter:v1.3.1
        imagePullPolicy: IfNotPresent
        name: prometheus-node-exporter
        ports:
        - containerPort: 9100
          hostPort: 9100
          protocol: TCP
          name: metrics
        # Host filesystems mounted so the exporter reports node (not container)
        # metrics; paths are passed to the exporter via --path.* args below.
        volumeMounts:
        - mountPath: /host/proc
          name: proc
        - mountPath: /host/sys
          name: sys
        - mountPath: /host
          name: rootfs
        args:
        - --path.procfs=/host/proc
        - --path.sysfs=/host/sys
        - --path.rootfs=/host
      volumes:
        - name: proc
          hostPath:
            path: /proc
        - name: sys
          hostPath:
            path: /sys
        - name: rootfs
          hostPath:
            path: /
      # Share the host's network and PID namespaces so metrics describe the node.
      hostNetwork: true
      hostPID: true
---
apiVersion: v1
kind: Service
metadata:
  annotations:
    # Picked up by the 'kubernetes-service-endpoints' scrape job (see ConfigMap).
    prometheus.io/scrape: "true"
  labels:
    k8s-app: node-exporter
  name: node-exporter
  namespace: monitoring
spec:
  type: NodePort
  ports:
  - name: http
    port: 9100
    nodePort: 39100
    protocol: TCP
  selector:
    k8s-app: node-exporter

6.deployment 部署prometheus server

创建configmap:
---
kind: ConfigMap
apiVersion: v1
metadata:
  labels:
    app: prometheus
  name: prometheus-config
  namespace: monitoring
data:
  # Mounted into the prometheus-server pod as /etc/prometheus/prometheus.yml.
  prometheus.yml: |
    global:
      scrape_interval: 15s
      scrape_timeout: 10s
      evaluation_interval: 1m
    scrape_configs:
    # node-exporter on every node: discover kubelets, rewrite port 10250 -> 9100.
    - job_name: 'kubernetes-node'
      kubernetes_sd_configs:
      - role: node
      relabel_configs:
      - source_labels: [__address__]
        regex: '(.*):10250'
        replacement: '${1}:9100'
        target_label: __address__
        action: replace
      # Promote all node labels to target labels.
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)

    # cadvisor metrics fetched through the apiserver node proxy over https.
    - job_name: 'kubernetes-node-cadvisor'
      kubernetes_sd_configs:
      - role: node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - target_label: __address__
        replacement: kubernetes.default.svc:443
      - source_labels: [__meta_kubernetes_node_name]
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

    # Services annotated prometheus.io/scrape: "true" (e.g. the node-exporter svc).
    - job_name: 'kubernetes-service-endpoints'
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      # Optional prometheus.io/scheme, .../path, .../port annotations override
      # the scheme, metrics path and port of the scrape target.
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
        action: replace
        target_label: __scheme__
        regex: (https?)
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
        action: replace
        target_label: __address__
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: kubernetes_name

    # The apiserver itself: keep only the default/kubernetes https endpoint.
    - job_name: 'kubernetes-apiserver'
      kubernetes_sd_configs:
      - role: endpoints
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: default;kubernetes;https
创建serviceaccount:
kubectl create serviceaccount monitor -n monitoring
账号授权:
kubectl create clusterrolebinding monitor-clusterrolebinding -n monitoring --clusterrole=cluster-admin --serviceaccount=monitoring:monitor
部署prometheus server 
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-server
  namespace: monitoring
  labels:
    app: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
      component: server
    #matchExpressions:
    #- {key: app, operator: In, values: [prometheus]}
    #- {key: component, operator: In, values: [server]}
  template:
    metadata:
      labels:
        app: prometheus
        component: server
      annotations:
        # Opt this pod out of annotation-based service discovery scraping.
        prometheus.io/scrape: 'false'
    spec:
      #nodeName: 192.168.1.73
      # Service account bound to cluster-admin via the clusterrolebinding
      # created above, so in-cluster SD can list nodes/endpoints/pods.
      serviceAccountName: monitor
      containers:
      - name: prometheus
        image: prom/prometheus:v2.31.2
        imagePullPolicy: IfNotPresent
        command:
          - prometheus
          - --config.file=/etc/prometheus/prometheus.yml
          - --storage.tsdb.path=/prometheus
          # NOTE(review): --storage.tsdb.retention is deprecated in favour of
          # --storage.tsdb.retention.time; still accepted by v2.31.
          - --storage.tsdb.retention=720h
        ports:
        - containerPort: 9090
          protocol: TCP
        volumeMounts:
        # subPath mounts only prometheus.yml, leaving the image's other
        # /etc/prometheus files (e.g. console templates) intact.
        - mountPath: /etc/prometheus/prometheus.yml
          name: prometheus-config
          subPath: prometheus.yml
        - mountPath: /prometheus/
          name: prometheus-storage-volume
      volumes:
        - name: prometheus-config
          configMap:
            name: prometheus-config
            items:
              - key: prometheus.yml
                path: prometheus.yml
                mode: 0644
        # TSDB data on NFS so it survives pod rescheduling.
        - name: prometheus-storage-volume
          nfs:
            server: 192.168.1.76
            path: /data/prometheus

7.daemon部署cadvisor

docker load -i cadvisor-v0.39.2.tar.gz
docker tag gcr.io/cadvisor/cadvisor:v0.39.2 192.168.1.138/baseimages/cadvisor:v0.39.2
docker push 192.168.1.138/baseimages/cadvisor:v0.39.2
kubectl apply -f case1-daemonset-deploy-cadvisor.yaml
# DaemonSet: run one cadvisor pod per node for container-level metrics (port 8080).
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: cadvisor
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app: cAdvisor
  template:
    metadata:
      labels:
        app: cAdvisor
    spec:
      tolerations:    # tolerate the master taint, ignore NoSchedule
        - effect: NoSchedule
          key: node-role.kubernetes.io/master
      hostNetwork: true
      restartPolicy: Always   # restart policy
      containers:
      - name: cadvisor
        image: 192.168.1.138/baseimages/cadvisor:v0.39.2
        imagePullPolicy: IfNotPresent  # image pull policy
        ports:
        - containerPort: 8080
        # Host paths cadvisor reads to collect container/runtime metrics.
        volumeMounts:
          - name: root
            mountPath: /rootfs
          - name: run
            mountPath: /var/run
          - name: sys
            mountPath: /sys
          - name: docker
            mountPath: /var/lib/docker
      volumes:
      - name: root
        hostPath:
          path: /
      - name: run
        hostPath:
          path: /var/run
      - name: sys
        hostPath:
          path: /sys
      - name: docker
        hostPath:
          path: /var/lib/docker

手动增加cAdvisor监控
# Static cadvisor targets. Fixed two YAML errors: missing space after
# "targets:" (a key must be followed by ": ") and tab indentation.
- job_name: "prometheus-pod"
  static_configs:
    - targets: ["192.168.1.34:8080","192.168.1.36:8080","192.168.1.37:8080","192.168.1.72:8080","192.168.1.73:8080","192.168.1.74:8080"]

8.PromQL语句

8.1 数据分类
	瞬时向量,瞬时数据(instant vector):是一组时间序列,每个时间序列包含单个数据样本
	范围向量,范围数据(range vector):是指在任何一个时间范围内,抓取的所有度量指标数据,比如最近一天的网卡流量
	标量、纯量数据(scalar):是一个浮点数类型的数据值,使用node_load1获取到的是一个瞬时向量,但是可使用内置函数scalar()将瞬时向量转换为标量,例如:scalar(sum(node_load1))
	字符串(string):字符串类型的数据,目前使用较少
	
8.2 数据类型
	counter:
		计数器,counter类型代表一个累积的指标数据,在没有被重置的前提下只增不减,比如磁盘io总数,nginx的请求总数,网卡流经的报文总数等
	Gauge:
		仪表盘,Gauge类型代表一个可以任意变化的指标数据,值可以随时增高或减少,如带宽速率,cpu负载、内存利用率,nginx活动连接数等
	Histogram:
		累积直方图,Histogram会在一段时间范围内对数据进行采样(通常是请求持续时间或响应大小等),假如每分钟产生一个当前的活跃连接数,那么一天就会产生1440个数据,查看数据的每间隔的绘图跨度为2小时,2点的柱状图包括0-2小时内的数据,4点包括0-4小时的数据
	Summary:摘要,也是一组数据,统计的不是区间的个数而是统计分位数,从0-1,表示的是0%-100%,如下统计的是0、0.25、0.5、0.75、1的数据量分别是多少
	
8.3 指标数据
node_memory_MemFree_bytes #查询node节点的总剩余内存
node_memory_MemFree_bytes{instance="192.168.1.34:9100"} #指定节点查询

8.4 匹配器
	= : 选择与提供的字符串完全相同的标签,精确匹配
		node_memory_MemFree_bytes{instance="192.168.1.34:9100"}
	!= :选择与提供的字符串不相同的标签,取反
		node_memory_MemFree_bytes{instance!="192.168.1.34:9100"}
	=~ : 选择正则表达式与提供的字符串相匹配的标签
		node_memory_MemFree_bytes{instance=~"192.168.1.*:9100"}
	!~ : 选择正则表达式与提供的字符串不匹配的标签
		node_memory_MemFree_bytes{instance!~"192.168.1.*:9100"}
	
8.5 时间范围
	s - 秒
	m - 分钟
	h - 小时
	d - 天
	w - 周
	y - 年

8.6 运算符
	+ 
	-
	*
	/
	%
	^等
8.7 聚合运算
	max() 最大值;min() 最小值 ; avg() 平均值
	sum() 求和 count() 统计返回值总数  count_values() 对value个数进行统计 #count_values("node_os_version",node_os_version)
	abs() 返回指标数据的值  absent() 如果监控指标有数据返回空,没有返回1,用于对监控项设置告警
	stddev() 求标准差 stdvar() 求方差
	topk() 样本值排名最大的N个数据 #topk(5,promhttp_metric_handler_requests_total) bottomk() 样本值排名最小的N个数据
	rate() 专门用来搭配counter数据类型使用,功能是取counter数据类型在这个时间段中平均每秒的增量 irate() 专门用来搭配counter数据类型使用,功能是基于区间内最后两个样本计算counter数据类型的每秒瞬时增长率
	by 在计算结果中,只保留by指定的标签值;without() 与by相反

9.prometheus 的服务发现机制

常用的服务发现:https://prometheus.io/docs/prometheus/latest/configuration/configuration/
9.1 kubernetes_sd_config
	k8s内部prometheus配置服务发现见 6
	k8s外部prometheus配置服务发现
		创建用户,获取到证书
		自动发现配置如下:

  # External (out-of-cluster) Prometheus scraping the apiserver; auth via a
  # service-account token file, cert verification skipped. Trailing whitespace
  # stripped from every line.
  - job_name: 'kubernetes-apiservers-monitor'
    kubernetes_sd_configs:
    - role: endpoints
      # Per-role api_server so discovery itself talks to the external endpoint.
      api_server: https://192.168.1.198:6443
      tls_config:
        insecure_skip_verify: true
      bearer_token_file: /apps/prometheus/k8s.token
    scheme: https
    tls_config:
      insecure_skip_verify: true
    bearer_token_file: /apps/prometheus/k8s.token
    relabel_configs:
    # Keep only the default/kubernetes https endpoint (the apiserver itself).
    - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
      action: keep
      regex: default;kubernetes;https
    # Scrape via the externally reachable apiserver address.
    - target_label: __address__
      replacement: 192.168.1.198:6443



  # External Prometheus discovering nodes via the apiserver, scraping the
  # node-exporter on each node over plain http. Trailing whitespace stripped.
  - job_name: 'kubernetes-nodes-monitor'
    scheme: http
    tls_config:
      insecure_skip_verify: true
    bearer_token_file: /apps/prometheus/k8s.token
    kubernetes_sd_configs:
    - role: node
      api_server: https://192.168.1.198:6443
      tls_config:
        insecure_skip_verify: true
      bearer_token_file: /apps/prometheus/k8s.token
    relabel_configs:
      # Rewrite the kubelet port (10250) to the node-exporter port (9100).
      - source_labels: [__address__]
        regex: '(.*):10250'
        replacement: '${1}:9100'
        target_label: __address__
        action: replace
      # Copy the node's region label into a LOC target label.
      - source_labels: [__meta_kubernetes_node_label_failure_domain_beta_kubernetes_io_region]
        regex: '(.*)'
        replacement: '${1}'
        action: replace
        target_label: LOC
      # Attach fixed Type/Env labels to every node target.
      - source_labels: [__meta_kubernetes_node_label_failure_domain_beta_kubernetes_io_region]
        regex: '(.*)'
        replacement: 'NODE'
        action: replace
        target_label: Type
      - source_labels: [__meta_kubernetes_node_label_failure_domain_beta_kubernetes_io_region]
        regex: '(.*)'
        replacement: 'K3S-test'
        action: replace
        target_label: Env
      # Promote all node labels to target labels.
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)





  # External Prometheus discovering pods via the apiserver; only pods that are
  # annotated for scraping are kept. Trailing whitespace stripped.
  - job_name: 'kubernetes-pods-monitor'
    kubernetes_sd_configs:
    - role: pod
      api_server: https://192.168.1.198:6443
      tls_config:
        insecure_skip_verify: true
      bearer_token_file: /apps/prometheus/k8s.token
    relabel_configs:
    # Scrape only pods annotated prometheus.io/scrape: "true".
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
      action: keep
      regex: true
    # Optional prometheus.io/path annotation overrides the metrics path.
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
      action: replace
      target_label: __metrics_path__
      regex: (.+)
    # Optional prometheus.io/port annotation overrides the scrape port.
    - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
      action: replace
      regex: ([^:]+)(?::\d+)?;(\d+)
      replacement: $1:$2
      target_label: __address__
    - action: labelmap
      regex: __meta_kubernetes_pod_label_(.+)
    - source_labels: [__meta_kubernetes_namespace]
      action: replace
      target_label: kubernetes_namespace
    - source_labels: [__meta_kubernetes_pod_name]
      action: replace
      target_label: kubernetes_pod_name
    # Attach a fixed Env label to every pod target.
    - source_labels: [__meta_kubernetes_pod_label_pod_template_hash]
      regex: '(.*)'
      replacement: 'K8S-test'
      action: replace
      target_label: Env
9.2 consul_sd_config
	nohup  ./consul agent -server -bootstrap -bind=192.168.1.191 -client=192.168.1.191 -data-dir=/data/consul -ui -node=192.168.1.191 &
	nohup ./consul agent -bind=192.168.1.192 -client=192.168.1.192 -data-dir=/data/consul -node=192.168.1.192 -join=192.168.1.191  &
	nohup ./consul agent -bind=192.168.1.193 -client=192.168.1.193 -data-dir=/data/consul -node=192.168.1.193 -join=192.168.1.191 &
	
	二进制部署prometheus中consul配置:
点击查看代码
  # Consul-based service discovery against a three-node consul cluster.
  - job_name: consul
    honor_labels: true
    metrics_path: /metrics
    scheme: http
    consul_sd_configs:
      - server: 192.168.1.191:8500
        services: []  # service names to discover; empty = all services (e.g. servicea,serviceb,servicec)
      - server: 192.168.1.192:8500
        services: []
      - server: 192.168.1.193:8500
        services: []
    relabel_configs:
    - source_labels: ['__meta_consul_tags']
      target_label: 'product'
    - source_labels: ['__meta_consul_dc']
      target_label: 'idc'
    # Drop consul's built-in "consul" service so the servers themselves are not scraped.
    - source_labels: ['__meta_consul_service']
      regex: "consul"
      action: drop
	测试写入数据:
		curl -X PUT -d '{"id": "node-exporter72","name": "node-exporter72","address": "192.168.1.72","port":9100,"tags": ["node-exporter"],"checks": [{"http": "http://192.168.1.72:9100/","interval": "5s"}]}' http://192.168.1.191:8500/v1/agent/service/register

		curl -X PUT -d '{"id": "node-exporter73","name": "node-exporter73","address": "192.168.1.73","port":9100,"tags": ["node-exporter"],"checks": [{"http": "http://192.168.1.73:9100/","interval": "5s"}]}' http://192.168.1.191:8500/v1/agent/service/register

		curl -X PUT -d '{"id": "node-exporter74","name": "node-exporter74","address": "192.168.1.74","port":9100,"tags": ["node-exporter"],"checks": [{"http": "http://192.168.1.74:9100/","interval": "5s"}]}' http://192.168.1.191:8500/v1/agent/service/register


		curl -X PUT -d '{"id": "cadvisor72","name": "cadvisor72","address": "192.168.1.72","port":8080,"tags": ["cadvisor"],"checks": [{"http": "http://192.168.1.72:8080/","interval": "5s"}]}' http://192.168.1.191:8500/v1/agent/service/register
	删除数据:
		curl --request PUT http://192.168.1.191:8500/v1/agent/service/deregister/node-exporter72
9.3 dns_sd_config
	  - job_name: 'webapp'
		dns_sd_configs:
 			- names: [ 'www.92.com']
   			  type: A
   			  port: 9100
	
9.4 file_sd_config
sd_my_server.json
[
  {
    "targets":  ["192.168.1.72:9100","192.168.1.73:9100","192.168.1.74:9100"]
  }
]
prometheus.yml
  - job_name: 'file_ds'   # file-discovery scrape job name; define one per business need
    file_sd_configs:
      - files:
        - /apps/prometheus/file_sd/sd_my_server.json         # path to the target-list file
        refresh_interval: 10s # re-read interval, default 5m
9.5 static_config
	  - job_name: "prometheus-pod"
	    static_configs:
	      - targets: ["192.168.1.198:8080","192.168.1.36:8080","192.168.1.37:8080","192.168.1.72:8080","192.168.1.73:8080","192.168.1.74:8080"]
posted @ 2022-03-08 20:54  没有猫的猫奴  阅读(24)  评论(0)    收藏  举报