Prometheus常用查询

Prometheus常用查询

集群指标

# Total CPU cores in the cluster: sums machine_cpu_cores over every series
# scraped by the kubelet job. "$cluster" is a template variable filled in by the
# dashboard (presumably Grafana — confirm).
sum by (cluster) (
  machine_cpu_cores{cluster="$cluster",job="kubelet"}
)

# Total CPU requested: sum of container CPU requests (unit="core") exported by
# kube-state-metrics.
sum by (cluster) (
  kube_pod_container_resource_requests{cluster="$cluster",job="kube-state-metrics",unit="core"}
)

# Total CPU limit: sum of container CPU limits (unit="core") exported by
# kube-state-metrics.
sum by (cluster) (
  kube_pod_container_resource_limits{cluster="$cluster",job="kube-state-metrics",unit="core"}
)


# Total memory in the cluster, in bytes: sums machine_memory_bytes over every
# series scraped by the kubelet job.
sum(machine_memory_bytes{cluster="$cluster",job="kubelet"}) by (cluster)

# Total memory requested, in bytes: sum of container memory requests
# (unit="byte") exported by kube-state-metrics.
# Grouped by cluster so the result keeps the cluster label, matching the other
# cluster-level totals in this file.
sum(kube_pod_container_resource_requests{cluster="$cluster",job="kube-state-metrics",unit="byte"}) by (cluster)

# Total memory limit, in bytes: sum of container memory limits (unit="byte")
# exported by kube-state-metrics. Grouped by cluster for the same reason.
sum(kube_pod_container_resource_limits{cluster="$cluster",job="kube-state-metrics",unit="byte"}) by (cluster)


# Used space per disk partition, in bytes, summed per (mountpoint, cluster).
# Used = size - avail. node_filesystem_avail_bytes on its own is the space still
# AVAILABLE, not the space used, so it is subtracted from the filesystem size.
# The "and on (instance)" clause keeps only filesystems whose instance matches
# a kubelet target: label_replace rewrites each `up` series' instance label to
# "<node>:9100" when the node label consists solely of digits and dots
# (presumably the node IP and the node_exporter port — confirm).
sum by (mountpoint, cluster) (
  (
      node_filesystem_size_bytes{cluster="$cluster",mountpoint=~"/var/log/pods|/data|/"}
    -
      node_filesystem_avail_bytes{cluster="$cluster",mountpoint=~"/var/log/pods|/data|/"}
  )
  and on (instance)
  label_replace(up{cluster="$cluster",job="kubelet",metrics_path="/metrics/probes"},"instance","$1:9100","node","([0-9.]+)")
)

# Total size per disk partition, in bytes, summed per (mountpoint, cluster).
# The "and on (instance)" clause keeps only filesystems whose instance matches
# a kubelet target: label_replace rewrites each `up` series' instance label to
# "<node>:9100" when the node label consists solely of digits and dots
# (presumably the node IP and the node_exporter port — confirm).
sum by (mountpoint, cluster) (
  node_filesystem_size_bytes{cluster="$cluster",mountpoint=~"/var/log/pods|/data|/"}
  and on (instance)
  (
    label_replace(up{cluster="$cluster",job="kubelet",metrics_path="/metrics/probes"},"instance","$1:9100","node","([0-9.]+)")
  )
)

Pod 指标

# Pod CPU usage as a percentage of the pod's CPU limit.
# Numerator: per-second CPU consumption (irate over a 5m lookback) summed over
# the pod's containers; image!="" drops series that carry no container image.
# Denominator: sum of the containers' CPU limits (unit="core").
# "/" and "*" share precedence and are left-associative, so this evaluates as
# (usage / limit) * 100.
sum(
  irate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",pod=~"$pod",image!=""}[5m])
) by (namespace, pod)
/
sum(
  kube_pod_container_resource_limits{cluster="$cluster",namespace=~"$namespace",pod=~"$pod",unit="core"}
) by (namespace, pod)
* 100

# CPU requested per pod, in cores: sum of the containers' CPU requests.
sum(
  kube_pod_container_resource_requests{cluster="$cluster",namespace=~"$namespace",pod=~"$pod",unit="core"}
) by (namespace, pod)

# CPU limit per pod, in cores: sum of the containers' CPU limits.
sum(
  kube_pod_container_resource_limits{cluster="$cluster",namespace=~"$namespace",pod=~"$pod",unit="core"}
) by (namespace, pod)


# Pod memory usage as a percentage of the pod's memory limit.
# Numerator: working-set bytes summed over the pod's containers; image!=""
# drops series that carry no container image.
# Denominator: sum of the containers' MEMORY limits only. The unit="byte"
# matcher is required: without it the CPU limits (unit="core") are added into
# the same sum and the percentage is wrong.
# The trailing "!= +Inf" drops results where the division produced infinity
# (a limit sum of zero).
sum(container_memory_working_set_bytes{cluster="$cluster",namespace=~"$namespace",pod=~"$pod",image!=""}) by (pod, namespace)
/
sum(kube_pod_container_resource_limits{cluster="$cluster",namespace=~"$namespace",pod=~"$pod",unit="byte"}) by (pod, namespace) * 100 != +Inf

# Memory requested per pod, in bytes: sum of the containers' memory requests.
sum(
  kube_pod_container_resource_requests{cluster="$cluster",namespace=~"$namespace",pod=~"$pod",unit="byte"}
) by (namespace, pod)

# Memory limit per pod, in bytes: sum of the containers' memory limits.
sum(
  kube_pod_container_resource_limits{cluster="$cluster",namespace=~"$namespace",pod=~"$pod",unit="byte"}
) by (namespace, pod)


# Container restarts within the last hour; only containers that restarted at
# least once are returned (> 0).
# kube_pod_container_status_restarts_total is a counter, so increase() is used
# rather than delta(): delta() is meant for gauges and yields a wrong (possibly
# negative) value when the counter resets. increase() extrapolates to the window
# boundaries, so the result may be a non-integer.
increase(kube_pod_container_status_restarts_total{cluster="$cluster",job="kube-state-metrics",namespace=~"$namespace",pod=~"$pod"}[1h]) > 0
posted @ 2023-07-17 09:27  liy36  阅读(661)  评论(0)    收藏  举报