实践节点亲和和trace

https://imroc.cc/kubernetes/best-practices/performance-optimization/cpu

https://www.cloudpods.org/blog/cgroups-kubernetes-pid-limits/

https://www.zhengwenfeng.com/pages/d9d0ce/#_4-4-手动模拟flannel实现host-gateway模式实验

https://help.aliyun.com/zh/ack/ack-managed-and-ack-dedicated/user-guide/ack-network-fabric-flannel

https://www.thebyte.com.cn/content/chapter1/network-namespace.html

 

trace

https://www.cnblogs.com/linhaostudy/p/16908978.html

https://blog.csdn.net/SweeNeil/article/details/90038286

https://cloud.tencent.com/developer/article/2407426

https://jailbreakfox.github.io/2021/07/05/Linux进程防杀/

 

 

https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/

https://blog.csdn.net/qq_49018210/article/details/141157389

https://ecloud.10086.cn/op-help-center/doc/article/72390#section59542620588

  • preferredDuringSchedulingIgnoredDuringExecution:软策略
  • requiredDuringSchedulingIgnoredDuringExecution:硬策略

操作符取值说明

您可以使用操作符(operator字段)来设置使用规则的逻辑关系,operator取值如下:

  • In:亲和/反亲和对象的标签在标签值列表(values字段)中。
  • NotIn:亲和/反亲和对象的标签不在标签值列表(values字段)中。
  • Exists:亲和/反亲和对象存在指定标签名。
  • DoesNotExist:亲和/反亲和对象不存在指定标签名。
  • Gt:仅在节点亲和性中设置,调度节点的标签值大于列表值(标签值和列表值均按整数解析后比较,values 只能包含一个元素)。
  • Lt:仅在节点亲和性中设置,调度节点的标签值小于列表值(标签值和列表值均按整数解析后比较,values 只能包含一个元素)。

 

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
/data
  ├── config
  │   ├── app-config.yaml
  │   └── db-config.yaml
  └── certs
      ├── server.crt
      └── server.key
	  
	  
# Example Pod: exposes individual files from the host's /data tree inside an
# nginx container by combining one hostPath volume with per-file subPath mounts.
apiVersion: v1
kind: Pod
metadata:
  name: example-pod
spec:
  containers:
  - name: example-container
    image: nginx
    volumeMounts:
    # subPath is resolved relative to the volume root (/data on the host), so
    # only the named file appears at mountPath instead of the whole tree.
    - name: data-volume
      mountPath: /etc/config/app-config.yaml
      subPath: config/app-config.yaml
      # Host files are configuration input only; keep the container from
      # writing back to the node's filesystem.
      readOnly: true
    - name: data-volume
      mountPath: /etc/ssl/certs/server.crt
      subPath: certs/server.crt
      readOnly: true
  volumes:
  # One volume backs both mounts; each mount selects its own file via subPath.
  - name: data-volume
    hostPath:
      path: /data
      # Require /data to already exist on the node instead of skipping the check.
      type: Directory
	  
	  
        volumeMounts:
        - mountPath: /etc/hosts-custom
          name: hosts-custom
        - mountPath: /etc/coredns
          name: config-volume
          readOnly: true
      dnsPolicy: Default
      nodeSelector:
        kubernetes.io/os: linux
      priorityClassName: system-cluster-critical
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      serviceAccount: coredns
      serviceAccountName: coredns
      terminationGracePeriodSeconds: 30
      tolerations:
      - key: CriticalAddonsOnly
        operator: Exists
      - effect: NoSchedule
        key: node-role.kubernetes.io/master
      volumes:
      - hostPath:
          path: /etc/hosts-custom
          type: FileOrCreate
        name: hosts-custom
      - configMap:
          defaultMode: 420
          items:
          - key: Corefile
            path: Corefile
          name: coredns
        name: config-volume
		
	
subpath指定configmap的key	
        volumeMounts:
        - mountPath: /etc/nginx/certs.d/ssl.crt
          name: fm-ssl-crt
          subPath: fm-ssl-crt
        - mountPath: /etc/nginx/certs.d/ssl.key
          name: fm-ssl-key
          subPath: fm-ssl-key
      dnsPolicy: ClusterFirst
      imagePullSecrets:
      - name: harbor-ops-qianxin-inc-cn-zion
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      terminationGracePeriodSeconds: 30
      volumes:
      - configMap:
          defaultMode: 420
          name: fm-ssl-crt
        name: fm-ssl-crt
      - configMap:
          defaultMode: 420
          name: fm-ssl-key
        name: fm-ssl-key
		

 

on-cpu
1,安装必要工具
# Install perf (run the line that matches your distribution)
sudo apt-get install linux-tools-common linux-tools-generic "linux-tools-$(uname -r)"  # Ubuntu/Debian
sudo yum install perf  # CentOS/RHEL


# Fetch the FlameGraph scripts. Stay in the current directory afterwards:
# the later steps invoke the scripts as ./FlameGraph/<script>.pl, which would
# not resolve from inside the clone.
git clone https://github.com/brendangregg/FlameGraph.git

2,收集性能数据
# Record CPU call stacks of one process (replace $PID with the real PID)
sudo perf record -F 99 -p $PID -g -- sleep 60    # sample for 60 seconds
# Alternatively
sudo perf record -F 99 -a -g -- sleep 60         # record activity on all CPUs

# Profile a specific command
sudo perf record -F 99 -g your_command


3,生成火焰图
# Dump perf.data as readable text
sudo perf script > out.perf

# Fold the stacks, then render the SVG with the FlameGraph scripts
./FlameGraph/stackcollapse-perf.pl out.perf > out.folded
./FlameGraph/flamegraph.pl out.folded > cpu_flamegraph.svg



off-cpu
1. 指定 PID 方式
# 1. Capture off-CPU stacks for one process with offcputime for 30 seconds.
#    BPF tools need root; -f emits folded stacks that flamegraph.pl reads directly.
sudo /usr/share/bcc/tools/offcputime -f -p $PID 30 > out.stacks

# Alternative: record scheduler events with perf. This writes perf.data, which
# step 2 below does not read; it needs its own perf-script-based post-processing.
sudo perf record -e sched:sched_stat_sleep -e sched:sched_switch \
    -e sched:sched_process_exit -g -o perf.data -p $PID -- sleep 30

# 2. Render the flame graph. offcputime reports blocked time in microseconds,
#    so label the values as "us" instead of the default "samples".
./FlameGraph/flamegraph.pl --colors=io --title="Off-CPU Time Flame Graph" --countname=us out.stacks > offcpu_pid.svg

2. 不指定 PID 方式(全系统)
# 1. Capture off-CPU stacks for every process with offcputime for 30 seconds.
#    BPF tools need root; -f emits folded stacks that flamegraph.pl reads directly.
sudo /usr/share/bcc/tools/offcputime -f 30 > out.stacks

# Alternative: record scheduler events system-wide with perf. This writes
# perf.data, which step 2 below does not read; it needs its own
# perf-script-based post-processing.
sudo perf record -e sched:sched_stat_sleep -e sched:sched_switch \
    -e sched:sched_process_exit -g -o perf.data -a -- sleep 30

# 2. Render the flame graph. offcputime reports blocked time in microseconds,
#    so label the values as "us" instead of the default "samples".
./FlameGraph/flamegraph.pl --colors=io --title="Off-CPU Time Flame Graph" --countname=us out.stacks > offcpu_system.svg

3. 查找特定进程的 PID
# Look up a PID by process name
pgrep process_name

# Or list matching processes with more detail
ps aux | grep process_name

# Find the process bound to a given port
lsof -i :port_number

# Watch processes live
top

4. 其他有用的选项
# Restrict tracing to user threads (skip kernel threads). -u is a bare switch
# and takes no argument; offcputime has no per-username filter.
sudo /usr/share/bcc/tools/offcputime -f -u 30 > out.stacks

# offcputime instruments scheduler context switches instead of sampling, so it
# has no -F frequency option. To narrow the data, filter by thread state, e.g.
# only threads blocked in TASK_UNINTERRUPTIBLE (state 2):
sudo /usr/share/bcc/tools/offcputime -f --state 2 30 > out.stacks

# Drop short blocks: -m sets the minimum off-CPU time in microseconds
sudo /usr/share/bcc/tools/offcputime -f -m 1000 30 > out.stacks

5. 火焰图生成的额外选项
# Differential flame graph (compare two captures): difffolded.pl computes the
# per-stack deltas and flamegraph.pl renders them; flamegraph.pl itself has no
# --differential option.
./FlameGraph/difffolded.pl out.stacks.1 out.stacks.2 \
    | ./FlameGraph/flamegraph.pl --title="Off-CPU Differential Flame Graph" > diff_offcpu.svg

# Inverted (icicle) layout: root frames at the top
./FlameGraph/flamegraph.pl --colors=io --inverted \
    out.stacks > inverted_offcpu.svg
	
	
@@@@@@@@@@@@@@@@@@@@@@@@@@@
1. 使用标签选择器(Label Selector)驱逐
# Syntax: pass the label with --pod-selector
kubectl drain <node-name> --pod-selector=<label-key>=<label-value>

# Example: evict the Pods carrying a specific label
kubectl drain node1 --pod-selector=app=nginx --ignore-daemonsets

# Evict Pods that match several labels at once
kubectl drain node1 --pod-selector="app=nginx,environment=prod" --ignore-daemonsets

2. 使用命名空间(Namespace)限制
# kubectl drain cannot be limited to one namespace: it evaluates the Pods on the
# node across all namespaces, and --namespace does not narrow that set.
# Preview what a label-scoped drain would evict before running it for real.
kubectl drain node1 --pod-selector=app=mysql --dry-run=client

# To move only one namespace's Pods off the node, cordon it and delete the
# matching Pods in that namespace (unlike drain, delete does not honor
# PodDisruptionBudgets).
kubectl cordon node1
kubectl delete pods --namespace=production --selector=tier=frontend --field-selector=spec.nodeName=node1

3. 使用复杂的标签选择器
# Use the "in" operator
kubectl drain node1 --pod-selector="app in (nginx,apache)" --ignore-daemonsets

# Use the "notin" operator
kubectl drain node1 --pod-selector="app notin (mysql,redis)" --ignore-daemonsets

# Match on label existence (any value)
kubectl drain node1 --pod-selector="environment" --ignore-daemonsets

4. 实用组合命令
# Preview the matching Pods on the node first (all namespaces, like drain)
kubectl get pods --all-namespaces --selector=app=nginx --field-selector=spec.nodeName=node1 -o wide

# Graceful eviction with a 300-second grace period
kubectl drain node1 --pod-selector=app=nginx --grace-period=300 --ignore-daemonsets

# Forced eviction (use with care): --force also removes Pods not managed by a
# controller, and --delete-emptydir-data (the replacement for the deprecated
# --delete-local-data) discards emptyDir contents.
kubectl drain node1 --pod-selector=app=nginx --force --ignore-daemonsets --delete-emptydir-data


5. 验证和监控
# Check the Pods before evicting
kubectl get pods -l app=nginx -o wide

# Watch the eviction as it progresses
kubectl get pods -l app=nginx -w

# Inspect the node's state
kubectl describe node node1


@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Add to crontab: every day at 03:00, sync and then write 2 to drop_caches
0 3 * * * sync; echo 2 > /proc/sys/vm/drop_caches

# Show detailed slab usage
slabtop -o

# Show the individual kernel object caches
cat /proc/slabinfo



vm.vfs_cache_pressure = 200   # 建议取值范围 200~500,需填写单个整数(内核默认值为 100)

# Show current cache usage
cat /proc/meminfo | grep -E "Cached|Dirty|Writeback"

# Show detailed slab usage
slabtop

# Refresh the cache counters every second
watch -n 1 'cat /proc/meminfo | grep -E "Cached|Dirty|Writeback"'



# High-concurrency web server
vm.min_free_kbytes = 1572864   # 1.5GB

# Database server
vm.min_free_kbytes = 2097152   # 2GB

# Watch available memory
watch -n 1 'free -m'

# Show detailed memory statistics
cat /proc/meminfo | grep -E "MemFree|MemAvailable"

# Watch memory reclaim activity
vmstat 1

 

  template:
    metadata:
      creationTimestamp: null
      labels:
        k8s-app: kube-dns
    spec:
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
              - key: kube-dns
                operator: In
                values:
                - coredns
            weight: 100
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchLabels:
                k8s-app: kube-dns
            topologyKey: kubernetes.io/hostname
      containers:


  template:
    metadata:
      creationTimestamp: null
      labels:
        k8s-app: kube-dns
    spec:
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
              - key: node-role.kubernetes.io/control-plane
                operator: Exists
            weight: 100
      containers:




  template:
    metadata:
      creationTimestamp: null
      labels:
        k8s-app: kube-dns
    spec:
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
              - key: kube-dns
                operator: Exists
            weight: 100
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            podAffinityTerm:
              labelSelector:
                matchLabels:
                  k8s-app: kube-dns
              topologyKey: kubernetes.io/hostname
      containers:

  

posted on 2024-12-27 20:36  吃草的青蛙  阅读(29)  评论(0)    收藏  举报

导航