k8s 的网络topology 探测utils

REF:

How to find the network namespace of a veth peer ifindex?

Unix & Linux: How to find the network namespace of a veth peer ifindex? (3 Solutions!!)

Retrieving the netnsid of a network namespace in Python

ubuntu 

apt-get install python3-dev

centos

yum -y install gcc
yum install python3-devel

install python package

pip3 install psutil
pip3 install pyroute2
pip3 install nsenter   # NOTE: this version is too low

update:

# Install the util-linux package.
yum install util-linux
# Keep the nsenter found in /usr/local/bin under a different name ...
mv /usr/local/bin/nsenter /usr/local/bin/nsenter_pip
# ... and make /usr/local/bin/nsenter point at /usr/bin/nsenter instead.
# NOTE(review): presumably /usr/local/bin/nsenter is the one installed by
# `pip3 install nsenter` and /usr/bin/nsenter comes from util-linux - confirm.
ln -s /usr/bin/nsenter /usr/local/bin/nsenter

Get namespace 

LKNSID=1

#######################################
# Translate a peer netnsid (the N in "link-netnsid N" printed by `ip link`)
# into the inode number of the network namespace it refers to.
#
# A netnsid is only meaningful relative to the namespace that reported it,
# so the lookup is done from the current network namespace only.
#
# Requires: python3 with psutil and pyroute2; normally needs root.
# Arguments: $1 - netnsid, a non-negative integer
# Outputs:   the namespace inode number on stdout (at most one line)
# Returns:   0 if found, 1 if no namespace has that nsid, 2 on bad usage
#######################################
id2ns() {
  local nsid=${1:-}
  if [[ ! "$nsid" =~ ^[0-9]+$ ]]; then
    echo "usage: id2ns <netnsid>  (non-negative integer)" >&2
    return 2
  fi

  # The value is passed as argv and the heredoc is quoted, so nothing from
  # the shell is ever spliced into the Python source.
  python3 - "$nsid" <<'EOF'
import os
import sys

import psutil
import pyroute2
from pyroute2.netlink import rtnl, NLM_F_REQUEST
from pyroute2.netlink.rtnl import nsidmsg

wanted = int(sys.argv[1])

# Phase I: gather one /proc/<pid>/ns/net reference per distinct namespace.
netns = {}
for proc in psutil.process_iter():
    ref = '/proc/{}/ns/net'.format(proc.pid)
    try:
        inode = os.stat(ref).st_ino
    except OSError:
        # The process exited (or is inaccessible) since it was listed.
        continue
    netns.setdefault(inode, ref)

# Phase II: ask the kernel which local nsid each discovered namespace has
# when seen from the namespace we are running in.
ipr = pyroute2.IPRoute()
try:
    for inode, ref in netns.items():
        try:
            with open(ref, 'r') as nsf:
                req = nsidmsg.nsidmsg()
                req['attrs'] = [('NETNSA_FD', nsf.fileno())]
                resp = ipr.nlm_request(req, rtnl.RTM_GETNSID, NLM_F_REQUEST)
        except OSError:
            continue
        if dict(resp[0]['attrs'])['NETNSA_NSID'] == wanted:
            print(inode)
            sys.exit(0)
finally:
    ipr.close()

sys.exit(1)
EOF
}

 

in file

# Write netns.py, a script that prints, for every network namespace found
# via /proc, which nsid each of the other namespaces has from its point of
# view, and then run it. Requires python3 with psutil, pyroute2 and nsenter.
# The heredoc delimiter is quoted so the shell never expands the Python text.
cat > netns.py <<'EOF'
import os

import psutil
import pyroute2
from pyroute2.netlink import rtnl, NLM_F_REQUEST
from pyroute2.netlink.rtnl import nsidmsg
from nsenter import Namespace

# Answers equal to this value (-1 as an unsigned 32-bit number) are skipped:
# no nsid is assigned for that pair of namespaces.
NSID_UNASSIGNED = 2**32 - 1

# phase I: gather network namespaces from /proc/[0-9]*/ns/net
netns = {}
for proc in psutil.process_iter():
    netnsref = '/proc/{}/ns/net'.format(proc.pid)
    try:
        netnsid = os.stat(netnsref).st_ino
    except OSError:
        # The process exited (or is inaccessible) since it was listed.
        continue
    netns.setdefault(netnsid, netnsref)

# phase II: ask kernel "oracle" about the local IDs for the
# network namespaces we've discovered in phase I, doing this
# from all discovered network namespaces
for ns_inode, ref in netns.items():
    with Namespace(ref, 'net'):
        print('inside net:[{}]...'.format(ns_inode))
        ipr = pyroute2.IPRoute()
        try:
            for netnsid, netnsref in netns.items():
                with open(netnsref, 'r') as netnsf:
                    req = nsidmsg.nsidmsg()
                    req['attrs'] = [('NETNSA_FD', netnsf.fileno())]
                    resp = ipr.nlm_request(req, rtnl.RTM_GETNSID, NLM_F_REQUEST)
                    local_nsid = dict(resp[0]['attrs'])['NETNSA_NSID']
                if local_nsid != NSID_UNASSIGNED:
                    print('  net:[{}] <--> nsid {}'.format(netnsid, local_nsid))
        finally:
            # One netlink socket per visited namespace; release it promptly.
            ipr.close()
EOF

python3 netns.py

 

Get PID for namespace  

#######################################
# Print the PID that `lsns` reports for a namespace.
# Relies on lsns' default column layout, where column 1 is the namespace
# inode (NS) and column 4 is the PID.
# Arguments: $1 - namespace inode number
# Outputs:   matching PID(s), space separated, on one line
# Returns:   1 when no argument is given
#######################################
ns2pid() {
  local ns=${1:-}
  if [[ -z "$ns" ]]; then
    echo "usage: ns2pid <namespace-inode>" >&2
    return 1
  fi
  # Compare the NS column exactly; a plain grep would also hit the same
  # digits appearing in other columns such as COMMAND.
  lsns | awk -v ns="$ns" '
    $1 == ns { pids = pids (pids == "" ? "" : " ") $4 }
    END { print pids }'
}

 more

#######################################
# Print the peer netnsid ("link-netnsid N") of a local interface.
# Arguments: $1 - interface name as shown by `ip link` (substring match)
# Outputs:   the netnsid of the first matching link; empty if it has none
# Returns:   1 when no argument is given
#######################################
dev2peernsid() {
  local dev=${1:-}
  if [[ -z "$dev" ]]; then
    echo "usage: dev2peernsid <device>" >&2
    return 1
  fi
  # Field scan instead of gawk's 3-argument match(), so that mawk and
  # busybox awk work too. Only the first matching link is considered.
  ip -o link | grep -- "$dev" | awk '
    {
      nsid = ""
      for (i = 1; i < NF; i++) {
        if ($i == "link-netnsid") { nsid = $(i + 1); break }
      }
      print nsid
      exit
    }'
}

#######################################
# Print the PID that `lsns` reports for the namespace holding the peer of
# a local veth interface.
# Steps: device -> link-netnsid (ip link) -> namespace inode (id2ns)
#        -> PID (lsns, column 4).
# Arguments: $1 - interface name as shown by `ip link` (substring match)
# Outputs:   PID(s), space separated, on one line
# Returns:   1 if any step of the lookup yields nothing
#######################################
dev2peerpid() {
  local dev=${1:-} nsid ns_inode
  if [[ -z "$dev" ]]; then
    echo "usage: dev2peerpid <device>" >&2
    return 1
  fi

  # Portable field scan (no gawk-only match()); first matching link only.
  nsid=$(ip -o link | grep -- "$dev" | awk '
    {
      for (i = 1; i < NF; i++) {
        if ($i == "link-netnsid") { print $(i + 1); break }
      }
      exit
    }')
  if [[ -z "$nsid" ]]; then
    echo "dev2peerpid: no link-netnsid found for '$dev'" >&2
    return 1
  fi

  ns_inode=$(id2ns "$nsid")
  # Use the first answer only, in case id2ns prints more than one line.
  ns_inode=${ns_inode%%$'\n'*}
  if [[ -z "$ns_inode" ]]; then
    echo "dev2peerpid: cannot resolve netnsid $nsid to a namespace" >&2
    return 1
  fi

  lsns | awk -v ns="$ns_inode" '
    $1 == ns { pids = pids (pids == "" ? "" : " ") $4 }
    END { print pids }'
}

#######################################
# Print the IPv4 address configured on the peer end of a local veth device.
# Uses dev2peerpid and nsdev2peer to find the peer namespace and interface,
# then runs `ip addr` inside that namespace through nsenter.
# Arguments: $1 - local interface name
# Outputs:   the first IPv4 address of the peer, without the /prefix
# Returns:   1 if the peer PID or peer interface cannot be determined
#######################################
dev2peerip() {
  local dev=${1:-} peer_pid peer_if addr
  if [[ -z "$dev" ]]; then
    echo "usage: dev2peerip <device>" >&2
    return 1
  fi

  peer_pid=$(dev2peerpid "$dev")
  peer_pid=${peer_pid%% *}
  peer_if=$(nsdev2peer "$dev")
  if [[ -z "$peer_pid" || -z "$peer_if" ]]; then
    echo "dev2peerip: cannot locate the peer of '$dev'" >&2
    return 1
  fi

  # No `-c`: a bare -c forces coloured output and the escape sequences
  # would become part of the address. "${peer_if%@*}" drops the "@ifN"
  # suffix so only the interface name is passed to `ip`.
  addr=$(nsenter -t "$peer_pid" -n ip -o -4 addr show dev "${peer_if%@*}" \
    | awk '{
        for (i = 1; i < NF; i++) {
          if ($i == "inet") { print $(i + 1); exit }
        }
      }')
  printf '%s\n' "${addr%/*}"
}

 

对于calico来说,创建的interface都是veth类型

#######################################
# Print the output device of the route whose destination is the given
# address or network.
# Arguments: $1 - route destination as printed in the first column of
#                 `ip route` ("10.243.179.114", "10.0.0.0/24", "default");
#                 the "/prefix" part may be omitted
# Outputs:   the device name of the first matching route that has one
# Returns:   1 when no argument is given
#######################################
ipnet2dev() {
  local dst=${1:-}
  if [[ -z "$dst" ]]; then
    echo "usage: ipnet2dev <ip-or-net>" >&2
    return 1
  fi
  # Compare the destination column literally: with a regex grep the dots
  # are wildcards and 10.243.179.11 would also select 10.243.179.114.
  ip route | awk -v dst="$dst" '
    $1 == dst || index($1, dst "/") == 1 {
      for (i = 2; i < NF; i++) {
        if ($i == "dev") { print $(i + 1); exit }
      }
    }'
}

#######################################
# Print the destinations of all routes that go out through a device.
# Arguments: $1 - exact device name
# Outputs:   route destinations (first column of `ip route`), space
#            separated, on one line
# Returns:   1 when no argument is given
#######################################
dev2ipnet() {
  local dev=${1:-}
  if [[ -z "$dev" ]]; then
    echo "usage: dev2ipnet <device>" >&2
    return 1
  fi
  # Match the token after "dev" exactly so that eth0 does not pick up
  # routes of veth0 (or any other line merely containing the name).
  ip route | awk -v dev="$dev" '
    {
      for (i = 2; i < NF; i++) {
        if ($i == "dev" && $(i + 1) == dev) {
          nets = nets (nets == "" ? "" : " ") $1
          break
        }
      }
    }
    END { print nets }'
}

#######################################
# Print the kernel driver of an interface as reported by `ethtool -i`.
# Arguments: $1 - interface name
# Outputs:   the value of the "driver:" line
# Returns:   1 when no argument is given
#######################################
devtype() {
  local dev=${1:-}
  if [[ -z "$dev" ]]; then
    echo "usage: devtype <device>" >&2
    return 1
  fi
  # The name is quoted so it reaches ethtool as exactly one argument.
  ethtool -i "$dev" | awk '$1 == "driver:" { print $2; exit }'
}

# https://unix.stackexchange.com/questions/441876/how-to-find-the-network-namespace-of-a-veth-peer-ifindex
# ifindex=$(nsenter -t $pid -n ip link | sed -n -e 's/.*eth0@if\([0-9]*\):.*/\1/p')

#######################################
# Print the interface index of the PEER of a veth device, i.e. the N in
# the "<name>@ifN:" label printed by `ip link`.
# Arguments: $1 - exact interface name (without the "@ifN" suffix)
# Outputs:   the peer ifindex; nothing if the device has no "@ifN" label
# Returns:   1 when no argument is given
#######################################
ifindex() {
  local dev=${1:-}
  if [[ -z "$dev" ]]; then
    echo "usage: ifindex <device>" >&2
    return 1
  fi
  # Compare the whole name literally; splicing it into a sed regex also
  # matched every interface whose name merely ends with it (eth0 ~ veth0).
  ip link | awk -v dev="$dev" '
    index($2, dev "@if") == 1 {
      peer = substr($2, length(dev) + 4)
      sub(/:$/, "", peer)
      if (peer ~ /^[0-9]+$/) { print peer; exit }
    }'
}

# veth=$(ip -o link | grep ^$ifindex | sed -n -e 's/.*\(veth[[:alnum:]]*@if[[:digit:]]*\).*/\1/p')

#######################################
# Print the "vethXXXX@ifN" label of the interface with a given index in
# the current namespace. Only interfaces named veth* are recognised.
# Arguments: $1 - interface index
# Outputs:   the "name@ifN" label, or nothing if there is no such veth
# Returns:   1 when no argument is given
#######################################
vethpeer() {
  local idx=${1:-}
  if [[ -z "$idx" ]]; then
    echo "usage: vethpeer <ifindex>" >&2
    return 1
  fi
  # Anchor on "<index>:" so that index 4 does not also select 40, 41, ...
  ip -o link \
    | grep -- "^${idx}:" \
    | sed -n -e 's/.*\(veth[[:alnum:]]*@if[[:digit:]]*\).*/\1/p'
}

#######################################
# Print the name of the interface with a given index by scanning
# /sys/class/net/*/ifindex in the current namespace.
# Arguments: $1 - interface index
# Outputs:   the interface name on stdout; the error message on stderr
# Returns:   0 if found, 1 otherwise
#######################################
index2peer() {
  local want=${1:-} entry idx
  for entry in /sys/class/net/*; do
    # Skips non-interface entries, and the unexpanded glob if nothing matched.
    [[ -r "${entry}/ifindex" ]] || continue
    idx=$(<"${entry}/ifindex")
    # Right-hand side quoted: compare literally, not as a glob pattern.
    if [[ "$idx" == "$want" ]]; then
      printf '%s\n' "${entry##*/}"
      return 0
    fi
  done
  # stderr, so callers capturing stdout never mistake this for a name.
  echo "Error, not find peer" >&2
  return 1
}

# index2peer <ifindex>
# Look up an interface by index in `ip -o link` and print its label
# (the text between "<index>: " and the next ":", e.g. "eth0@if13745").
index2peer() {
  local label
  label=$(
    ip -o link \
      | grep ^${1}: \
      | awk -F'[: ]' '{print $3}'
  )
  echo $label
}

# for namespace
# nsenter -t $pid -n ip -o link | grep ^4: | sed -n -e 's/.*: \(.*@if[[:digit:]]*\).*/\1/p'
# usage:
# nsindex2peer $link_index $pid
# nsindex2peer <ifindex> <pid>
# Same lookup as index2peer, but `ip -o link` is run through nsenter in
# the network namespace of process <pid>.
nsindex2peer(){
  local label
  label=$(
    nsenter -t ${2} -n ip -o link \
      | grep ^${1}: \
      | awk -F'[: ]' '{print $3}'
  )
  echo $label
}

 

ipnet2peer

#######################################
# For a routed IP/net, print the interface in the CURRENT namespace whose
# index equals the peer index of the route's device.
# Chain: ipnet2dev -> ifindex -> index2peer.
# Arguments: $1 - IP address or network as accepted by ipnet2dev
# Outputs:   whatever index2peer prints for the peer index
# Returns:   1 if a lookup step yields nothing, else index2peer's status
#######################################
ipnet2peer() {
  local dev peer_idx
  dev=$(ipnet2dev "${1:-}")
  if [[ -z "$dev" ]]; then
    echo "ipnet2peer: no device found for '${1:-}'" >&2
    return 1
  fi
  peer_idx=$(ifindex "$dev")
  # Only the first index is used if several come back.
  peer_idx=${peer_idx%% *}
  # Stop here: an empty index would make the next step match unrelated
  # links and report a wrong peer.
  if [[ -z "$peer_idx" ]]; then
    echo "ipnet2peer: '$dev' has no peer index" >&2
    return 1
  fi
  index2peer "$peer_idx"
}

# nsipnet2peer 10.243.179.114 
#######################################
# For a routed IP/net, print the peer interface inside the namespace the
# route's device leads to.
# Chain: ipnet2dev -> dev2peerpid + ifindex -> nsindex2peer.
# Example: nsipnet2peer 10.243.179.114
# Arguments: $1 - IP address or network as accepted by ipnet2dev
# Outputs:   whatever nsindex2peer prints (e.g. "eth0@if13745")
# Returns:   1 if a lookup step yields nothing, else nsindex2peer's status
#######################################
nsipnet2peer() {
  local dev peer_pid peer_idx
  # Resolve the device once and reuse it for both lookups.
  dev=$(ipnet2dev "${1:-}")
  if [[ -z "$dev" ]]; then
    echo "nsipnet2peer: no device found for '${1:-}'" >&2
    return 1
  fi
  peer_pid=$(dev2peerpid "$dev")
  peer_pid=${peer_pid%% *}
  peer_idx=$(ifindex "$dev")
  peer_idx=${peer_idx%% *}
  if [[ -z "$peer_pid" || -z "$peer_idx" ]]; then
    echo "nsipnet2peer: cannot locate the peer of '$dev'" >&2
    return 1
  fi
  nsindex2peer "$peer_idx" "$peer_pid"
}

# Similar to `vethpeer`, but looks up the peer in the other namespace
# usage: nsdev2peer cali8f43c28540a
#######################################
# Print the peer interface of a local veth device, looked up inside the
# namespace the peer lives in.
# Chain: dev2peerpid + ifindex -> nsindex2peer.
# Arguments: $1 - local interface name
# Outputs:   whatever nsindex2peer prints (e.g. "eth0@if13745")
# Returns:   1 if a lookup step yields nothing, else nsindex2peer's status
#######################################
nsdev2peer() {
  local dev=${1:-} peer_pid peer_idx
  if [[ -z "$dev" ]]; then
    echo "usage: nsdev2peer <device>" >&2
    return 1
  fi
  # Locals only: a global named `pid` is also used by other snippets.
  peer_pid=$(dev2peerpid "$dev")
  peer_pid=${peer_pid%% *}
  peer_idx=$(ifindex "$dev")
  peer_idx=${peer_idx%% *}
  if [[ -z "$peer_pid" || -z "$peer_idx" ]]; then
    echo "nsdev2peer: cannot locate the peer of '$dev'" >&2
    return 1
  fi
  nsindex2peer "$peer_idx" "$peer_pid"
}

for calico

# Three ways to print the IP of pod $POD (the variable must be set first).

# 1. From the pod annotations: jq selects the "cni.projectcalico.org/podIP" key.
kubectl get pod $POD -o=jsonpath='{.metadata.annotations}' |jq '.["cni.projectcalico.org/podIP"]'

# 2. From status.podIP, shown as a custom column titled IP.
kubectl get pod $POD -o custom-columns=IP:status.podIP

# 3. From status.podIP, printed bare via jsonpath.
kubectl get pod $POD -o=jsonpath='{.status.podIP}'


#######################################
# Print the IP of the first pod (in any namespace) whose line in
# `kubectl get pod -A -o wide` matches a pattern.
# Arguments: $1 - grep pattern, typically (part of) the pod name
# Outputs:   the pod's .status.podIP
# Returns:   1 if no argument is given or no pod matches
#######################################
getpodip() {
  local pattern=${1:-} match ns name
  if [[ -z "$pattern" ]]; then
    echo "usage: getpodip <pod-name-pattern>" >&2
    return 1
  fi
  # -m 1: keep a single line. With several matches the namespace and name
  # would otherwise become multi-line and break the kubectl call below.
  match=$(kubectl get pod -A -o wide | grep -m 1 -- "$pattern")
  if [[ -z "$match" ]]; then
    echo "getpodip: no pod matches '$pattern'" >&2
    return 1
  fi
  # The first two columns of the listing are NAMESPACE and NAME.
  read -r ns name _ <<<"$match"
  printf '%s\n' "$(kubectl -n "$ns" get pod "$name" -o=jsonpath='{.status.podIP}')"
}

 Sniffer container in HOST  

# Walkthrough: capture traffic of a pod's container from the host.
# These are alternative snippets meant to be run by hand, not one script:
# both "find the PID" variants assign CID and pid, and tcpdump runs in the
# foreground until it is interrupted.

# Select the pod(s) labelled sdewanPurpose=sdewan-cnf; `-o name` output has
# the form <kind>/<name>, which is why ${CNFPOD##*/} is used below.
CNFPOD=`kubectl get pod -l sdewanPurpose=sdewan-cnf -o name`

# Two ways to find the PID of the container
# 1. Ask kubectl for the container ID, then ask docker for its PID.
# First line: dump containerStatuses of all pods (for inspection only).
kubectl get pods -o go-template --template="{{range .items}}{{.status.containerStatuses}}{{end}}"
# containerID of the first container of the pod.
CID=`kubectl get pod ${CNFPOD##*/} -o go-template --template="{{ (index .status.containerStatuses 0).containerID}}"`
# echo ${CID##*/}
# ${CID##*/} keeps only what follows the last "/" of the reported ID
# (presumably the ID looks like docker://<id> - confirm).
pid=$(docker inspect -f '{{.State.Pid}}' ${CID##*/})

# 2. Find the container through `docker ps` by its k8s_sdewan_<pod> name.
# kubectl get pods -o go-template --template="{{ (index .status.containerStatuses 'containerID') }}"
# containerID
PODNAME=k8s_sdewan_${CNFPOD##*/}
# First column of `docker ps` is the (short) container ID.
CID=`docker ps |grep $PODNAME| awk '{print $1}'`
pid=$(docker inspect -f '{{.State.Pid}}' ${CID})


# Two ways to run a command in the container's network namespace
# 1. Enter the network namespace of $pid directly with nsenter.
nsenter -t $pid -n tcpdump -i net3 -nv

# 2. Expose the namespace to `ip netns` through a symlink under
#    /var/run/netns, then use `ip netns exec`.
# NOTE(review): [$CID] is unquoted, so the shell treats it as a glob
# pattern (a bracket expression); it stays literal only when no file in
# the current directory matches. Presumably a literal name was intended -
# confirm and quote it.
mkdir -p /var/run/netns/
ln -sfT /proc/$pid/ns/net /var/run/netns/[$CID]
ip netns exec [$CID] ip a
ip netns exec [$CID] tcpdump -i net3 -nv

 

pid to container name:

# https://stackoverflow.com/questions/24406743/coreos-get-docker-container-name-by-pid/24408480
# NOTE: something is wrong here; this was observed not to work
#######################################
# Print the ID of the running docker container whose .State.Pid equals
# the given PID.
# NOTE(review): only the PID docker records for the container matches; a
# PID of some other process inside the container yields no output -
# confirm whether callers need that case.
# Arguments: $1 - PID
# Outputs:   the container ID only (no "PID, " prefix); nothing if no match
# Returns:   1 when no argument is given
#######################################
pid2cid() {
  local pid=${1:-}
  if [[ -z "$pid" ]]; then
    echo "usage: pid2cid <pid>" >&2
    return 1
  fi
  # xargs -r: do not run `docker inspect` at all when no container is up.
  docker ps -q \
    | xargs -r docker inspect --format '{{.State.Pid}} {{.ID}}' \
    | awk -v pid="$pid" '$1 == pid { print $2; exit }'
}

#######################################
# Print the docker container name a process belongs to.
# The container ID is taken from the process's cgroup file: the first
# 64-character lowercase hex string found there. This covers both
# ".../docker-<id>.scope" and ".../<id>" cgroup paths.
# Arguments: $1 - PID
#            $2 - (optional) cgroup file to read; default /proc/<PID>/cgroup
# Outputs:   the container name without the leading "/"
# Returns:   1 if the file is unreadable or contains no container ID
#######################################
pid2cname() {
  local pid=${1:-}
  local cgroup_file=${2:-/proc/${pid}/cgroup}
  local cid
  if [[ -z "$pid" ]]; then
    echo "usage: pid2cname <pid> [cgroup-file]" >&2
    return 1
  fi
  if [[ ! -r "$cgroup_file" ]]; then
    echo "pid2cname: cannot read $cgroup_file" >&2
    return 1
  fi
  cid=$(grep -oE '[0-9a-f]{64}' "$cgroup_file" | head -n 1)
  if [[ -z "$cid" ]]; then
    echo "pid2cname: no container ID found in $cgroup_file" >&2
    return 1
  fi
  # docker prints names as "/<name>"; strip the leading slash.
  docker inspect --format '{{.Name}}' "$cid" | sed 's/^\///'
}

# dev2peercname <device>
# Container name behind a local veth device: resolve the peer PID with
# dev2peerpid, then map that PID to a name with pid2cname.
dev2peercname(){
  local peer_pid cname
  peer_pid=$(dev2peerpid $1)
  cname=$(pid2cname $peer_pid)
  echo $cname
}

# NOTE: relies on pid2cid, which was observed not to work
# dev2peercid <device>
# Container ID behind a local veth device: resolve the peer PID with
# dev2peerpid, then pass that PID to pid2cid.
dev2peercid(){
  local peer_pid cid
  peer_pid=$(dev2peerpid $1)
  cid=$(pid2cid $peer_pid)
  echo $cid
}

For namespace:

 

# Example: run the helpers against one host-side interface.
DEV=cali8f43c28540a

# Peer interface label inside the peer namespace
nsdev2peer $DEV
# Route destination(s) that use this device
dev2ipnet $DEV
# PID reported by lsns for the peer namespace
dev2peerpid $DEV
# Name of the container that PID belongs to
dev2peercname $DEV
# IPv4 address configured on the peer interface
dev2peerip $DEV

 result 

# ip r
DEV=cali8f43c28540a
# nsdev2peer $DEV
eth0@if13745
# dev2ipnet $DEV
10.243.179.114
# dev2peerpid $DEV
72783
# dev2peercname $DEV
k8s_sdewan_sdewan-cnf-69cb655b64-ntlck_default_1f67c45a-770d-42ba-a8c6-d860c234b7a8_0
# dev2peerip $DEV
10.243.179.114

REF: 

calico

calico网络原理、组网方式和使用  

【Calico系列】1 Calico官网文章列表  

【Calico系列】2 BGP入门笔记

【Calico系列】3 Calico的组件、架构与原理   

【Calico系列】4 数据中心网络简述  

Kubernetes中的网络解析——以calico为例   

白话flannel和calico网络原理  

容器网络Calico进阶实践 | 褚向阳   

calico网络模型中的路由原理   

flannel 和 calico 网络原理    

Calico  

kubernetes网络之---Calico原理解读   

 github

Flannel源码分析   

Flannel 源码分析   介绍了 添加 ARP 表,FDB 表,替换路由  

kubernetes flannel代码解析  详细的介绍 VXLAN 详细流程  

flannel 源码分析  

Flannel配置详解 

理解Kubernetes网络之Flannel网络    

而是由 Linux kernel 引发一个 "L3 MISS" 事件,并将 ARP 请求发到用户空间的 flanneld 程序。

 如果 FDB 中没有这个信息,那么 kernel 会向用户空间的 flanneld 程序发起 "L2 MISS" 事件。

https://www.slideshare.net/enakai/how-vxlan-works-on-linux

github 资料

flannel github  

Documentation  

Documentation/kubernetes.md   

https://github.com/coreos/flannel/blob/master/main.go 

 

nsenter命令简介 

容器内抓包定位网络问题  

 

 

  

 

posted @ 2020-12-04 21:57  lvmxh  阅读(338)  评论(0编辑  收藏  举报