动态维护FDB表项实现VXLAN通信

参考http://www.just4coding.com/2020/04/20/vxlan-fdb/

基于BGP EVPN的VXLAN通信实践

http://www.just4coding.com/2020/04/26/vxlan-evpn/

下面我们通过实例手动更新FDB表来实现VXLAN通信。实验环境如下图:VTEP本地使用Linux bridge挂载连接到network namespace中的veth pair虚拟网卡,我们要实现3.3.3.3二层访问3.3.3.4。

分别在两台主机上执行命令构建环境,Host1上的命令如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
sysctl -w net.ipv4.ip_forward=1
ip netns add ns1
ip link add veth1 type veth peer name eth0 netns ns1
ip netns exec ns1 ip link set eth0 up
ip netns exec ns1 ip link set lo up
ip netns exec ns1 ip addr add 3.3.3.3/24 dev eth0
ip link set up dev veth1
ip link add br1 type bridge
ip link set br1 up
ip link set veth1 master br1
# 没有配置vxlan remote
ip link add vxlan100 type vxlan id 100 dstport 4789 local 192.168.33.15 nolearning
ip link set vxlan100 master br1
ip link set up vxlan100

在创建VXLAN设备时指定了nolearning来禁用源地址学习。在Host2中修改相应IP同样进行配置:

1
2
3
4
5
6
7
8
9
10
11
12
13
sysctl -w net.ipv4.ip_forward=1
ip netns add ns1
ip link add veth1 type veth peer name eth0 netns ns1
ip netns exec ns1 ip link set eth0 up
ip netns exec ns1 ip link set lo up
ip netns exec ns1 ip addr add 3.3.3.4/24 dev eth0
ip link set up dev veth1
ip link add br1 type bridge
ip link set br1 up
ip link set veth1 master br1
# 没有配置vxlan remote
ip link add vxlan100 type vxlan id 100 dstport 4789 local 192.168.33.16 nolearning
ip link set vxlan100 master br1
ip link set up vxlan100

在Host1的ns1中访问Host2中的3.3.3.4, 此时无法连通:

[root@bogon ~]# ip netns exec ns1 ping -c2 3.3.3.4
PING 3.3.3.4 (3.3.3.4) 56(84) bytes of data.
From 3.3.3.3 icmp_seq=1 Destination Host Unreachable
From 3.3.3.3 icmp_seq=2 Destination Host Unreachable

--- 3.3.3.4 ping statistics ---
2 packets transmitted, 0 received, +2 errors, 100% packet loss, time 1058ms
pipe 2
[root@bogon ~]#
[root@bogon ~]# tcpdump -i vxlan100 arp -env
tcpdump: listening on vxlan100, link-type EN10MB (Ethernet), capture size 262144 bytes
09:15:25.874319 3a:c1:45:47:6a:53 > Broadcast, ethertype ARP (0x0806), length 42: Ethernet (len 6), IPv4 (len 4), Request who-has 3.3.3.4 tell 3.3.3.3, length 28
09:15:26.932482 3a:c1:45:47:6a:53 > Broadcast, ethertype ARP (0x0806), length 42: Ethernet (len 6), IPv4 (len 4), Request who-has 3.3.3.4 tell 3.3.3.3, length 28
09:15:27.972485 3a:c1:45:47:6a:53 > Broadcast, ethertype ARP (0x0806), length 42: Ethernet (len 6), IPv4 (len 4), Request who-has 3.3.3.4 tell 3.3.3.3, length 28
root@ubuntu:/home/ubuntu# ip netns exec ns1  ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: eth0@if442: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 6e:5c:e1:bb:30:82 brd ff:ff:ff:ff:ff:ff link-netnsid 0
    inet 3.3.3.4/24 scope global eth0
       valid_lft forever preferred_lft forever
    inet6 fe80::6c5c:e1ff:febb:3082/64 scope link 
       valid_lft forever preferred_lft forever
root@ubuntu:/home/ubuntu# 

 

[root@bogon ~]# ip netns exec ns1  ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: eth0@if18: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 3a:c1:45:47:6a:53 brd ff:ff:ff:ff:ff:ff link-netnsid 0
    inet 3.3.3.3/24 scope global eth0
       valid_lft forever preferred_lft forever
    inet6 fe80::38c1:45ff:fe47:6a53/64 scope link 
       valid_lft forever preferred_lft forever
[root@bogon ~]#

 

[root@bogon ~]# bridge fdb append 6e:5c:e1:bb:30:82 dev vxlan100 dst 192.168.33.16
[root@bogon ~]# 
root@ubuntu:/home/ubuntu# bridge fdb append 3a:c1:45:47:6a:53 dev vxlan100 dst 192.168.33.15
root@ubuntu:/home/ubuntu# 
[root@bogon ~]# ip netns exec ns1 ping -c2 3.3.3.4
PING 3.3.3.4 (3.3.3.4) 56(84) bytes of data.
^C
--- 3.3.3.4 ping statistics ---
2 packets transmitted, 0 received, 100% packet loss, time 1017ms

[root@bogon ~]# 

 

再添加一个fdb项

 

root@ubuntu:/home/ubuntu# bridge fdb append 00:00:00:00:00:00 dev vxlan100 dst 192.168.33.15
root@ubuntu:/home/ubuntu#
[root@bogon ~]# bridge fdb append 00:00:00:00:00:00 dev vxlan100 dst 192.168.33.16
[root@bogon ~]# 
全零表项表示没有匹配的MAC地址时,就发送到该表项中的VTEP, 用于处理BUM流量。

可以访问了

[root@bogon ~]# ip netns exec ns1 ping -c2 3.3.3.4
PING 3.3.3.4 (3.3.3.4) 56(84) bytes of data.
64 bytes from 3.3.3.4: icmp_seq=1 ttl=64 time=0.542 ms
64 bytes from 3.3.3.4: icmp_seq=2 ttl=64 time=0.136 ms

--- 3.3.3.4 ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 1038ms
rtt min/avg/max/mdev = 0.136/0.339/0.542/0.203 ms
[root@bogon ~]# 

 

[root@bogon ~]#  bridge fdb show brport vxlan100
66:7c:ea:83:54:db master br1 
6e:5c:e1:bb:30:82 master br1 
3a:01:87:9f:cb:e3 vlan 1 master br1 permanent
3a:01:87:9f:cb:e3 master br1 permanent
00:00:00:00:00:00 dst 192.168.33.16 self permanent
6e:5c:e1:bb:30:82 dst 192.168.33.16 self permanent
[root@bogon ~]# 

 

[root@bogon ~]# ip netns exec ns1 ip n
3.3.3.4 dev eth0 lladdr 6e:5c:e1:bb:30:82 STALE
[root@bogon ~]# ip netns exec ns1 ip n del 3.3.3.4 dev eth0 lladdr 6e:5c:e1:bb:30:82  --------删掉neighbor项
[root@bogon ~]# ip netns exec ns1 ip n
[root@bogon ~]# ip netns exec ns1 ping -c2 3.3.3.4
PING 3.3.3.4 (3.3.3.4) 56(84) bytes of data.
64 bytes from 3.3.3.4: icmp_seq=1 ttl=64 time=0.521 ms
64 bytes from 3.3.3.4: icmp_seq=2 ttl=64 time=0.129 ms

--- 3.3.3.4 ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 1048ms
rtt min/avg/max/mdev = 0.129/0.325/0.521/0.196 ms
[root@bogon ~]#

 

[root@bogon ~]# tcpdump -i vxlan100 arp -env                ------------------- arp报文
tcpdump: listening on vxlan100, link-type EN10MB (Ethernet), capture size 262144 bytes
09:51:52.604485 3a:c1:45:47:6a:53 > Broadcast, ethertype ARP (0x0806), length 42: Ethernet (len 6), IPv4 (len 4), Request who-has 3.3.3.4 tell 3.3.3.3, length 28
09:51:52.604698 6e:5c:e1:bb:30:82 > 3a:c1:45:47:6a:53, ethertype ARP (0x0806), length 42: Ethernet (len 6), IPv4 (len 4), Reply 3.3.3.4 is-at 6e:5c:e1:bb:30:82, length 28
09:51:57.818120 6e:5c:e1:bb:30:82 > 3a:c1:45:47:6a:53, ethertype ARP (0x0806), length 42: Ethernet (len 6), IPv4 (len 4), Request who-has 3.3.3.3 tell 3.3.3.4, length 28
09:51:57.818161 3a:c1:45:47:6a:53 > 6e:5c:e1:bb:30:82, ethertype ARP (0x0806), length 42: Ethernet (len 6), IPv4 (len 4), Reply 3.3.3.3 is-at 3a:c1:45:47:6a:53, length 28

 

 

如果我们能获取MAC所在的VTEP,则可由VXLAN设备实现ARP代答,将ARP广播范围控制在本地,避免ARP广播请求发送到整个VXLAN网络环境中。Linux VXLAN设备支持通过proxy参数开启ARP代答。

root@ubuntu:/home/ubuntu# vi vxlan2.sh
ip link del vxlan100
ip link add vxlan100 type vxlan id 100 dstport 4789 local 192.168.33.16 nolearning proxy
ip link set vxlan100 master br1
ip link set up vxlan100
bridge fdb append 00:00:00:00:00:00 dev vxlan100 dst 192.168.33.15
bridge fdb append 3a:c1:45:47:6a:53 dev vxlan100 dst 192.168.33.15
[root@bogon ~]# vi vxlan2.sh
  1 ip link del vxlan100
  2 ip link add vxlan100 type vxlan id 100 dstport 4789 local 192.168.33.15 nolearning proxy
  3 ip link set vxlan100 master br1
  4 ip link set up vxlan100
  5 bridge fdb append 00:00:00:00:00:00 dev vxlan100 dst 192.168.33.16
  6 bridge fdb append 6e:5c:e1:bb:30:82 dev vxlan100 dst 192.168.33.16

 

[root@bogon ~]# ip netns exec ns1 ping -c2 3.3.3.4
PING 3.3.3.4 (3.3.3.4) 56(84) bytes of data.
From 3.3.3.3 icmp_seq=1 Destination Host Unreachable
From 3.3.3.3 icmp_seq=2 Destination Host Unreachable

--- 3.3.3.4 ping statistics ---
2 packets transmitted, 0 received, +2 errors, 100% packet loss, time 1037ms
pipe 2
[root@bogon ~]# 

[root@bogon ~]# tcpdump -i vxlan100 arp -env
tcpdump: listening on vxlan100, link-type EN10MB (Ethernet), capture size 262144 bytes
10:02:08.774777 3a:c1:45:47:6a:53 > Broadcast, ethertype ARP (0x0806), length 42: Ethernet (len 6), IPv4 (len 4), Request who-has 3.3.3.4 tell 3.3.3.3, length 28
10:02:09.812478 3a:c1:45:47:6a:53 > Broadcast, ethertype ARP (0x0806), length 42: Ethernet (len 6), IPv4 (len 4), Request who-has 3.3.3.4 tell 3.3.3.3, length 28
10:02:10.852463 3a:c1:45:47:6a:53 > Broadcast, ethertype ARP (0x0806), length 42: Ethernet (len 6), IPv4 (len 4), Request who-has 3.3.3.4 tell 3.3.3.3, length 28

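开启proxy后由vxlan设备代答arp:vxlan设备根据自身的neighbor表项应答ARP请求,表中没有对应表项时ARP请求得不到应答,所以上面的ping失败。分别在两台主机上为vxlan100添加对端的neighbor表项后,即可看到代答的ARP应答: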

 

 

 ip neighbor add 3.3.3.4 lladdr 6e:5c:e1:bb:30:82  dev vxlan100

 ip neighbor add 3.3.3.3 lladdr 3a:c1:45:47:6a:53  dev vxlan100

10:06:28.394594 3a:c1:45:47:6a:53 > Broadcast, ethertype ARP (0x0806), length 42: Ethernet (len 6), IPv4 (len 4), Request who-has 3.3.3.4 tell 3.3.3.3, length 28
10:06:28.394608 6e:5c:e1:bb:30:82 > 3a:c1:45:47:6a:53, ethertype ARP (0x0806), length 42: Ethernet (len 6), IPv4 (len 4), Reply 3.3.3.4 is-at 6e:5c:e1:bb:30:82, length 28
6e:5c:e1:bb:30:82不是vxlan100的地址,是3.3.3.4的mac地址
[root@bogon ~]# ip a show vxlan100
21: vxlan100: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master br1 state UNKNOWN group default qlen 1000
    link/ether da:96:9f:d2:0c:5e brd ff:ff:ff:ff:ff:ff
    inet6 fe80::d896:9fff:fed2:c5e/64 scope link 
       valid_lft forever preferred_lft forever
[root@bogon ~]# 

 

Linux的VXLAN设备还支持表项匹配MISS的消息通知。内核发现在ARP表或者FDB表中找不到相应的表项时,可以通过NETLINK消息发送通知,用户态进程可以监听相应消息并补充所缺失的表项记录,从而实现动态的表项维护。VXLAN设备支持两种消息:

  • L2MISS: VXLAN设备在FDB表中找不到目的MAC地址所属的VTEP IP地址。L2MISS消息的发送需要满足如下条件:

    • 目的MAC地址未知,即在FDB表中没有相应表项
    • FDB表中没有全零表项
    • 目的MAC地址不是广播或组播地址
  • L3MISS: VXLAN设备在ARP表中找不到目的IP所对应的MAC地址

我们在bogon上删除vxlan100,重新添加开启了l2miss和l3miss的vxlan100接口:

[root@bogon ~]# vi vxlan3.sh
  1 ip link del vxlan100
  2 ip link add vxlan100 type vxlan  id 100 dstport 4789 local 192.168.33.15 nolearning proxy l2miss l3miss
  3 ip link set vxlan100 master br1
  4 ip link set up vxlan100
[root@bogon ~]# ip netns exec ns1 ping -c2 3.3.3.4
PING 3.3.3.4 (3.3.3.4) 56(84) bytes of data.
From 3.3.3.3 icmp_seq=1 Destination Host Unreachable
From 3.3.3.3 icmp_seq=2 Destination Host Unreachable

--- 3.3.3.4 ping statistics ---
2 packets transmitted, 0 received, +2 errors, 100% packet loss, time 1077ms
pipe 2
[root@bogon ~]# 
[root@bogon ~]# ip monitor all dev vxlan100
[NEIGH][NEIGH]miss 3.3.3.4  STALE
[NEIGH][NEIGH]lladdr 22:e3:36:8e:f1:d9 REACHABLE
[NEIGH][NEIGH]miss 3.3.3.4  STALE
[NEIGH]miss 3.3.3.4  STALE

 

[root@bogon ~]# bridge fdb show brport vxlan100
0a:c5:33:d1:e9:e7 vlan 1 master br1 permanent
0a:c5:33:d1:e9:e7 master br1 permanent
22:e3:36:8e:f1:d9 master br1 
[root@bogon ~]# 
root@ubuntu:/home/ubuntu#  bridge fdb show brport vxlan100
22:e3:36:8e:f1:d9 vlan 1 master br1 permanent
22:e3:36:8e:f1:d9 master br1 permanent
00:00:00:00:00:00 dst 192.168.33.15 self permanent
3a:c1:45:47:6a:53 dst 192.168.33.15 self permanent
root@ubuntu:/home/ubuntu# 

 

[root@bogon ~]# ip n show dev vxlan100
[root@bogon ~]# ip neighbor add 3.3.3.4 lladdr 6e:5c:e1:bb:30:82  dev vxlan100 nud reachable
[root@bogon ~]# ip n show dev vxlan100
3.3.3.4 lladdr 6e:5c:e1:bb:30:82 REACHABLE
[root@bogon ~]# 
[root@bogon ~]# ip monitor all dev vxlan100
[NEIGH][NEIGH]miss 3.3.3.4  STALE
[NEIGH][NEIGH]lladdr 22:e3:36:8e:f1:d9 REACHABLE
[NEIGH][NEIGH]miss 3.3.3.4  STALE
[NEIGH]miss 3.3.3.4  STALE
[NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH]3.3.3.4 lladdr 6e:5c:e1:bb:30:82 REACHABLE
[NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH]3.3.3.4 lladdr 6e:5c:e1:bb:30:82 STALE

 

[root@bogon ~]# ip neighbor replace 3.3.3.4 lladdr 6e:5c:e1:bb:30:82  dev vxlan100 nud reachable
[root@bogon ~]# 
[NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH]3.3.3.4 lladdr 6e:5c:e1:bb:30:82 REACHABLE
[NEIGH][NEIGH][NEIGH]Deleted lladdr 22:e3:36:8e:f1:d9 STALE

 

[root@bogon ~]# ip neighbor replace 3.3.3.4 lladdr 6e:5c:e1:bb:30:82  dev vxlan100 nud reachable
[root@bogon ~]# ip netns exec ns1 ping -c2 3.3.3.4
PING 3.3.3.4 (3.3.3.4) 56(84) bytes of data.
^C
--- 3.3.3.4 ping statistics ---
2 packets transmitted, 0 received, 100% packet loss, time 1047ms


[root@bogon ~]# bridge fdb append 6e:5c:e1:bb:30:82 dev vxlan100 dst 192.168.33.16
[root@bogon ~]# ip netns exec ns1 ping -c2 3.3.3.4
PING 3.3.3.4 (3.3.3.4) 56(84) bytes of data.
64 bytes from 3.3.3.4: icmp_seq=1 ttl=64 time=0.332 ms
64 bytes from 3.3.3.4: icmp_seq=2 ttl=64 time=0.127 ms

--- 3.3.3.4 ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 1047ms
rtt min/avg/max/mdev = 0.127/0.229/0.332/0.103 ms
[root@bogon ~]# 

 

[root@bogon ~]# ip monitor all dev vxlan100
[NEIGH][NEIGH]miss 3.3.3.4  STALE           -------------------  L3MISS的消息,通过配置neighbor表项解决
[NEIGH][NEIGH]lladdr 22:e3:36:8e:f1:d9 REACHABLE
[NEIGH][NEIGH]miss 3.3.3.4  STALE
[NEIGH]miss 3.3.3.4  STALE
[NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH]3.3.3.4 lladdr 6e:5c:e1:bb:30:82 REACHABLE
[NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH]3.3.3.4 lladdr 6e:5c:e1:bb:30:82 STALE
[NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH]3.3.3.4 lladdr 6e:5c:e1:bb:30:82 REACHABLE
[NEIGH][NEIGH][NEIGH]Deleted lladdr 22:e3:36:8e:f1:d9 STALE
[NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH]3.3.3.4 lladdr 6e:5c:e1:bb:30:82 STALE
[NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH]3.3.3.4 lladdr 6e:5c:e1:bb:30:82 REACHABLE
[NEIGH]lladdr 6e:5c:e1:bb:30:82 REACHABLE
[NEIGH]miss lladdr 6e:5c:e1:bb:30:82 STALE            -----------------------------L2MISS的消息,通过配置fdb表项解决
[NEIGH]miss lladdr 6e:5c:e1:bb:30:82 STALE
[NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH]3.3.3.4 lladdr 6e:5c:e1:bb:30:82 STALE
[NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH]Deleted ff02::1:ffd1:e9e7 lladdr 33:33:ff:d1:e9:e7 NOARP
[NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH]Deleted ff02::16 lladdr 33:33:00:00:00:16 NOARP
[NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH][NEIGH]??? lladdr 6e:5c:e1:bb:30:82 NOARP,PERMANENT
[NEIGH]lladdr 6e:5c:e1:bb:30:82 NOARP,PERMANENT

 

posted on 2020-07-23 10:09  tycoon3  阅读(1774)  评论(0)  编辑  收藏  举报

导航