UDP隧道

 

 

隧道创建

对于L2TP、FOU/GUE、GENEVE和VXLAN等隧道,创建隧道时都需要在内核中新建一个UDP套接口,框架中的函数udp_sock_create4提供此功能。该函数不仅创建套接口,还将其绑定(bind)到本地地址和端口;如果特定隧道配置了对端端口(cfg->peer_udp_port),还会与对端地址进行连接(connect)。

/*
 * udp_sock_create4 - create, bind and optionally connect an IPv4 UDP socket.
 *
 * Abridged excerpt: err, sock and udp_addr are used without their
 * declarations being shown, err is assigned after each call but never
 * checked, sockp is never written and no value is returned here.
 */
int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp)
{ 
/* Create an AF_INET / SOCK_DGRAM socket via sock_create_kern() for net. */
err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, &sock);

/* Bind the socket to the local address and port taken from cfg. */
udp_addr.sin_family = AF_INET;
udp_addr.sin_addr = cfg->local_ip;
udp_addr.sin_port = cfg->local_udp_port; 
err = kernel_bind(sock, (struct sockaddr *)&udp_addr, sizeof(udp_addr));

/* Connect only when a peer UDP port is configured; udp_addr is reused. */
if (cfg->peer_udp_port) { 
udp_addr.sin_family = AF_INET;
udp_addr.sin_addr = cfg->peer_ip;
udp_addr.sin_port = cfg->peer_udp_port;
err = kernel_connect(sock, (struct sockaddr *)&udp_addr, sizeof(udp_addr), 0);
}
/* sk_no_check_tx is the inverse of cfg->use_udp_checksums. */
sock->sk->sk_no_check_tx = !cfg->use_udp_checksums;
}

 


函数setup_udp_tunnel_sock建立套接口与隧道的绑定。此函数为第二个参数sock指定的套接口设置隧道属性,此后内核由该套接口接收到的数据包将交由所配置的encap_rcv回调函数(cfg->encap_rcv)处理。目前基于UDP的隧道协议主要有L2TP、VXLAN和GENEVE,分别注册了接收处理函数l2tp_udp_encap_recv、vxlan_rcv和geneve_udp_encap_recv。通用的UDP隧道协议FOU和GUE,处理函数分别为fou_udp_recv和gue_udp_recv。

void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *cfg)
{
struct sock *sk = sock->sk;

udp_sk(sk)->encap_type = cfg->encap_type;
udp_sk(sk)->encap_rcv = cfg->encap_rcv;
udp_sk(sk)->encap_destroy = cfg->encap_destroy;
udp_sk(sk)->gro_receive = cfg->gro_receive;
udp_sk(sk)->gro_complete = cfg->gro_complete;

udp_tunnel_encap_enable(sock);
}

 

UDP隧道接收

在UDP数据包处理路径中,函数udp_queue_rcv_skb判断全局静态键udp_encap_needed是否使能,并且当前套接口的encap_type不为0。条件满足时,随即调用绑定在此套接口上的封装数据包回调处理函数encap_rcv进行处理。

/*
 * Excerpt of udp_queue_rcv_skb: hands skb to the socket's encap_rcv callback
 * when encapsulation is active. The snippet is truncated: the declarations
 * of encap_rcv and ret, any handling of ret, and the closing braces of the
 * outer if and of the function are not shown.
 */
static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
    struct udp_sock *up = udp_sk(sk);
 
    /*
     * udp_encap_needed is tested through static_key_false() on a global
     * symbol; encap_type is read from this socket's udp_sock.
     */
    if (static_key_false(&udp_encap_needed) && up->encap_type) {
        /* Read the callback pointer once, then test and call that copy. */
        encap_rcv = READ_ONCE(up->encap_rcv);
        if (encap_rcv) {
            ret = encap_rcv(sk, skb);
    }

 

 

/*
 * Excerpt of udp_tunnel_xmit_skb: fills in the UDP header of skb and passes
 * the packet to iptunnel_xmit() with IPPROTO_UDP. The "..." in the parameter
 * list stands for parameters left out of this snippet (src, dst, tos, ttl,
 * df, src_port, dst_port and xnet are used below without being declared
 * here); the declaration of uh is left out as well.
 */
void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, ...)
{
    uh = udp_hdr(skb);
    
    /* Ports are stored as given; the length field is htons(skb->len). */
    uh->dest = dst_port;
    uh->source = src_port; 
    uh->len = htons(skb->len);
    iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet);
} 
 

 

UDP隧道发送

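当数据包到达UDP隧道设备的发送函数(ndo_start_xmit,例如GENEVE隧道的geneve_xmit)时,先进行特定隧道相关的处理,之后由通用UDP隧道发送函数udp_tunnel_xmit_skb(代码节选见本节标题之前)进行发送。下面列出的mlx5e_netdev_ops是物理网卡驱动的net_device_ops示例,其中的ndo_udp_tunnel_add/ndo_udp_tunnel_del回调用于后文的Offload处理。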
 

const struct net_device_ops mlx5e_netdev_ops = {
        .ndo_open                = mlx5e_open,
        .ndo_stop                = mlx5e_close,
        .ndo_start_xmit          = mlx5e_xmit,
        .ndo_setup_tc            = mlx5e_setup_tc,
        .ndo_select_queue        = mlx5e_select_queue,
        .ndo_get_stats64         = mlx5e_get_stats,
        .ndo_set_rx_mode         = mlx5e_set_rx_mode,
        .ndo_set_mac_address     = mlx5e_set_mac,
        .ndo_vlan_rx_add_vid     = mlx5e_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid    = mlx5e_vlan_rx_kill_vid,
        .ndo_set_features        = mlx5e_set_features,
        .ndo_fix_features        = mlx5e_fix_features,
        .ndo_change_mtu          = mlx5e_change_nic_mtu,
        .ndo_do_ioctl            = mlx5e_ioctl,
        .ndo_set_tx_maxrate      = mlx5e_set_tx_maxrate,
        .ndo_udp_tunnel_add      = mlx5e_add_vxlan_port,
        .ndo_udp_tunnel_del      = mlx5e_del_vxlan_port,
        .ndo_features_check      = mlx5e_features_check,
        .ndo_tx_timeout          = mlx5e_tx_timeout,
        .ndo_bpf                 = mlx5e_xdp,
        .ndo_xdp_xmit            = mlx5e_xdp_xmit,
        .ndo_xsk_wakeup          = mlx5e_xsk_wakeup,
#ifdef CONFIG_MLX5_EN_ARFS
        .ndo_rx_flow_steer       = mlx5e_rx_flow_steer,
#endif
#ifdef CONFIG_MLX5_ESWITCH
        .ndo_bridge_setlink      = mlx5e_bridge_setlink,
        .ndo_bridge_getlink      = mlx5e_bridge_getlink,

        /* SRIOV E-Switch NDOs */
        .ndo_set_vf_mac          = mlx5e_set_vf_mac,
        .ndo_set_vf_vlan         = mlx5e_set_vf_vlan,
        .ndo_set_vf_spoofchk     = mlx5e_set_vf_spoofchk,
        .ndo_set_vf_trust        = mlx5e_set_vf_trust,
        .ndo_set_vf_rate         = mlx5e_set_vf_rate,
        .ndo_get_vf_config       = mlx5e_get_vf_config,
        .ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
        .ndo_get_vf_stats        = mlx5e_get_vf_stats,
#endif
};

 

UDP隧道Offload

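对于支持UDP隧道(VXLAN/GENEVE)Offloading功能的物理网卡,其能力通过特性标志位NETIF_F_RX_UDP_TUNNEL_PORT进行表示;通知事件NETDEV_UDP_TUNNEL_PUSH_INFO/NETDEV_UDP_TUNNEL_DROP_INFO则用于触发隧道端口信息的下发与移除。函数udp_tunnel_push_rx_port与udp_tunnel_drop_rx_port分别调用网卡驱动的ndo_udp_tunnel_add和ndo_udp_tunnel_del回调,向网卡添加和删除需要Offload的UDP隧道端口。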


 

 

void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
                 unsigned short type)
{
    struct sock *sk = sock->sk;
    struct udp_tunnel_info ti;
    if (!dev->netdev_ops->ndo_udp_tunnel_add ||
        !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
        return;
    ti.type = type;
    ti.sa_family = sk->sk_family;
    ti.port = inet_sk(sk)->inet_sport;
    dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
}
EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port);
void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock,
                 unsigned short type)
{
    struct sock *sk = sock->sk;
    struct udp_tunnel_info ti;
    if (!dev->netdev_ops->ndo_udp_tunnel_del ||
        !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
        return;
    ti.type = type;
    ti.sa_family = sk->sk_family;
    ti.port = inet_sk(sk)->inet_sport;
    dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
}

 

/*
 * Netdevice notifier callback for vxlan.
 *
 * On NETDEV_UNREGISTER the device's offloaded ports are dropped and the
 * lower-device unregister handler runs; on NETDEV_REGISTER and
 * NETDEV_UDP_TUNNEL_PUSH_INFO the ports are pushed; on
 * NETDEV_UDP_TUNNEL_DROP_INFO they are dropped. Always returns NOTIFY_DONE.
 */
static int vxlan_netdevice_event(struct notifier_block *unused,
                                 unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);

        switch (event) {
        case NETDEV_UNREGISTER:
                vxlan_offload_rx_ports(dev, false);
                vxlan_handle_lowerdev_unregister(vn, dev);
                break;
        case NETDEV_REGISTER:
        case NETDEV_UDP_TUNNEL_PUSH_INFO:
                vxlan_offload_rx_ports(dev, true);
                break;
        case NETDEV_UDP_TUNNEL_DROP_INFO:
                vxlan_offload_rx_ports(dev, false);
                break;
        default:
                break;
        }

        return NOTIFY_DONE;
}

/*
 * Push (push == true) or drop (push == false) the port of every vxlan socket
 * in the device's network namespace to/from dev. Walks all PORT_HASH_SIZE
 * buckets of vn->sock_list while holding vn->sock_lock.
 */
static void vxlan_offload_rx_ports(struct net_device *dev, bool push)
{
        struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
        struct vxlan_sock *vs;
        unsigned int bucket;

        spin_lock(&vn->sock_lock);
        for (bucket = 0; bucket < PORT_HASH_SIZE; bucket++) {
                hlist_for_each_entry_rcu(vs, &vn->sock_list[bucket], hlist) {
                        /* Sockets flagged VXLAN_F_GPE use the GPE tunnel type. */
                        unsigned short type = (vs->flags & VXLAN_F_GPE) ?
                                              UDP_TUNNEL_TYPE_VXLAN_GPE :
                                              UDP_TUNNEL_TYPE_VXLAN;

                        if (!push)
                                udp_tunnel_drop_rx_port(dev, vs->sock, type);
                        else
                                udp_tunnel_push_rx_port(dev, vs->sock, type);
                }
        }
        spin_unlock(&vn->sock_lock);
}

 

 

/*
 * Allocate a work item carrying (priv, port) and queue it on priv->wq.
 * A non-zero add selects mlx5e_vxlan_add_work as the handler, zero selects
 * mlx5e_vxlan_del_work. The allocation uses GFP_ATOMIC; if it fails the
 * request is dropped without any report to the caller.
 */
static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add)
{
        struct mlx5e_vxlan_work *req = kmalloc(sizeof(*req), GFP_ATOMIC);

        if (!req)
                return;

        req->priv = priv;
        req->port = port;

        if (!add)
                INIT_WORK(&req->work, mlx5e_vxlan_del_work);
        else
                INIT_WORK(&req->work, mlx5e_vxlan_add_work);

        queue_work(priv->wq, &req->work);
}

/*
 * ndo_udp_tunnel_add handler: queue an "add port" work item for ti->port
 * (converted with be16_to_cpu). The request is ignored unless ti->type is
 * UDP_TUNNEL_TYPE_VXLAN and mlx5_vxlan_allowed() accepts the device.
 */
void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
{
        struct mlx5e_priv *priv = netdev_priv(netdev);

        if (ti->type == UDP_TUNNEL_TYPE_VXLAN &&
            mlx5_vxlan_allowed(priv->mdev->vxlan))
                mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);
}

/*
 * Work handler for an "add port" request: calls mlx5_vxlan_add_port() for
 * the queued port while holding priv->state_lock, then frees the work item.
 * The return value of mlx5_vxlan_add_port() is not examined.
 */
static void mlx5e_vxlan_add_work(struct work_struct *work)
{
        struct mlx5e_vxlan_work *req =
                container_of(work, struct mlx5e_vxlan_work, work);
        struct mlx5e_priv *priv = req->priv;

        mutex_lock(&priv->state_lock);
        mlx5_vxlan_add_port(priv->mdev->vxlan, req->port);
        mutex_unlock(&priv->state_lock);

        kfree(req);
}

/*
 * Abridged excerpt of mlx5_vxlan_add_port: only the call that issues the
 * add-port command for vxlan->mdev is shown. The declaration of ret, any use
 * of ret and the return statement are not part of this snippet.
 */
int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
{
        ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port);
      
}

/*
 * Build an add_vxlan_udp_dport command with opcode
 * MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT and the given port, and execute it with
 * mlx5_cmd_exec(). Returns the result of mlx5_cmd_exec().
 */
static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
{
        u32 cmd_out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0};
        u32 cmd_in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)] = {0};
        int err;

        MLX5_SET(add_vxlan_udp_dport_in, cmd_in, vxlan_udp_port, port);
        MLX5_SET(add_vxlan_udp_dport_in, cmd_in, opcode,
                 MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);

        err = mlx5_cmd_exec(mdev, cmd_in, sizeof(cmd_in),
                            cmd_out, sizeof(cmd_out));
        return err;
}

 

外部UDP隧道

对于不是通过ip link命令创建的UDP隧道,即控制通道位于外部系统(如路由系统)的隧道,其隧道参数通过ip route encap指定,因此需要将这些与路由相关的隧道信息保存在路由缓存中。参见UDP框架函数udp_tun_rx_dst,它使用metadata_dst结构体保存通用路由信息和隧道信息。
 

/*
 * udp_tun_rx_dst - build the receive-side metadata dst for a UDP tunnel packet.
 * @skb:       received packet; its UDP header supplies the ports and checksum
 * @family:    address family (not consulted in this excerpt, which only shows
 *             the ip_tun_rx_dst() path)
 * @flags:     tunnel flags passed through to ip_tun_rx_dst()
 * @tunnel_id: tunnel id passed through to ip_tun_rx_dst()
 * @md_size:   metadata size passed through to ip_tun_rx_dst()
 *
 * Records the UDP source/destination ports in the tunnel key and sets
 * TUNNEL_CSUM when the UDP checksum field is non-zero.
 *
 * Returns the metadata_dst, or NULL when no dst could be obtained
 * (assumes ip_tun_rx_dst() reports failure as NULL - confirm against its
 * definition).
 */
struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb,  unsigned short family, __be16 flags, __be64 tunnel_id, int md_size)
{
    struct metadata_dst *tun_dst;
    struct ip_tunnel_info *info;

    tun_dst = ip_tun_rx_dst(skb, flags, tunnel_id, md_size);
    /* Do not dereference a missing dst. */
    if (!tun_dst)
        return NULL;

    info = &tun_dst->u.tun_info;
    info->key.tp_src = udp_hdr(skb)->source;
    info->key.tp_dst = udp_hdr(skb)->dest;
    if (udp_hdr(skb)->check)
        info->key.tun_flags |= TUNNEL_CSUM;

    /* The function is declared to return the dst; hand it to the caller. */
    return tun_dst;
}
 

 

posted on 2021-03-02 10:29  tycoon3  阅读(1175)  评论(0编辑  收藏  举报

导航