SKB路由缓存与SOCK路由缓存2

skb结构体中的成员_skb_refdst用于暂时缓存出口/入口路由,避免在skb生存期中反复查找路由

sock结构体中有两个成员缓存路由:sk_rx_dst缓存入口路由,sk_dst_cache缓存出口路由

 

 

SKB路由缓存

skb_dst_set要求调用者在调用前已经增加dst的引用计数(例如通过dst_clone);而skb_dst_set_noref不需要,它通过SKB_DST_NOREF标志标识此缓存没有持有引用计数,因此skb_dst_drop函数在释放路由缓存时,不会对该dst执行释放操作。

/**
 * skb_dst_set - attach a dst entry to an skb
 * @skb: buffer whose route cache is written
 * @dst: dst entry to store
 *
 * Stores @dst in skb->_skb_refdst as a plain pointer value. The caller
 * is expected to already hold a reference on @dst; that reference is
 * the one skb_dst_drop() is meant to release later.
 */
static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
{
    unsigned long refdst = (unsigned long)dst;

    skb->_skb_refdst = refdst;
}

/**
 * skb_dst_set_noref - attach a dst entry to an skb without owning a reference
 * @skb: buffer whose route cache is written
 * @dst: dst entry to store
 *
 * Stores @dst in skb->_skb_refdst with the SKB_DST_NOREF bit or'ed in,
 * which marks the cached pointer as one that carries no reference.
 * Raises WARN_ON() when neither rcu_read_lock_held() nor
 * rcu_read_lock_bh_held() reports true.
 */
static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
{
    unsigned long refdst = (unsigned long)dst;

    WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
    skb->_skb_refdst = refdst | SKB_DST_NOREF;
}

 

 SKB出口路由缓存

1、TCP服务端,在回复客户端SYN+ACK时,新建一个skb结构体,根据路由查询结果(inet_csk_route_req查询出口路由),设置skb路由缓存,此时缓存的为出口路由

/*
 * Abridged excerpt of tcp_make_synack(): only the statements relevant to
 * the skb route cache are kept. The real signature, the declarations of
 * req, sk, skb and dst, and the rest of the body are elided.
 */
tcp_make_synack()
{
    struct inet_request_sock *ireq = inet_rsk(req);
    const struct tcp_sock *tp = tcp_sk(sk);
    /* Allocate a new skb that will carry the SYN+ACK reply. */
    skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
    /*
     * Cache the route on the new skb through the reference-holding setter.
     * NOTE(review): dst is not declared in this excerpt; the surrounding
     * notes say it is the egress route found by inet_csk_route_req() —
     * confirm against the full function.
     */
    skb_dst_set(skb, dst);
}

 

2、TCP服务端发送数据时,最终会调用ip_queue_xmit;IP层在该函数中检测sock结构中的出口路由缓存,如果有效,则设置到skb结构体中;否则重新进行出口路由查找,然后将结果设置到sock以及skb中

也就是:

  • skb有出口路由缓存,则使用
  • sock有出口缓存则copy到skb,使用
  • 否则查找路由;然后设置到sk 以及skb,使用

 

/**
 * skb_rtable - Returns the skb &rtable
 * @skb: buffer
 *
 * Returns the value of skb_dst(@skb) cast to a struct rtable pointer.
 * No check is made here; a NULL result from skb_dst() is returned as a
 * NULL rtable pointer.
 */
static inline struct rtable *skb_rtable(const struct sk_buff *skb)
{
    return (struct rtable *)skb_dst(skb);
}

 

 

 

/*
 * Abridged excerpt of __ip_queue_xmit() showing the order in which the
 * egress route is chosen: the skb's own cached route first, then the
 * socket's cached route, and finally a fresh lookup. Local declarations,
 * the trailing lookup arguments and everything after packet_routed are
 * elided in this excerpt.
 */
int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, __u8 tos)
{
    /* 1. A route already cached on the skb is used as is. */
    rt = skb_rtable(skb);
    if (rt)
        goto packet_routed;
    /* 2. Otherwise ask the socket for its cached route. */
    rt = (struct rtable *)__sk_dst_check(sk, 0);
    if (!rt) {
        /*
         * 3. The socket has no usable route: do a lookup and pass the
         * result to sk_setup_caps().
         * NOTE(review): the excerpt shows no error check on the lookup
         * result before &rt->dst is used — the full function presumably
         * has one; confirm against the kernel source.
         */
        rt = ip_route_output_ports(net, fl4, sk, daddr, inet->inet_saddr, inet->inet_dport,inet->inet_sport,sk->sk_protocol,---);
        sk_setup_caps(sk, &rt->dst);
    }
    /* Store the route on the skb through the no-reference setter. */
    skb_dst_set_noref(skb, &rt->dst);

packet_routed:
------------------------
}
sk_setup_caps(sk, &rt->dst) ----> sk_dst_set(sk, dst);
sk_setup_caps内部会调用sk_dst_set,由此更新sk的出口路由缓存(sk_dst_cache):
/*
 * sk_dst_set - replace the dst entry cached in sk->sk_dst_cache
 * @sk:  socket whose cache is updated
 * @dst: new dst entry to store
 *
 * Calls sk_tx_queue_clear() on the socket, resets
 * sk->sk_dst_pending_confirm to 0, swaps @dst into sk->sk_dst_cache with
 * xchg() and hands the previous entry to dst_release().
 */
static inline void
sk_dst_set(struct sock *sk, struct dst_entry *dst)
{
    sk_tx_queue_clear(sk);
    sk->sk_dst_pending_confirm = 0;

    /* Exchange first, then release whatever was cached before. */
    dst_release(xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst));
}

 

 

入口路由缓存

   对于接收到的数据包,一种情况是通过early_demux获取缓存路由,例如,在函数tcp_v4_early_demux中,通过sock结构体成员sk_rx_dst中的路由缓存初始化skb的dst,也就是,此时缓存的为入口路由。使用设置函数skb_dst_set_noref,不增加dst的引用计数。使用关联的sock成员sk_rx_dst的引用计数,可保障在sock存续期间,skb的dst可安全释放;当sock释放时,关联的skb会一并释放。

如果early_demux中找不到路由----则直接查询入口路由(ip_route_input_noref),缓存到skb中

  • 也就是sock中有sk_rx_dst;则将sock结构体成员sk_rx_dst的路由缓存赋值给skb成员_skb_refdst
  • 否则通过ip_route_input_noref查找入口路由(内部使用fib_lookup),并缓存到skb中
/*
 * Excerpt from the IPv4 receive path (the enclosing function is not
 * shown). Early demux is attempted only when the sysctl is enabled, the
 * skb has no dst and no socket attached yet, and the packet is not a
 * fragment.
 */
if (net->ipv4.sysctl_ip_early_demux &&
        !skb_dst(skb) &&
        !skb->sk &&
        !ip_is_fragment(iph)) {
        const struct net_protocol *ipprot;
        int protocol = iph->protocol;
        /* Fetch the handler entry registered for this IP protocol. */
        ipprot = rcu_dereference(inet_protos[protocol]);
        /* Invoke the protocol's early_demux hook when it has one. */
        if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
            err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
                          udp_v4_early_demux, skb);
        }
    }
    /*
     *    Initialise the virtual path cache for the packet. It describes
     *    how the packet travels inside Linux networking.
     *
     *    This lookup runs only when the skb does not hold a valid dst
     *    at this point; a failed lookup jumps to drop_error.
     */
    if (!skb_valid_dst(skb)) {
        err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
                       iph->tos, dev);
        if (unlikely(err))
            goto drop_error;
    }

 

 

forward路由缓存

在查找路由时如果FIB(fib_lookup)查询出的路由类型不是之前的RTN_LOCAL,路由dst的input函数指针设置为ip_forward; output函数指针设置为ip_output。在转发过程中避免重复查找路由

 此过程中还涉及到目的IP路由缓存的处理,这里不再展开

/*
 * rt_dst_alloc - allocate an rtable and set its default handlers
 * @dev:      device passed to dst_alloc() and used to pick the genid
 * @flags:    route flags; only RTCF_LOCAL is examined in the code shown
 * @type:     route type (not used in the code shown)
 * @nopolicy: when true, DST_NOPOLICY is added to the dst flags
 * @noxfrm:   when true, DST_NOXFRM is added to the dst flags
 *
 * Returns the new rtable, or the NULL result of dst_alloc() unchanged.
 * On success the output handler is ip_output, and the input handler is
 * set to ip_local_deliver only when RTCF_LOCAL is present in @flags.
 */
struct rtable *rt_dst_alloc(struct net_device *dev,
                unsigned int flags, u16 type,
                bool nopolicy, bool noxfrm)
{
    struct rtable *rt;
    int dst_flags = 0;

    if (nopolicy)
        dst_flags |= DST_NOPOLICY;
    if (noxfrm)
        dst_flags |= DST_NOXFRM;

    rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
               dst_flags);
    if (!rt)
        return rt;

    rt->rt_genid = rt_genid_ipv4(dev_net(dev));
    rt->dst.output = ip_output;
    if (flags & RTCF_LOCAL)
        rt->dst.input = ip_local_deliver;

    return rt;
}

/*
 * Abridged excerpt of __mkroute_input(): builds the route entry for a
 * packet that is to be forwarded and caches it on the skb. The parameter
 * list is truncated, and the declarations of rth, in_dev, out_dev, daddr,
 * fnhe, itag and do_cache as well as the return path are elided.
 */
static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res,s)
{
    /* Allocate the rtable on the output device with flags == 0. */
    rth = rt_dst_alloc(out_dev->dev, 0, res->type,
               IN_DEV_CONF_GET(in_dev, NOPOLICY),
               IN_DEV_CONF_GET(out_dev, NOXFRM));
    rth->rt_is_input = 1;
    /* Packets using this route are handed to ip_forward on input. */
    rth->dst.input = ip_forward;
    rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
               do_cache);
    skb_dst_set(skb, &rth->dst); /* cache the route on the skb */

}

 

 

 

 

 

posted @ 2024-10-14 21:58  codestacklinuxer  阅读(87)  评论(0)    收藏  举报