Connection establishment timer

1. The connection establishment timer:

When a TCP connection is established, the client sends a SYN and waits for an ACK. On receiving the SYN, the server replies with an ACK that also carries its own SYN (the SYN+ACK), and then waits for the client's final ACK. If the server does not receive that ACK, it retransmits the SYN+ACK several times on timeout; if there is still no ACK after the last retransmission, connection establishment is aborted.

After creating the request_sock and moving it into the TCP_NEW_SYN_RECV state, the kernel inserts it into the ehash table, sends the SYN+ACK, and initializes the rsk_timer so the SYN+ACK can be retransmitted if needed.

Note that newer kernels differ from older ones at this point: the half-open request is inserted into the ehash table, instead of hanging off the listener's icsk_accept_queue (the old per-listener SYN queue) as before.
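For context, a rough sketch of the call site on a ~4.x kernel (simplified and not verbatim; the send_synack() arguments are elided and the exact shape varies by kernel version): in tcp_conn_request(), the request is hashed and the timer armed with the initial timeout before the SYN+ACK goes out:

    /* Simplified sketch of tcp_conn_request() in net/ipv4/tcp_input.c */
    if (!want_cookie) {
        /* hash the half-open request into ehash and arm rsk_timer with
         * the initial timeout (TCP_TIMEOUT_INIT, about 1 second)
         */
        inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
    }
    af_ops->send_synack(...);    /* then transmit the SYN+ACK */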

static void reqsk_queue_hash_req(struct request_sock *req,
                 unsigned long timeout)
{
    req->num_retrans = 0;
    req->num_timeout = 0;
    req->sk = NULL;

    /* Arm the SYN+ACK retransmit timer; it fires reqsk_timer_handler() */
    setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req);
    mod_timer_pinned(&req->rsk_timer, jiffies + timeout);

    /* half-open requests go into the established hash table; short-lived
     * connections therefore touch this table very frequently
     */
    inet_ehash_insert(req_to_sk(req), NULL);
    /* before letting lookups find us, make sure all req fields
     * are committed to memory and refcnt initialized.
     */
    smp_wmb();
    atomic_set(&req->rsk_refcnt, 2 + 1);
}

/*
 * This is where the SYN+ACK timer is armed. See the earlier article for the
 * details of the three-way handshake.
 */
void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
                   unsigned long timeout)
{
    reqsk_queue_hash_req(req, timeout);
    inet_csk_reqsk_queue_added(sk);
}
static inline void inet_csk_reqsk_queue_added(struct sock *sk)
{
    reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue);
}

static inline void reqsk_queue_added(struct request_sock_queue *queue)
{
    atomic_inc(&queue->young);    /* requests whose SYN+ACK has never been retransmitted */
    atomic_inc(&queue->qlen);     /* total requests still waiting to complete the handshake */
}
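For completeness, the two counters are decremented again when a request is dropped or promoted to a full socket. The matching helper looks roughly like this in kernels of this generation (include/net/request_sock.h); treat it as a sketch rather than an exact quote:

static inline void reqsk_queue_removed(struct request_sock_queue *queue,
                       const struct request_sock *req)
{
    if (req->num_timeout == 0)
        atomic_dec(&queue->young);    /* never retransmitted, so it was still young */
    atomic_dec(&queue->qlen);         /* one fewer pending handshake */
}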

 

/*
 * Container in the TCP control block that holds connection request blocks:
 * sockets in SYN_RECV state as well as connections that are established but
 * not yet accepted.
 *
 * This structure is embedded in inet_connection_sock as icsk_accept_queue.
 */
struct request_sock_queue {
    spinlock_t        rskq_lock;
    /*
     * Holds the value of the TCP_DEFER_ACCEPT socket option: the number of
     * SYN+ACK retransmissions allowed while TCP_DEFER_ACCEPT is enabled.
     * Note: when TCP_DEFER_ACCEPT is set, rskq_defer_accept replaces
     * sysctl_tcp_synack_retries as the maximum retransmission count; see
     * inet_csk_reqsk_queue_prune().
     */
    u8            rskq_defer_accept;

    u32            synflood_warned;
    atomic_t        qlen;
    atomic_t        young;
    /*
     * The list headed by rskq_accept_head/rskq_accept_tail holds request
     * blocks whose connections have completed the three-way handshake; the
     * nodes are tcp_request_sock structures. When the application calls
     * accept(), the new struct sock is taken from this queue (via
     * reqsk_queue_get_child in older kernels) and the tcp_request_sock that
     * carried it is then freed.
     */

    struct request_sock    *rskq_accept_head;
    struct request_sock    *rskq_accept_tail;
    struct fastopen_queue    fastopenq;  /* Check max_qlen != 0 to determine
                         * if TFO is enabled.
                         */
};
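To make the role of rskq_accept_head/rskq_accept_tail concrete, here is a simplified, hypothetical model of how a fully established child is appended to this list (compare inet_csk_reqsk_queue_add() in kernels of this era; accept_queue_append and its locking are illustrative only, not kernel code):

static void accept_queue_append(struct request_sock_queue *queue,
                struct request_sock *req,
                struct sock *child)
{
    spin_lock(&queue->rskq_lock);
    req->sk = child;                       /* accept() will return this sock */
    req->dl_next = NULL;
    if (queue->rskq_accept_head == NULL)   /* list was empty */
        queue->rskq_accept_head = req;
    else
        queue->rskq_accept_tail->dl_next = req;
    queue->rskq_accept_tail = req;
    spin_unlock(&queue->rskq_lock);
}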


static void reqsk_timer_handler(unsigned long data)
{
    struct request_sock *req = (struct request_sock *)data;
    struct sock *sk_listener = req->rsk_listener;
    struct net *net = sock_net(sk_listener);
    struct inet_connection_sock *icsk = inet_csk(sk_listener);
    struct request_sock_queue *queue = &icsk->icsk_accept_queue;
    int qlen, expire = 0, resend = 0;
    int max_retries, thresh;
    u8 defer_accept;

    if (sk_state_load(sk_listener) != TCP_LISTEN)
        goto drop;
    /* prefer the value set via the TCP_SYNCNT socket option (icsk_syn_retries), if any */
    max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
    thresh = max_retries;    /* defaults to sysctl tcp_synack_retries */
    /* Normally all the openreqs are young and become mature
     * (i.e. converted to established socket) for first timeout.
     * If synack was not acknowledged for 1 second, it means
     * one of the following things: synack was lost, ack was lost,
     * rtt is high or nobody planned to ack (i.e. synflood).
     * When server is a bit loaded, queue is populated with old
     * open requests, reducing effective size of queue.
     * When server is well loaded, queue size reduces to zero
     * after several minutes of work. It is not synflood,
     * it is normal operation. The solution is pruning
     * too old entries overriding normal timeout, when
     * situation becomes dangerous.
     *
     * Essentially, we reserve half of room for young
     * embrions; and abort old ones without pity, if old
     * ones are about to clog our table.
     */
    qlen = reqsk_queue_len(queue);    /* requests that have not yet completed the handshake */
    if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) {
        /* pending requests exceed half of the backlog;
         * compare with *2 instead of dividing by 2
         */
        int young = reqsk_queue_len_young(queue) << 1;

        /* young: requests whose SYN+ACK has never been retransmitted */
        while (thresh > 2) {
            /* more than half of the queue is still young (waiting for the
             * third ACK without any retransmission), so the queue is
             * healthy; stop lowering the threshold
             */
            if (qlen < young)
                break;
            thresh--;
            young <<= 1;
        }
    }
    defer_accept = READ_ONCE(queue->rskq_defer_accept);    /* retransmission limit set by TCP_DEFER_ACCEPT */
    if (defer_accept)
        max_retries = defer_accept;
    syn_ack_recalc(req, thresh, max_retries, defer_accept,
               &expire, &resend);    /* decide whether to expire and/or resend */
    req->rsk_ops->syn_ack_timeout(req);
    if (!expire &&    /* retransmission limit not reached, and either: */
        (!resend ||    /* no retransmission is needed, */
         !inet_rtx_syn_ack(sk_listener, req) ||    /* or the SYN+ACK was resent successfully (tcp_v4_send_synack), */
         inet_rsk(req)->acked)) {    /* or the resend failed but the request was already ACKed (defer_accept case) */
        unsigned long timeo;

        if (req->num_timeout++ == 0)
            atomic_dec(&queue->young);    /* first timeout: the request is no longer "young" */
        timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
        mod_timer_pinned(&req->rsk_timer, jiffies + timeo);    /* exponential backoff of the timeout */
        return;
    }
drop:    /* retransmission limit exceeded: drop the req, remove it from ehash and kill the timer */
    inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
}
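To see what the backoff above works out to in practice, here is a small stand-alone user-space sketch (assuming TCP_TIMEOUT_INIT = 1 second and TCP_RTO_MAX = 120 seconds, the usual kernel values). The request is first armed with roughly 1 second, and every expiry doubles the interval, capped at TCP_RTO_MAX:

#include <stdio.h>

int main(void)
{
    const unsigned int timeout_init = 1;    /* TCP_TIMEOUT_INIT, in seconds */
    const unsigned int rto_max = 120;       /* TCP_RTO_MAX, in seconds */
    unsigned int num_timeout;

    /* timeo = min(TCP_TIMEOUT_INIT << num_timeout, TCP_RTO_MAX) */
    for (num_timeout = 1; num_timeout <= 5; num_timeout++) {
        unsigned int timeo = timeout_init << num_timeout;

        if (timeo > rto_max)
            timeo = rto_max;
        printf("retransmit #%u fires %us after the previous one\n",
               num_timeout, timeo);
    }
    return 0;    /* prints 2s, 4s, 8s, 16s, 32s */
}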


/* Decide when to expire the request and when to resend SYN-ACK */
static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
                  const int max_retries,
                  const u8 rskq_defer_accept,
                  int *expire, int *resend)
{
    if (!rskq_defer_accept) {
        /* Without deferred accept: if the number of timeouts has reached the
         * threshold, the request has expired and should be destroyed.
         */
        *expire = req->num_timeout >= thresh;
        *resend = 1;
        return;
    }
    /* With deferred accept: expire only if the timeout count has reached
     * thresh and either no bare ACK was ever received (acked == 0) or the
     * count has also reached max_retries (the defer-accept limit).
     */
    *expire = req->num_timeout >= thresh &&
          (!inet_rsk(req)->acked || req->num_timeout >= max_retries);
    /*
     * Do not resend while waiting for data after ACK,
     * start to resend on end of deferring period to give
     * last chance for data or ACK to create established socket.
     * Resend if no bare ACK has been received, or if the end of the
     * deferred-accept period has been reached.
     */
    *resend = !inet_rsk(req)->acked ||
          req->num_timeout >= rskq_defer_accept - 1;
}

 

Deferred accept (TCP_DEFER_ACCEPT)

User space can enable the deferred-accept feature via setsockopt with the TCP_DEFER_ACCEPT option.
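A minimal user-space sketch (enable_defer_accept is an illustrative helper, not kernel code); the option value is in seconds and is converted by the kernel into an equivalent SYN+ACK retransmission count:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Enable TCP_DEFER_ACCEPT on a listening socket: accept() is only woken up
 * once the client actually sends data (or the deferring period runs out).
 */
static int enable_defer_accept(int listen_fd)
{
    int secs = 5;    /* how long to keep waiting for data after the final ACK */

    return setsockopt(listen_fd, IPPROTO_TCP, TCP_DEFER_ACCEPT,
                      &secs, sizeof(secs));
}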

With this feature enabled, the handling lives in tcp_check_req(). If the server receives only the bare third-handshake ACK from the client, with no data, the segment is not processed further and acked is simply set to 1. If the ACK arrives together with data, it is processed normally and the deferred-accept handling no longer applies.

struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
               struct request_sock *req, bool fastopen)
{
    ...
    /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
    if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
        TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
        inet_rsk(req)->acked = 1;
        return NULL;
    }
    ...
}


Note: as shown in reqsk_queue_hash_req() above, the half-open request is hashed into the established table via inet_ehash_insert():


/* insert a socket into ehash, and eventually remove another one
 * (The another one can be a SYN_RECV or TIMEWAIT
 */
bool inet_ehash_insert(struct sock *sk, struct sock *osk)
{
    /* ---> tcp_hashinfo: note that tcp_hashinfo.ehash holds not only
     * established TCP sockets but sockets in every state except LISTEN.
     */
    struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
    struct hlist_nulls_head *list;
    struct inet_ehash_bucket *head;
    spinlock_t *lock;
    bool ret = true;

    WARN_ON_ONCE(!sk_unhashed(sk));

    sk->sk_hash = sk_ehashfn(sk);
    head = inet_ehash_bucket(hashinfo, sk->sk_hash);
    list = &head->chain;
    lock = inet_ehash_lockp(hashinfo, sk->sk_hash);    /* each ehash bucket has its own spinlock; concurrent add/delete from many CPUs contend on it */

    spin_lock(lock);
    if (osk) {
        WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
        ret = sk_nulls_del_node_init_rcu(osk);
    }
    if (ret)
        __sk_nulls_add_node_rcu(sk, list);
    spin_unlock(lock);
    return ret;
}
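For reference, selecting the ehash bucket from sk->sk_hash is just a mask with ehash_mask; the helper in include/net/inet_hashtables.h looks roughly like this (a sketch, not an exact quote):

static inline struct inet_ehash_bucket *inet_ehash_bucket(struct inet_hashinfo *hashinfo,
                              unsigned int hash)
{
    return &hashinfo->ehash[hash & hashinfo->ehash_mask];    /* ehash_mask = bucket count - 1 */
}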

