深入理解TCP协议及其源代码

1.TCP协议栈的访问接口函数

  结构体变量struct proto tcp_prot指定了TCP协议栈的访问接口函数:它以函数指针的形式汇集了TCP套接字的各项操作(如connect、accept、sendmsg、recvmsg等),同时还包含内存统计、slab缓存大小、哈希表等协议相关的配置项。

/*
 * TCP's protocol descriptor: a struct proto whose hooks are bound to the
 * TCP / inet connection-socket implementations. inet_init() hands this
 * object to proto_register().
 */
struct proto tcp_prot = {
	/* proto_register() uses .name as the name of the slab cache it creates. */
	.name			= "TCP",
	.owner			= THIS_MODULE,
	/* Socket operation hooks (connect/accept are the two discussed below). */
	.close			= tcp_close,
	.pre_connect		= tcp_v4_pre_connect,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
	/* Hashing and local port selection are delegated to the inet_* helpers. */
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	/* Memory-pressure callbacks and pointers to TCP-wide accounting variables. */
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	/* Stored as offsets into struct net rather than as direct pointers. */
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	/* Object size and extra flags passed to kmem_cache_create() by proto_register(). */
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	/* The compat hooks are only present when CONFIG_COMPAT is defined. */
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};

  前面介绍了inet_init函数是TCP/IP协议栈初始化的入口函数:通过fs_initcall(inet_init)宏将inet_init注册到initcall表中,内核启动时调用它,并由它注册内置的几种协议(TCP、UDP、RAW、PING)。

/*
 * Excerpt of inet_init() (elided parts are marked "..."): registers the
 * built-in protocol descriptors one after another. The second argument (1)
 * is proto_register()'s alloc_slab flag. When a registration fails, control
 * jumps to a label located in the elided part; judging by the label names
 * these undo the earlier registrations -- NOTE(review): not visible here.
 */
static int __init inet_init(void)
{
    ...
    rc = proto_register(&tcp_prot, 1);// register TCP
    if (rc)
        goto out_free_reserved_ports;

    rc = proto_register(&udp_prot, 1);// register UDP
    if (rc)
        goto out_unregister_tcp_proto;

    rc = proto_register(&raw_prot, 1);// register RAW
    if (rc)
        goto out_unregister_udp_proto;

    rc = proto_register(&ping_prot, 1);// register PING
    if (rc)
        goto out_unregister_raw_proto;
    ...
}

  注册函数proto_register调用kmem_cache_create函数完成TCP协议的slab缓存建立。

/*
 * Excerpt of proto_register() (the remainder is elided as "..."; the braces
 * of the excerpt are therefore not balanced).
 *
 * prot:       protocol descriptor being registered (e.g. &tcp_prot).
 * alloc_slab: non-zero asks for a dedicated slab cache for this protocol.
 */
int proto_register(struct proto *prot, int alloc_slab)
{
    if (alloc_slab) {// a slab cache was requested: create it here
        /* Cache is named prot->name, holds objects of prot->obj_size bytes,
         * and combines SLAB_HWCACHE_ALIGN with the protocol's own slab_flags.
         */
        prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
                    SLAB_HWCACHE_ALIGN | prot->slab_flags,
                    NULL);

        /* Cache creation failed: log at critical level and bail out. */
        if (prot->slab == NULL) {
            pr_crit("%s: Can't create sock SLAB cache!\n",
                prot->name);
            goto out;
        }
...
}

2.TCP协议的初始化工作

  从用户程序的角度看,TCP的三次握手就是客户端发起connect请求,然后由服务端用accept接收请求这一过程中完成的工作。而客户端的connect在tcp_prot中对应的则是tcp_v4_connect函数,服务端的accept对应的则是inet_csk_accept函数。

(1)tcp_v4_connect函数

  调用IP层提供的一些服务;调用tcp_connect来构造SYN并发送出去。

/* This will initiate an outgoing connection. */
 
// Main purpose: start a TCP connection (excerpt; elided parts are marked "···").
// Returns 0 on success, or the error from tcp_connect() after resetting the
// socket on the failure path.
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
    ···
    rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
                  RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
                  IPPROTO_TCP,
                  orig_sport, orig_dport, sk);
    ···
    rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
                   inet->inet_sport, inet->inet_dport, sk);
 
    ···
    // The calls above use services provided by the IP layer (routing).
    ···
    if (!tp->write_seq && likely(!tp->repair))
        // Compute the initial sequence number (only when write_seq is still 0
        // and the socket is not in repair mode).
        tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
                               inet->inet_daddr,
                               inet->inet_sport,
                               usin->sin_port);
     inet->inet_id = tp->write_seq ^ jiffies;
     // Call tcp_connect() to build the SYN and send it out.
    err = tcp_connect(sk); 
    // rt is cleared before the error check, so if tcp_connect() failed the
    // failure path below calls ip_rt_put() with NULL.
    rt = NULL;
    if (err)
        goto failure; 
    return 0; 
failure:
    /*
     * This unhashes the socket and releases the local port,
     * if necessary.
     */
    tcp_set_state(sk, TCP_CLOSE);
    ip_rt_put(rt);
    // Reset the route capabilities and destination port recorded on the socket.
    sk->sk_route_caps = 0;
    inet->inet_dport = 0;
    return err;
}
tcp_connect函数

  构造携带SYN标志位的TCP头并发送出去;设置计时器超时重发。

/* Build a SYN and send it off. */
// Excerpt (setup code elided as "···"). Returns -ECONNREFUSED if the send
// reported it, otherwise 0.
int tcp_connect(struct sock *sk)
{
    ··· 
    /* Send off SYN; include data in Fast Open. */
    err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
 
          // Without a Fast Open request, tcp_transmit_skb() sends the segment.
          tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
    // Only -ECONNREFUSED aborts here; any other err value falls through, so
    // the retransmit timer below is still armed and 0 is returned.
    if (err == -ECONNREFUSED)
        return err;
 
    /* We change tp->snd_nxt after the tcp_transmit_skb() call
     * in order to make this packet get counted in tcpOutSegs.
     */
    tp->snd_nxt = tp->write_seq;
    tp->pushed_seq = tp->write_seq;
    TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
 
    /* Timer for repeating the SYN until an answer. */ 
    // Start the retransmission timer: initial timeout icsk_rto, capped at TCP_RTO_MAX.
    inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
    return 0;
}
(2)inet_csk_accept函数
/*
 * Excerpt of inet_csk_accept() (declarations and the tail of the function
 * are elided, so the braces shown are not balanced). The visible part checks
 * that sk is listening and, if no connection is queued yet, either fails
 * with -EAGAIN (zero timeout) or waits for one.
 */
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
{...
    if (sk->sk_state != TCP_LISTEN)// only a socket in LISTEN state may accept
        goto out_err;

    /* Find already established connection */
    if (reqsk_queue_empty(queue)) {// is the request queue empty?
        // Timeout derived from the socket; flags & O_NONBLOCK selects the
        // non-blocking case.
        long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);

        /* If this is a non blocking socket don't sleep */
        error = -EAGAIN;
        if (!timeo)
            goto out_err;

        error = inet_csk_wait_for_connect(sk, timeo);// queue is empty: block here waiting for a connection
        if (error)
            goto out_err;
    
}
EXPORT_SYMBOL(inet_csk_accept);
(3)负责接收处理数据的入口tcp_v4_rcv

  TCP/IP协议栈的初始化入口函数inet_init中涉及了tcp_protocol结构体变量。

/*
 * Second excerpt of inet_init(): hooks tcp_protocol into the IPv4 layer
 * under protocol number IPPROTO_TCP.
 */
static int __init inet_init(void)
{
    ...
/* In the visible code a failure is only logged via pr_crit(); no error path is taken. */
if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
		pr_crit("%s: Cannot add TCP protocol\n", __func__);
    ···
}

  而在tcp_protocol中的handler被赋值为tcp_v4_rcv。

/*
 * Descriptor that inet_init() passes to inet_add_protocol() for IPPROTO_TCP.
 * .handler names tcp_v4_rcv as the receive entry point and .err_handler
 * names tcp_v4_err; both early-demux hooks point at tcp_v4_early_demux.
 */
static struct net_protocol tcp_protocol = {
	.early_demux	=	tcp_v4_early_demux,
	.early_demux_handler =  tcp_v4_early_demux,
	.handler	=	tcp_v4_rcv,
	.err_handler	=	tcp_v4_err,
	.no_policy	=	1,
	.netns_ok	=	1,
	.icmp_strict_tag_validation = 1,
};
/*
 * Excerpt of tcp_v4_rcv(), the TCP receive handler (socket lookup and the
 * rest of the function are elided). Only the branch for a socket found in
 * TCP_NEW_SYN_RECV is shown: sk is then treated as a request_sock and
 * processing is redirected to its listener.
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
    ...
 
    if (sk->sk_state == TCP_NEW_SYN_RECV) {
        struct request_sock *req = inet_reqsk(sk);
        struct sock *nsk;
 
        // From here on sk refers to the listening socket of this request.
        sk = req->rsk_listener;
        // MD5 check failed: count the drop, release the request, discard the packet.
        if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
            sk_drops_add(sk, skb);
            reqsk_put(req);
            goto discard_it;
        }
        // The listener is no longer in LISTEN state: drop the request and
        // redo the socket lookup.
        if (unlikely(sk->sk_state != TCP_LISTEN)) {
            inet_csk_reqsk_queue_drop_and_put(sk, req);
            goto lookup;
        }
        /* We own a reference on the listener, increase it again
         * as we might lose it too soon.
         */
        sock_hold(sk);
        refcounted = true;
 
        // Create the new sock, which enters the TCP_SYN_RECV state.
        nsk = tcp_check_req(sk, skb, req, false);
        // No socket returned: release the request and discard the packet.
        if (!nsk) {
            reqsk_put(req);
            goto discard_and_relse;
        }
        // The listener itself was returned: release the request and fall
        // out of this branch into the elided code.
        if (nsk == sk) {
            reqsk_put(req);
 
        // tcp_child_process() calls tcp_rcv_state_process; a non-zero
        // result leads to a reset being sent and the packet discarded.
        } else if (tcp_child_process(sk, nsk, skb)) {
            tcp_v4_send_reset(nsk, skb);
            goto discard_and_relse;
        } else {// on success, drop the listener reference and return directly
            sock_put(sk);
            return 0;
        }
    }
}

参考链接:
https://github.com/mengning/net/blob/master/np2019.md
https://blog.csdn.net/u010039418/article/details/79350421

posted on 2019-12-26 17:26  MinHui  阅读(514)  评论(0)    收藏  举报

导航