深入理解TCP三次握手
一、概述
在本篇文章中我们主要研究TCP协议如何进行连接的建立。我们知道如果应用层要使用TCP进行通信,首先要进行连接的建立,在这个过程中需要进行所谓的"三次握手"。三次握手的示意图如下所示:

在进行网络通信的时候,客户端(client)和服务器端(server)都是通过socket这层操作系统提供的抽象进行编程,那么他们是如何通过socket来建立连接的呢?注意我们只研究TCP协议。下图展示了通过socket客户端和服务器端如何进行通信。

可以看到如果通过socket进行网络通信,在客户端调用connect的时候,会和服务器端建立连接。那么我们主要的研究内容就是connect这个系统调用。
二、__sys_connect
上次的实验告诉我们,socketcall(int call, unsigned long *args) 是我们调用socket相关系统调用的入口函数,这个函数的call参数决定具体调用的哪个函数,一些主要函数的具体的对应关系如下所示:
1 __sys_socket ---- call = 1
2 __sys_bind ---- call = 2
3 __sys_connect ---- call = 3
4 __sys_listen ---- call = 4
5 __sys_accept ---- call = 5
当客户端调用系统调用connect连接服务端的时候,经过socket系统调用入口函数的分发,最终调用的是__sys_connect()函数,下面来分析这个函数。
 1 SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, int, addrlen)
 2 {
 3     struct socket *sock;
 4     struct sockaddr_storage address;
 5     int err, fput_needed;
 6
 7     //根据fd,找到对应的socket。
11     sock = sockfd_lookup_light(fd, &err, &fput_needed);
12     if (!sock)
13         goto out;
14
15
16     err = move_addr_to_kernel(uservaddr, addrlen, &address);
17     if (err < 0)
18         goto out_put;
19
20     err = security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
21     if (err)
22         goto out_put;
23
24     // 调用sock的连接函数
27     err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, sock->file->f_flags);
28
29 out_put:
30     fput_light(sock->file, fput_needed);
31
32 out:
33     return err;
34 }
上述代码比较重要的是调用了 sockfd_lookup_light()和 sock->ops->connect()函数。首先sockfd_lookup_light()找到具体的socket实例。socket实例结构体如下:
 1 struct socket {
 2     socket_state state;
 3     unsigned long flags;
 4     const struct proto_ops * ops;
 5     struct fasync_struct * fasync_list;
 6     struct file * file;
 7     struct sock * sk;
 8     wait_queue_head_t wait;
 9     short type;
10 };
其中的ops是具体协议的操作函数指针。里面包含了对应协议的操作函数。在sock->ops->connect()处打上断点,进入其中看一下。
可以看到它进入了实现IPv4协议的底层函数,调用的是inet_stream_connect()。
1 int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags)
2 {
3     int err;
4
5     lock_sock(sock->sk);
6     err = __inet_stream_connect(sock, uaddr, addr_len, flags, 0);
7     release_sock(sock->sk);
8     return err;
9 }
其中调用了__inet_stream_connect函数。
1 int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, 2 int addr_len, int flags, int is_sendmsg) 3 { 4 struct sock *sk = sock->sk; 5 int err; 6 long timeo; 7 8 /* 9 * uaddr can be NULL and addr_len can be 0 if: 10 * sk is a TCP fastopen active socket and 11 * TCP_FASTOPEN_CONNECT sockopt is set and 12 * we already have a valid cookie for this socket. 13 * In this case, user can call write() after connect(). 14 * write() will invoke tcp_sendmsg_fastopen() which calls 15 * __inet_stream_connect(). 16 */ 17 if (uaddr) { 18 if (addr_len < sizeof(uaddr->sa_family)) 19 return -EINVAL; 20 21 if (uaddr->sa_family == AF_UNSPEC) { 22 err = sk->sk_prot->disconnect(sk, flags); 23 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; 24 goto out; 25 } 26 } 27 28 switch (sock->state) { 29 default: 30 err = -EINVAL; 31 goto out; 32 case SS_CONNECTED: 33 err = -EISCONN; 34 goto out; 35 case SS_CONNECTING: 36 if (inet_sk(sk)->defer_connect) 37 err = is_sendmsg ? -EINPROGRESS : -EISCONN; 38 else 39 err = -EALREADY; 40 /* Fall out of switch with err, set for this state */ 41 break; 42 case SS_UNCONNECTED: //没有建立连接 43 err = -EISCONN; 44 if (sk->sk_state != TCP_CLOSE) 45 goto out; 46 47 if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { 48 err = sk->sk_prot->pre_connect(sk, uaddr, addr_len); 49 if (err) 50 goto out; 51 } 52 53 err = sk->sk_prot->connect(sk, uaddr, addr_len); //这一行发送一个报文,这是TCP连接的第一步 54 if (err < 0) 55 goto out; 56 57 sock->state = SS_CONNECTING; 58 59 if (!err && inet_sk(sk)->defer_connect) 60 goto out; 61 62 /* Just entered SS_CONNECTING state; the only 63 * difference is that return value in non-blocking 64 * case is EINPROGRESS, rather than EALREADY. 65 */ 66 err = -EINPROGRESS; 67 break; 68 } 69 70 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 71 72 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { //客户端发出请求连接后,等待服务端的响应。 73 int writebias = (sk->sk_protocol == IPPROTO_TCP) && 74 tcp_sk(sk)->fastopen_req && 75 tcp_sk(sk)->fastopen_req->data ? 
1 : 0; 76 77 /* Error code is set above */ 78 if (!timeo || !inet_wait_for_connect(sk, timeo, writebias)) //等待第二次连接 79 goto out; 80 81 err = sock_intr_errno(timeo); 82 if (signal_pending(current)) 83 goto out; 84 } 85 86 /* Connection was closed by RST, timeout, ICMP error 87 * or another process disconnected us. 88 */ 89 if (sk->sk_state == TCP_CLOSE) 90 goto sock_error; 91 92 /* sk->sk_err may be not zero now, if RECVERR was ordered by user 93 * and error was received after socket entered established state. 94 * Hence, it is handled normally after connect() return successfully. 95 */ 96 97 sock->state = SS_CONNECTED; //连接建立成功 98 err = 0; 99 out: 100 return err; 101 102 sock_error: 103 err = sock_error(sk) ? : -ECONNABORTED; 104 sock->state = SS_UNCONNECTED; 105 if (sk->sk_prot->disconnect(sk, flags)) 106 sock->state = SS_DISCONNECTING; 107 goto out; 108 }
上述代码其实也很直接:先检查socket的状态,如果还没有建立连接,则通过sk->sk_prot->connect()发送一个SYN报文,这是三次握手的第一次握手;随后在inet_wait_for_connect()中让当前进程睡眠,等待服务器端的回应。若服务器端的回应到达,则完成连接,将socket的状态改为已连接(SS_CONNECTED)。至此connect系统调用已经完成。
三、__sys_accept
服务器端调用accept来阻塞地接收连接请求。调用accept系统调用时,根据socketcall的分发,具体调用的是__sys_accept4函数。
1 int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, 2 int __user *upeer_addrlen, int flags) 3 { 4 struct socket *sock, *newsock; 5 struct file *newfile; 6 int err, len, newfd, fput_needed; 7 struct sockaddr_storage address; 8 9 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 10 return -EINVAL; 11 12 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 13 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 14 15 sock = sockfd_lookup_light(fd, &err, &fput_needed); //查找服务器端监听的socket 16 if (!sock) 17 goto out; 18 19 err = -ENFILE; 20 newsock = sock_alloc(); //分配一个socket 21 if (!newsock) 22 goto out_put; 23 24 newsock->type = sock->type; 25 newsock->ops = sock->ops; 26 27 /* 28 * We don't need try_module_get here, as the listening socket (sock) 29 * has the protocol module (sock->ops->owner) held. 30 */ 31 __module_get(newsock->ops->owner); 32 33 newfd = get_unused_fd_flags(flags); 34 if (unlikely(newfd < 0)) { 35 err = newfd; 36 sock_release(newsock); 37 goto out_put; 38 } 39 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name); 40 if (IS_ERR(newfile)) { 41 err = PTR_ERR(newfile); 42 put_unused_fd(newfd); 43 goto out_put; 44 } 45 46 err = security_socket_accept(sock, newsock); 47 if (err) 48 goto out_fd; 49 50 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false); //调用socket上对应的accept 51 if (err < 0) 52 goto out_fd; 53 54 if (upeer_sockaddr) { 55 len = newsock->ops->getname(newsock, 56 (struct sockaddr *)&address, 2); 57 if (len < 0) { 58 err = -ECONNABORTED; 59 goto out_fd; 60 } 61 err = move_addr_to_user(&address, 62 len, upeer_sockaddr, upeer_addrlen); 63 if (err < 0) 64 goto out_fd; 65 } 66 67 /* File flags are not inherited via accept() unlike another OSes. */ 68 69 fd_install(newfd, newfile); 70 err = newfd; 71 72 out_put: 73 fput_light(sock->file, fput_needed); 74 out: 75 return err; 76 out_fd: 77 fput(newfile); 78 put_unused_fd(newfd); 79 goto out_put; 80 }
这段代码的流程也很简单:先分配一个新的socket对象(当有新连接进来时,这个新socket用于与客户端通信),然后通过sock->ops->accept调用inet_accept():
 1 int inet_accept(struct socket *sock, struct socket *newsock, int flags,
 2         bool kern)
 3 {
 4     struct sock *sk1 = sock->sk;
 5     int err = -EINVAL;
 6     struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err, kern); //调用inet_csk_accept
 7
 8     if (!sk2)
 9         goto do_err;
10
11     lock_sock(sk2);
12
13     sock_rps_record_flow(sk2);
14     WARN_ON(!((1 << sk2->sk_state) &
15           (TCPF_ESTABLISHED | TCPF_SYN_RECV |
16           TCPF_CLOSE_WAIT | TCPF_CLOSE)));
17
18     sock_graft(sk2, newsock);
19
20     newsock->state = SS_CONNECTED; //建立连接
21     err = 0;
22     release_sock(sk2);
23 do_err:
24     return err;
25 }
这个函数通过sk1->sk_prot->accept调用net/ipv4/inet_connection_sock.c文件中的inet_csk_accept函数,等待并取出一个连接。
1 struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) 2 { 3 struct inet_connection_sock *icsk = inet_csk(sk); 4 struct request_sock_queue *queue = &icsk->icsk_accept_queue; 5 struct request_sock *req; 6 struct sock *newsk; 7 int error; 8 9 lock_sock(sk); 10 11 /* We need to make sure that this socket is listening, 12 * and that it has something pending. 13 */ 14 error = -EINVAL; 15 if (sk->sk_state != TCP_LISTEN) 16 goto out_err; 17 18 /* Find already established connection */ 19 if (reqsk_queue_empty(queue)) { 20 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 21 22 /* If this is a non blocking socket don't sleep */ 23 error = -EAGAIN; 24 if (!timeo) 25 goto out_err; 26 27 error = inet_csk_wait_for_connect(sk, timeo); //等待连接 28 if (error) 29 goto out_err; 30 } 31 req = reqsk_queue_remove(queue, sk); 32 newsk = req->sk; 33 34 if (sk->sk_protocol == IPPROTO_TCP && 35 tcp_rsk(req)->tfo_listener) { 36 spin_lock_bh(&queue->fastopenq.lock); 37 if (tcp_rsk(req)->tfo_listener) { 38 /* We are still waiting for the final ACK from 3WHS 39 * so can't free req now. Instead, we set req->sk to 40 * NULL to signify that the child socket is taken 41 * so reqsk_fastopen_remove() will free the req 42 * when 3WHS finishes (or is aborted). 43 */ 44 req->sk = NULL; 45 req = NULL; 46 } 47 spin_unlock_bh(&queue->fastopenq.lock); 48 }
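这个函数首先确认socket处于监听状态(TCP_LISTEN),当接受队列(icsk_accept_queue)为空时,在inet_csk_wait_for_connect()中阻塞等待客户端的连接;队列中一旦有连接,就通过reqsk_queue_remove()取出对应的request_sock及其子sock。需要注意的是,第二次握手的SYN+ACK报文并不是在这个函数里发送的:服务器端内核在协议栈的接收路径中收到客户端的SYN报文时就已经回复了SYN+ACK,通常在收到第三次握手的ACK之后,连接才会被放入接受队列,accept只是把队列中的连接取出来交给应用程序。38-42行的注释描述的是TCP Fast Open这一特殊情况:此时子socket可以在三次握手完成之前就被accept取走,服务器端仍在等待客户端发回第三次握手的ACK,收到之后才完成整个TCP连接的建立过程。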
至此整个三次握手的过程分析完毕。
参考自 https://www.geeksforgeeks.org/tcp-3-way-handshake-process/

浙公网安备 33010602011771号