TCP输入 之 tcp_rcv_established

概述

tcp_rcv_established用于处理已连接状态下的输入,处理过程根据首部预测字段分为快速路径和慢速路径;

1. 在快路中,根据是否有数据负荷进行不同处理:

(1) 若无数据,则处理输入ack,释放该skb,检查是否有数据发送,有则发送;

(2) 若有数据,检查是否当前处理进程上下文,并且是期望读取的数据,若是则将数据复制到用户空间,若不满足直接复制到用户空间的情况,或者复制失败,则需要将数据段加入到接收队列中,加入方式包括合并到已有数据段,或者加入队列尾部,并唤醒用户进程通知有数据可读;

2. 在慢路中,会进行更详细的校验,然后处理ack,处理紧急数据,接收数据段,其中数据段可能包含乱序的情况,最后进行是否有数据和ack的发送检查;

源码分析
  1 /* The first three cases are guaranteed by proper pred_flags setting,
  2  *    the rest is checked inline. Fast processing is turned on in
  3  *    tcp_data_queue when everything is OK.
  4  */
  5 void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
  6              const struct tcphdr *th, unsigned int len)
  7 {
  8     struct tcp_sock *tp = tcp_sk(sk);
  9     /* Handles a received segment: header-prediction fast path first, slow path otherwise. */
 10     skb_mstamp_get(&tp->tcp_mstamp);
 11     /* No cached rx dst on the socket yet: let the af_ops hook set it from this skb */
 12     if (unlikely(!sk->sk_rx_dst))
 13         inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
 14     /*
 15      *    Header prediction.
 16      *    The code loosely follows the one in the famous
 17      *    "30 instruction TCP receive" Van Jacobson mail.
 18      *
 19      *    Van's trick is to deposit buffers into socket queue
 20      *    on a device interrupt, to call tcp_recv function
 21      *    on the receive process context and checksum and copy
 22      *    the buffer to user space. smart...
 23      *
 24      *    Our current scheme is not silly either but we take the
 25      *    extra cost of the net_bh soft interrupt processing...
 26      *    We do checksum and copy also but from device to kernel.
 27      */
 28 
 29     tp->rx_opt.saw_tstamp = 0;
 30 
 31     /*    pred_flags is 0xS?10 << 16 + snd_wnd
 32      *    if header_prediction is to be made
 33      *    'S' will always be tp->tcp_header_len >> 2
 34      *    '?' will be 0 for the fast path, otherwise pred_flags is 0 to
 35      *  turn it off    (when there are holes in the receive
 36      *     space for instance)
 37      *    PSH flag is ignored.
 38      */
 39 
 40     /* Fast-path test: flags match pred_flags && seq == rcv_nxt && ack_seq not after snd_nxt */
 41     if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
 42         TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
 43         !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
 44         /* TCP header length */
 45         int tcp_header_len = tp->tcp_header_len;
 46 
 47         /* Timestamp header prediction: tcp_header_len
 48          * is automatically equal to th->doff*4 due to pred_flags
 49          * match.
 50          */
 51 
 52         /* Check timestamp */
 53         /* Header length indicates an aligned timestamp option is present */
 54         if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
 55             /* No? Slow path! */
 56             /* Parsing the aligned timestamp failed: take the slow path */
 57             if (!tcp_parse_aligned_timestamp(tp, th))
 58                 goto slow_path;
 59 
 60             /* If PAWS failed, check it more carefully in slow path */
 61             /* rcv_tsval is older than ts_recent: take the slow path */
 62             if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
 63                 goto slow_path;
 64 
 65             /* DO NOT update ts_recent here, if checksum fails
 66              * and timestamp was corrupted part, it will result
 67              * in a hung connection since we will drop all
 68              * future packets due to the PAWS test.
 69              */
 70         }
 71 
 72         /* No payload */
 73         if (len <= tcp_header_len) {
 74             /* Bulk data transfer: sender */
 75             if (len == tcp_header_len) {
 76                 /* Predicted packet is in window by definition.
 77                  * seq == rcv_nxt and rcv_wup <= rcv_nxt.
 78                  * Hence, check seq<=rcv_wup reduces to:
 79                  */
 80                 /* 
 81                     Timestamp option present
 82                     && rcv_nxt == rcv_wup:
 83                     store ts_recent
 84                   */
 85                 if (tcp_header_len ==
 86                     (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
 87                     tp->rcv_nxt == tp->rcv_wup)
 88                     tcp_store_ts_recent(tp);
 89 
 90                 /* We know that such packets are checksummed
 91                  * on entry.
 92                  */
 93                 /* Process the incoming ACK */
 94                 tcp_ack(sk, skb, 0);
 95                 /* Free the skb */
 96                 __kfree_skb(skb);
 97 
 98                 /* Check whether there is data to send */
 99                 tcp_data_snd_check(sk);
100                 return;
101             }
102             /* len is smaller than the header: malformed packet */
103             else { /* Header too small */
104                 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
105                 goto discard;
106             }
107         }
108         /* Segment carries payload */
109         else {
110             int eaten = 0;
111             bool fragstolen = false;
112 
113             /* Current task is the one recorded in ucopy.task */
114             if (tp->ucopy.task == current &&
115                 /* Next byte to copy equals the next expected sequence number */
116                 tp->copied_seq == tp->rcv_nxt &&
117                 /* Payload length <= remaining ucopy.len */
118                 len - tcp_header_len <= tp->ucopy.len &&
119                 /* Socket is currently owned by user context */
120                 sock_owned_by_user(sk)) {
121 
122                 /* Mark the current task as TASK_RUNNING */
123                 __set_current_state(TASK_RUNNING);
124 
125                 /* Copy the payload to the user iovec; a zero return is treated as success */
126                 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
127                     /* Predicted packet is in window by definition.
128                      * seq == rcv_nxt and rcv_wup <= rcv_nxt.
129                      * Hence, check seq<=rcv_wup reduces to:
130                      */
131                     /* Timestamp option present && rcv_nxt == rcv_wup: store ts_recent */
132                     if (tcp_header_len ==
133                         (sizeof(struct tcphdr) +
134                          TCPOLEN_TSTAMP_ALIGNED) &&
135                         tp->rcv_nxt == tp->rcv_wup)
136                         tcp_store_ts_recent(tp);
137 
138                     /* Receiver-side RTT measurement */
139                     tcp_rcv_rtt_measure_ts(sk, skb);
140 
141                     __skb_pull(skb, tcp_header_len);
142 
143                     /* Advance rcv_nxt to this segment's end_seq */
144                     tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
145                     NET_INC_STATS(sock_net(sk),
146                             LINUX_MIB_TCPHPHITSTOUSER);
147                     eaten = 1;
148                 }
149             }
150 
151             /* Data was not copied to user space (not attempted, or the copy failed) */
152             if (!eaten) {
153                 /* Verify the checksum */
154                 if (tcp_checksum_complete(skb))
155                     goto csum_error;
156 
157                 /* skb truesize exceeds sk_forward_alloc: continue at step5 */
158                 if ((int)skb->truesize > sk->sk_forward_alloc)
159                     goto step5;
160 
161                 /* Predicted packet is in window by definition.
162                  * seq == rcv_nxt and rcv_wup <= rcv_nxt.
163                  * Hence, check seq<=rcv_wup reduces to:
164                  */
165                 /* Timestamp option present && rcv_nxt == rcv_wup: store ts_recent */
166                 if (tcp_header_len ==
167                     (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
168                     tp->rcv_nxt == tp->rcv_wup)
169                     tcp_store_ts_recent(tp);
170 
171                 /* Receiver-side RTT measurement */
172                 tcp_rcv_rtt_measure_ts(sk, skb);
173 
174                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
175 
176                 /* Bulk data transfer: receiver */
177                 /* Queue the data on the receive queue */
178                 eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
179                               &fragstolen);
180             }
181 
182             tcp_event_data_recv(sk, skb);
183 
184             /* ack_seq differs from snd_una, so the ACK must be processed */
185             if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
186                 /* Well, only one small jumplet in fast path... */
187                 /* Process the ACK */
188                 tcp_ack(sk, skb, FLAG_DATA);
189                 /* Check whether there is data to send */
190                 tcp_data_snd_check(sk);
191                 /* No ACK is scheduled: skip the ACK send check */
192                 if (!inet_csk_ack_scheduled(sk))
193                     goto no_ack;
194             }
195 
196             /* Check whether an ACK needs to be sent */
197             __tcp_ack_snd_check(sk, 0);
198 no_ack:
199             /* eaten is set (copied to user, or tcp_queue_rcv returned non-zero): release the skb */
200             if (eaten)
201                 kfree_skb_partial(skb, fragstolen);
202 
203             /* Signal that data is ready via the sk_data_ready callback */
204             sk->sk_data_ready(sk);
205             return;
206         }
207     }
208 
209 slow_path:
210     /* Length shorter than the header data offset || checksum failure */
211     if (len < (th->doff << 2) || tcp_checksum_complete(skb))
212         goto csum_error;
213 
214     /* None of ACK, RST, SYN is set */
215     if (!th->ack && !th->rst && !th->syn)
216         goto discard;
217 
218     /*
219      *    Standard slow path.
220      */
221     /* Validate the incoming segment; on failure just return */
222     if (!tcp_validate_incoming(sk, skb, th, 1))
223         return;
224 
225 step5:
226     /* Process the ACK; a negative result discards the segment */
227     if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
228         goto discard;
229 
230     /* Receiver-side RTT measurement */
231     tcp_rcv_rtt_measure_ts(sk, skb);
232 
233     /* Process urgent data. */
234     /* Handle urgent data */
235     tcp_urg(sk, skb, th);
236 
237     /* step 7: process the segment text */
238     /* Hand the segment data to tcp_data_queue */
239     tcp_data_queue(sk, skb);
240 
241     /* Check whether there is data to send */
242     tcp_data_snd_check(sk);
243 
244     /* Check whether an ACK needs to be sent */
245     tcp_ack_snd_check(sk);
246     return;
247 
248 csum_error:
249     TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
250     TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
251 
252 discard:
253     tcp_drop(sk, skb);
254 }

 

posted @ 2019-10-28 14:31  AlexAlex  阅读(2105)  评论(0编辑  收藏  举报