📄 tcp_output.c
字号:
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(nskb)->sacked = 0; nskb->csum = 0; nskb->ip_summed = skb->ip_summed; len = 0; while (len < probe_size) { next = tcp_write_queue_next(sk, skb); copy = min_t(int, skb->len, probe_size - len); if (nskb->ip_summed) skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); else nskb->csum = skb_copy_and_csum_bits(skb, 0, skb_put(nskb, copy), copy, nskb->csum); if (skb->len <= copy) { /* We've eaten all the data from this skb. * Throw it away. */ TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags; tcp_unlink_write_queue(skb, sk); sk_stream_free_skb(sk, skb); } else { TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); if (!skb_shinfo(skb)->nr_frags) { skb_pull(skb, copy); if (skb->ip_summed != CHECKSUM_PARTIAL) skb->csum = csum_partial(skb->data, skb->len, 0); } else { __pskb_trim_head(skb, copy); tcp_set_skb_tso_segs(sk, skb, mss_now); } TCP_SKB_CB(skb)->seq += copy; } len += copy; skb = next; } tcp_init_tso_segs(sk, nskb, nskb->len); /* We're ready to send. If this fails, the probe will * be resegmented into mss-sized pieces by tcp_write_xmit(). */ TCP_SKB_CB(nskb)->when = tcp_time_stamp; if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) { /* Decrement cwnd here because we are sending * effectively two packets. */ tp->snd_cwnd--; update_send_head(sk, nskb); icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len); tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq; tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq; return 1; } return -1;}/* This routine writes packets to the network. It advances the * send_head. This happens as incoming acks open up the remote * window for us. * * Returns 1, if no segments are in flight and we have queued segments, but * cannot send anything now because of SWS or another problem. */static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle){ struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; unsigned int tso_segs, sent_pkts; int cwnd_quota; int result; /* If we are closed, the bytes will have to remain here. * In time closedown will finish, we empty the write queue and all * will be happy. */ if (unlikely(sk->sk_state == TCP_CLOSE)) return 0; sent_pkts = 0; /* Do MTU probing. */ if ((result = tcp_mtu_probe(sk)) == 0) { return 0; } else if (result > 0) { sent_pkts = 1; } while ((skb = tcp_send_head(sk))) { unsigned int limit; tso_segs = tcp_init_tso_segs(sk, skb, mss_now); BUG_ON(!tso_segs); cwnd_quota = tcp_cwnd_test(tp, skb); if (!cwnd_quota) break; if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) break; if (tso_segs == 1) { if (unlikely(!tcp_nagle_test(tp, skb, mss_now, (tcp_skb_is_last(sk, skb) ? nonagle : TCP_NAGLE_PUSH)))) break; } else { if (tcp_tso_should_defer(sk, skb)) break; } limit = mss_now; if (tso_segs > 1) { limit = tcp_window_allows(tp, skb, mss_now, cwnd_quota); if (skb->len < limit) { unsigned int trim = skb->len % mss_now; if (trim) limit = skb->len - trim; } } if (skb->len > limit && unlikely(tso_fragment(sk, skb, limit, mss_now))) break; TCP_SKB_CB(skb)->when = tcp_time_stamp; if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC))) break; /* Advance the send_head. This one is sent out. * This call will increment packets_out. */ update_send_head(sk, skb); tcp_minshall_update(tp, mss_now, skb); sent_pkts++; } if (likely(sent_pkts)) { tcp_cwnd_validate(sk); return 0; } return !tp->packets_out && tcp_send_head(sk);}/* Push out any pending frames which were held back due to * TCP_CORK or attempt at coalescing tiny packets. * The socket must be locked by the caller. */void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle){ struct sk_buff *skb = tcp_send_head(sk); if (skb) { if (tcp_write_xmit(sk, cur_mss, nonagle)) tcp_check_probe_timer(sk); }}/* Send _single_ skb sitting at the send head. This function requires * true push pending frames to setup probe timer etc. */void tcp_push_one(struct sock *sk, unsigned int mss_now){ struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = tcp_send_head(sk); unsigned int tso_segs, cwnd_quota; BUG_ON(!skb || skb->len < mss_now); tso_segs = tcp_init_tso_segs(sk, skb, mss_now); cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH); if (likely(cwnd_quota)) { unsigned int limit; BUG_ON(!tso_segs); limit = mss_now; if (tso_segs > 1) { limit = tcp_window_allows(tp, skb, mss_now, cwnd_quota); if (skb->len < limit) { unsigned int trim = skb->len % mss_now; if (trim) limit = skb->len - trim; } } if (skb->len > limit && unlikely(tso_fragment(sk, skb, limit, mss_now))) return; /* Send it out now. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) { update_send_head(sk, skb); tcp_cwnd_validate(sk); return; } }}/* This function returns the amount that we can raise the * usable window based on the following constraints * * 1. The window can never be shrunk once it is offered (RFC 793) * 2. We limit memory per socket * * RFC 1122: * "the suggested [SWS] avoidance algorithm for the receiver is to keep * RECV.NEXT + RCV.WIN fixed until: * RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)" * * i.e. don't raise the right edge of the window until you can raise * it at least MSS bytes. * * Unfortunately, the recommended algorithm breaks header prediction, * since header prediction assumes th->window stays fixed. * * Strictly speaking, keeping th->window fixed violates the receiver * side SWS prevention criteria. The problem is that under this rule * a stream of single byte packets will cause the right side of the * window to always advance by a single byte. * * Of course, if the sender implements sender side SWS prevention * then this will not be a problem. * * BSD seems to make the following compromise: * * If the free space is less than the 1/4 of the maximum * space available and the free space is less than 1/2 mss, * then set the window to 0. * [ Actually, bsd uses MSS and 1/4 of maximal _window_ ] * Otherwise, just prevent the window from shrinking * and from being larger than the largest representable value. * * This prevents incremental opening of the window in the regime * where TCP is limited by the speed of the reader side taking * data out of the TCP receive queue. It does nothing about * those cases where the window is constrained on the sender side * because the pipeline is full. * * BSD also seems to "accidentally" limit itself to windows that are a * multiple of MSS, at least until the free space gets quite small. * This would appear to be a side effect of the mbuf implementation. * Combining these two algorithms results in the observed behavior * of having a fixed window size at almost all times. * * Below we obtain similar behavior by forcing the offered window to * a multiple of the mss when it is feasible to do so. * * Note, we don't "adjust" for TIMESTAMP or SACK option bytes. * Regular options like TIMESTAMP are taken into account. */u32 __tcp_select_window(struct sock *sk){ struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); /* MSS for the peer's data. Previous versions used mss_clamp * here. I don't know if the value based on our guesses * of peer's MSS is better for the performance. It's more correct * but may be worse for the performance because of rcv_mss * fluctuations. --SAW 1998/11/1 */ int mss = icsk->icsk_ack.rcv_mss; int free_space = tcp_space(sk); int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk)); int window; if (mss > full_space) mss = full_space; if (free_space < full_space/2) { icsk->icsk_ack.quick = 0; if (tcp_memory_pressure) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); if (free_space < mss) return 0; } if (free_space > tp->rcv_ssthresh) free_space = tp->rcv_ssthresh; /* Don't do rounding if we are using window scaling, since the * scaled window will not line up with the MSS boundary anyway. */ window = tp->rcv_wnd; if (tp->rx_opt.rcv_wscale) { window = free_space; /* Advertise enough space so that it won't get scaled away. * Import case: prevent zero window announcement if * 1<<rcv_wscale > mss. */ if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window) window = (((window >> tp->rx_opt.rcv_wscale) + 1) << tp->rx_opt.rcv_wscale); } else { /* Get the largest window that is a nice multiple of mss. * Window clamp already applied above. * If our current window offering is within 1 mss of the * free space we just keep it. This prevents the divide * and multiply from happening most of the time. * We also don't do any window rounding when the free space * is too small. */ if (window <= free_space - mss || window > free_space) window = (free_space/mss)*mss; else if (mss == full_space && free_space > window + full_space/2) window = free_space; } return window;}/* Attempt to collapse two adjacent SKB's during retransmission. */static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now){ struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); /* The first test we must make is that neither of these two * SKB's are still referenced by someone else. */ if (!skb_cloned(skb) && !skb_cloned(next_skb)) { int skb_size = skb->len, next_skb_size = next_skb->len; u16 flags = TCP_SKB_CB(skb)->flags; /* Also punt if next skb has been SACK'd. */ if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) return; /* Next skb is out of window. */ if (after(TCP_SKB_CB(next_skb)->end_seq, tp->snd_una+tp->snd_wnd)) return; /* Punt if not enough space exists in the first SKB for * the data in the second, or the total combined payload * would exceed the MSS. */ if ((next_skb_size > skb_tailroom(skb)) || ((skb_size + next_skb_size) > mss_now)) return; BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); if (WARN_ON(tcp_is_sack(tp) && tp->sacked_out && (TCP_SKB_CB(next_skb)->seq == tp->highest_sack))) return; /* Ok. We will be able to collapse the packet. */ tcp_unlink_write_queue(next_skb, sk); skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size), next_skb_size); if (next_skb->ip_summed == CHECKSUM_PARTIAL) skb->ip_summed = CHECKSUM_PARTIAL; if (skb->ip_summed != CHECKSUM_PARTIAL) skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); /* Update sequence range on original skb. */ TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; /* Merge over control information. */ flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */ TCP_SKB_CB(skb)->flags = flags; /* All done, get rid of second SKB and account for it so * packet counting does not break. */ TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS) tp->retrans_out -= tcp_skb_pcount(next_skb); if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) tp->lost_out -= tcp_skb_pcount(next_skb); /* Reno case is special. Sigh... */ if (tcp_is_reno(tp) && tp->sacked_out) tcp_dec_pcount_approx(&tp->sacked_out, next_skb); tcp_adjust_fackets_out(tp, next_skb, tcp_skb_pcount(next_skb)); tp->packets_out -= tcp_skb_pcount(next_skb); /* changed transmit queue under us so clear hints */ tcp_clear_retrans_hints_partial(tp); /* manually tune sacktag skb hint */ if (tp->fastpath_skb_hint == next_skb) { tp->fastpath_skb_hint = skb; tp->fastpath_cnt_hint -= tcp_skb_pcount(skb); } sk_stream_free_skb(sk, next_skb); }}/* Do a simple retransmit without using the backoff mechanisms in * tcp_timer. This is used for path mtu discovery. * The socket is already locked here. */void tcp_simple_retransmit(struct sock *sk){ const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; unsigned int mss = tcp_current_mss(sk, 0); int lost = 0; tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) break; if (skb->len > mss && !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); } if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); lost = 1; } } } tcp_clear_all_retrans_hints(tp); if (!lost)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -