📄 tcp_output.c
字号:
/* Advertise enough space so that it won't get scaled away. * Import case: prevent zero window announcement if * 1<<rcv_wscale > mss. */ if (((window >> tp->rcv_wscale) << tp->rcv_wscale) != window) window = (((window >> tp->rcv_wscale) + 1) << tp->rcv_wscale); } else { /* Get the largest window that is a nice multiple of mss. * Window clamp already applied above. * If our current window offering is within 1 mss of the * free space we just keep it. This prevents the divide * and multiply from happening most of the time. * We also don't do any window rounding when the free space * is too small. */ if (window <= free_space - mss || window > free_space) window = (free_space/mss)*mss; } return window;}/* Attempt to collapse two adjacent SKB's during retransmission. */static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now){ struct tcp_opt *tp = tcp_sk(sk); struct sk_buff *next_skb = skb->next; /* The first test we must make is that neither of these two * SKB's are still referenced by someone else. */ if (!skb_cloned(skb) && !skb_cloned(next_skb)) { int skb_size = skb->len, next_skb_size = next_skb->len; u16 flags = TCP_SKB_CB(skb)->flags; /* Also punt if next skb has been SACK'd. */ if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) return; /* Next skb is out of window. */ if (after(TCP_SKB_CB(next_skb)->end_seq, tp->snd_una+tp->snd_wnd)) return; /* Punt if not enough space exists in the first SKB for * the data in the second, or the total combined payload * would exceed the MSS. */ if ((next_skb_size > skb_tailroom(skb)) || ((skb_size + next_skb_size) > mss_now)) return; BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); /* Ok. We will be able to collapse the packet. */ __skb_unlink(next_skb, next_skb->list); memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); if (next_skb->ip_summed == CHECKSUM_HW) skb->ip_summed = CHECKSUM_HW; if (skb->ip_summed != CHECKSUM_HW) skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); /* Update sequence range on original skb. */ TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; /* Merge over control information. */ flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */ TCP_SKB_CB(skb)->flags = flags; /* All done, get rid of second SKB and account for it so * packet counting does not break. */ TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS) tcp_dec_pcount(&tp->retrans_out, next_skb); if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) { tcp_dec_pcount(&tp->lost_out, next_skb); tcp_dec_pcount(&tp->left_out, next_skb); } /* Reno case is special. Sigh... */ if (!tp->sack_ok && tcp_get_pcount(&tp->sacked_out)) { tcp_dec_pcount_approx(&tp->sacked_out, next_skb); tcp_dec_pcount(&tp->left_out, next_skb); } /* Not quite right: it can be > snd.fack, but * it is better to underestimate fackets. */ tcp_dec_pcount_approx(&tp->fackets_out, next_skb); tcp_packets_out_dec(tp, next_skb); sk_stream_free_skb(sk, next_skb); }}/* Do a simple retransmit without using the backoff mechanisms in * tcp_timer. This is used for path mtu discovery. * The socket is already locked here. */ void tcp_simple_retransmit(struct sock *sk){ struct tcp_opt *tp = tcp_sk(sk); struct sk_buff *skb; unsigned int mss = tcp_current_mss(sk, 0); int lost = 0; sk_stream_for_retrans_queue(skb, sk) { if (skb->len > mss && !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tcp_dec_pcount(&tp->retrans_out, skb); } if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tcp_inc_pcount(&tp->lost_out, skb); lost = 1; } } } if (!lost) return; tcp_sync_left_out(tp); /* Don't muck with the congestion window here. * Reason is that we do not increase amount of _data_ * in network, but units changed and effective * cwnd/ssthresh really reduced now. */ if (tp->ca_state != TCP_CA_Loss) { tp->high_seq = tp->snd_nxt; tp->snd_ssthresh = tcp_current_ssthresh(tp); tp->prior_ssthresh = 0; tp->undo_marker = 0; tcp_set_ca_state(tp, TCP_CA_Loss); } tcp_xmit_retransmit_queue(sk);}/* This retransmits one SKB. Policy decisions and retransmit queue * state updates are done by the caller. Returns non-zero if an * error occurred which prevented the send. */int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb){ struct tcp_opt *tp = tcp_sk(sk); unsigned int cur_mss = tcp_current_mss(sk, 0); int err; /* Do not sent more than we queued. 1/4 is reserved for possible * copying overhead: frgagmentation, tunneling, mangling etc. */ if (atomic_read(&sk->sk_wmem_alloc) > min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) return -EAGAIN; if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) BUG(); if (sk->sk_route_caps & NETIF_F_TSO) { sk->sk_route_caps &= ~NETIF_F_TSO; sk->sk_no_largesend = 1; tp->mss_cache = tp->mss_cache_std; } if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) return -ENOMEM; } /* If receiver has shrunk his window, and skb is out of * new window, do not retransmit it. The exception is the * case, when window is shrunk to zero. In this case * our retransmit serves as a zero window probe. */ if (!before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd) && TCP_SKB_CB(skb)->seq != tp->snd_una) return -EAGAIN; if (skb->len > cur_mss) { int old_factor = tcp_skb_pcount(skb); int new_factor; if (tcp_fragment(sk, skb, cur_mss)) return -ENOMEM; /* We'll try again later. */ /* New SKB created, account for it. */ new_factor = tcp_skb_pcount(skb); tcp_dec_pcount_explicit(&tp->packets_out, old_factor - new_factor); tcp_inc_pcount(&tp->packets_out, skb->next); } /* Collapse two adjacent packets if worthwhile and we can. */ if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && (skb->len < (cur_mss >> 1)) && (skb->next != sk->sk_send_head) && (skb->next != (struct sk_buff *)&sk->sk_write_queue) && (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) && (sysctl_tcp_retrans_collapse != 0)) tcp_retrans_try_collapse(sk, skb, cur_mss); if(tp->af_specific->rebuild_header(sk)) return -EHOSTUNREACH; /* Routing failure or similar. */ /* Some Solaris stacks overoptimize and ignore the FIN on a * retransmit when old data is attached. So strip it off * since it is cheap to do so and saves bytes on the network. */ if(skb->len > 0 && (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { if (!pskb_trim(skb, 0)) { TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1; skb_shinfo(skb)->tso_segs = 1; skb_shinfo(skb)->tso_size = 0; skb->ip_summed = CHECKSUM_NONE; skb->csum = 0; } } /* Make a copy, if the first transmission SKB clone we made * is still in somebody's hands, else make a clone. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; err = tcp_transmit_skb(sk, (skb_cloned(skb) ? pskb_copy(skb, GFP_ATOMIC): skb_clone(skb, GFP_ATOMIC))); if (err == 0) { /* Update global TCP statistics. */ TCP_INC_STATS(TCP_MIB_RETRANSSEGS);#if FASTRETRANS_DEBUG > 0 if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { if (net_ratelimit()) printk(KERN_DEBUG "retrans_out leaked.\n"); }#endif TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; tcp_inc_pcount(&tp->retrans_out, skb); /* Save stamp of the first retransmit. */ if (!tp->retrans_stamp) tp->retrans_stamp = TCP_SKB_CB(skb)->when; tp->undo_retrans++; /* snd_nxt is stored to detect loss of retransmitted segment, * see tcp_input.c tcp_sacktag_write_queue(). */ TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; } return err;}/* This gets called after a retransmit timeout, and the initially * retransmitted data is acknowledged. It tries to continue * resending the rest of the retransmit queue, until either * we've sent it all or the congestion window limit is reached. * If doing SACK, the first ACK which comes back for a timeout * based retransmit packet might feed us FACK information again. * If so, we use it to avoid unnecessarily retransmissions. */void tcp_xmit_retransmit_queue(struct sock *sk){ struct tcp_opt *tp = tcp_sk(sk); struct sk_buff *skb; int packet_cnt = tcp_get_pcount(&tp->lost_out); /* First pass: retransmit lost packets. */ if (packet_cnt) { sk_stream_for_retrans_queue(skb, sk) { __u8 sacked = TCP_SKB_CB(skb)->sacked; /* Assume this retransmit will generate * only one packet for congestion window * calculation purposes. This works because * tcp_retransmit_skb() will chop up the * packet to be MSS sized and all the * packet counting works out. */ if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) return; if (sacked&TCPCB_LOST) { if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { if (tcp_retransmit_skb(sk, skb)) return; if (tp->ca_state != TCP_CA_Loss) NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); else NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); if (skb == skb_peek(&sk->sk_write_queue)) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); } packet_cnt -= tcp_skb_pcount(skb); if (packet_cnt <= 0) break; } } } /* OK, demanded retransmission is finished. */ /* Forward retransmissions are possible only during Recovery. */ if (tp->ca_state != TCP_CA_Recovery) return; /* No forward retransmissions in Reno are possible. */ if (!tp->sack_ok) return; /* Yeah, we have to make difficult choice between forward transmission * and retransmission... Both ways have their merits... * * For now we do not retransmit anything, while we have some new * segments to send. */ if (tcp_may_send_now(sk, tp)) return; packet_cnt = 0; sk_stream_for_retrans_queue(skb, sk) { /* Similar to the retransmit loop above we * can pretend that the retransmitted SKB * we send out here will be composed of one * real MSS sized packet because tcp_retransmit_skb() * will fragment it if necessary. */ if (++packet_cnt > tcp_get_pcount(&tp->fackets_out)) break; if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) break; if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) continue; /* Ok, retransmit it. */ if (tcp_retransmit_skb(sk, skb)) break; if (skb == skb_peek(&sk->sk_write_queue)) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); }}/* Send a fin. The caller locks the socket for us. This cannot be * allowed to fail queueing a FIN frame under any circumstances. */void tcp_send_fin(struct sock *sk){ struct tcp_opt *tp = tcp_sk(sk); struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue); int mss_now; /* Optimization, tack on the FIN if we have a queue of * unsent frames. But be careful about outgoing SACKS * and IP options. */ mss_now = tcp_current_mss(sk, 1); if (sk->sk_send_head != NULL) { TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; TCP_SKB_CB(skb)->end_seq++; tp->write_seq++; } else { /* Socket is locked, keep trying until memory is available. */ for (;;) { skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL); if (skb) break; yield(); } /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, MAX_TCP_HEADER); skb->csum = 0; TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); TCP_SKB_CB(skb)->sacked = 0; skb_shinfo(skb)->tso_segs = 1; skb_shinfo(skb)->tso_size = 0; /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ TCP_SKB_CB(skb)->seq = tp->write_seq; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; tcp_queue_skb(sk, skb); } __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_OFF);}/* We get here when a process closes a file descriptor (either due to * an explicit close() or as a byproduct of exit()'ing) and there * was unread data in the receive queue. This behavior is recommended * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM */void tcp_send_active_reset(struct sock *sk, int priority){ struct tcp_opt *tp = tcp_sk(sk); struct sk_buff *skb; /* NOTE: No TCP options attached and we never retransmit this. */ skb = alloc_skb(MAX_TCP_HEADER, priority); if (!skb) { NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); return; } /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, MAX_TCP_HEADER); skb->csum = 0; TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST); TCP_SKB_CB(skb)->sacked = 0; skb_shinfo(skb)->tso_segs = 1; skb_shinfo(skb)->tso_size = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -