/* tcp_output.c */
/* Strictly speaking, keeping th->window fixed violates the receiver
 * side SWS prevention criteria.  The problem is that under this rule
 * a stream of single byte packets will cause the right side of the
 * window to always advance by a single byte.
 *
 * Of course, if the sender implements sender side SWS prevention
 * then this will not be a problem.
 *
 * BSD seems to make the following compromise:
 *
 *	If the free space is less than 1/4 of the maximum
 *	space available and the free space is less than 1/2 mss,
 *	then set the window to 0.
 *	Otherwise, just prevent the window from shrinking
 *	and from being larger than the largest representable value.
 *
 * This prevents incremental opening of the window in the regime
 * where TCP is limited by the speed of the reader side taking
 * data out of the TCP receive queue.  It does nothing about
 * those cases where the window is constrained on the sender side
 * because the pipeline is full.
 *
 * BSD also seems to "accidentally" limit itself to windows that are a
 * multiple of MSS, at least until the free space gets quite small.
 * This would appear to be a side effect of the mbuf implementation.
 * Combining these two algorithms results in the observed behavior
 * of having a fixed window size at almost all times.
 *
 * Below we obtain similar behavior by forcing the offered window to
 * a multiple of the mss when it is feasible to do so.
 *
 * Note, we don't "adjust" for TIMESTAMP or SACK option bytes.
 */
u32 __tcp_select_window(struct sock *sk)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	unsigned int mss = tp->mss_cache;
	int free_space;
	u32 window;

	/* Sometimes free_space can be < 0. */
	free_space = (sk->rcvbuf - atomic_read(&sk->rmem_alloc)) / 2;
	if (tp->window_clamp) {
		if (free_space > ((int) tp->window_clamp))
			free_space = tp->window_clamp;
		mss = min(tp->window_clamp, mss);
	} else {
		printk("tcp_select_window: tp->window_clamp == 0.\n");
	}

	if (mss < 1) {
		mss = 1;
		printk("tcp_select_window: sk->mss fell to 0.\n");
	}

	if ((free_space < (sk->rcvbuf/4)) &&
	    (free_space < ((int) (mss/2)))) {
		window = 0;
		tp->pred_flags = 0;
	} else {
		/* Get the largest window that is a nice multiple of mss.
		 * Window clamp already applied above.
		 * If our current window offering is within 1 mss of the
		 * free space we just keep it.  This prevents the divide
		 * and multiply from happening most of the time.
		 * We also don't do any window rounding when the free space
		 * is too small.
		 */
		window = tp->rcv_wnd;
		if ((((int) window) <= (free_space - ((int) mss))) ||
		    (((int) window) > free_space))
			window = (((unsigned int) free_space)/mss)*mss;
	}
	return window;
}
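/* The rounding above is easiest to see with concrete numbers.  The
 * following standalone userspace sketch (hypothetical values, not part
 * of the kernel source) mirrors just the mss-multiple arithmetic: keep
 * the current offer if it is within one mss below the free space and
 * not above it, otherwise round the free space down to a multiple of
 * the mss.
 */
#include <stdio.h>

static unsigned int sketch_select_window(int free_space, unsigned int mss,
					 unsigned int rcv_wnd)
{
	unsigned int window = rcv_wnd;

	if ((((int) window) <= (free_space - ((int) mss))) ||
	    (((int) window) > free_space))
		window = (((unsigned int) free_space) / mss) * mss;
	return window;
}

int main(void)
{
	/* With mss = 1460 and 10000 bytes free: a stale offer of 4096 is
	 * re-rounded to 8760 (6 * 1460); an offer of 9000 is kept because
	 * it is already within one mss of the free space.
	 */
	printf("%u\n", sketch_select_window(10000, 1460, 4096)); /* 8760 */
	printf("%u\n", sketch_select_window(10000, 1460, 9000)); /* 9000 */
	return 0;
}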
/* Attempt to collapse two adjacent SKB's during retransmission. */
static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
{
	struct sk_buff *next_skb = skb->next;

	/* The first test we must make is that neither of these two
	 * SKB's are still referenced by someone else.
	 */
	if(!skb_cloned(skb) && !skb_cloned(next_skb)) {
		int skb_size = skb->len, next_skb_size = next_skb->len;
		u16 flags = TCP_SKB_CB(skb)->flags;

		/* Punt if the first SKB has URG set. */
		if(flags & TCPCB_FLAG_URG)
			return;

		/* Also punt if next skb has been SACK'd. */
		if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
			return;

		/* Punt if not enough space exists in the first SKB for
		 * the data in the second, or the total combined payload
		 * would exceed the MSS.
		 */
		if ((next_skb_size > skb_tailroom(skb)) ||
		    ((skb_size + next_skb_size) > mss_now))
			return;

		/* Ok.  We will be able to collapse the packet. */
		__skb_unlink(next_skb, next_skb->list);

		if(skb->len % 4) {
			/* Must copy and rechecksum all data. */
			memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
			skb->csum = csum_partial(skb->data, skb->len, 0);
		} else {
			/* Optimize, actually we could also combine next_skb->csum
			 * to skb->csum using a single add w/carry operation too.
			 */
			skb->csum = csum_partial_copy(next_skb->data,
						      skb_put(skb, next_skb_size),
						      next_skb_size, skb->csum);
		}

		/* Update sequence range on original skb. */
		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;

		/* Merge over control information. */
		flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
		if(flags & TCPCB_FLAG_URG) {
			u16 urgptr = TCP_SKB_CB(next_skb)->urg_ptr;
			TCP_SKB_CB(skb)->urg_ptr = urgptr + skb_size;
		}
		TCP_SKB_CB(skb)->flags = flags;

		/* All done, get rid of second SKB and account for it so
		 * packet counting does not break.
		 */
		kfree_skb(next_skb);
		sk->tp_pinfo.af_tcp.packets_out--;
	}
}
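/* The "single add w/carry" optimization the comment above alludes to is
 * ordinary ones'-complement folding of two partial checksums (later
 * kernels provide csum_add() for exactly this).  It is valid here
 * because the fast path only runs when the first block's length is a
 * multiple of 4, so the appended block starts 16-bit aligned.  A
 * standalone userspace sketch of the fold (hypothetical helper name,
 * not kernel code):
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t sketch_csum_combine(uint32_t a, uint32_t b)
{
	uint32_t sum = a + b;

	/* Fold a 32-bit overflow carry back into the low bits so the
	 * result stays a valid ones'-complement partial checksum.
	 */
	if (sum < a)		/* overflow occurred */
		sum++;
	return sum;
}

int main(void)
{
	/* 0xffff0001 + 0x00020003 overflows once, so the carry is
	 * folded back in: result is 0x10005.
	 */
	printf("0x%x\n", sketch_csum_combine(0xffff0001u, 0x00020003u));
	return 0;
}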
/* Do a simple retransmit without using the backoff mechanisms in
 * tcp_timer.  This is used for path mtu discovery.
 * The socket is already locked here.
 */
void tcp_simple_retransmit(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sk_buff *skb, *old_next_skb;
	unsigned int mss = tcp_current_mss(sk);

	/* Don't muck with the congestion window here. */
	tp->dup_acks = 0;
	tp->high_seq = tp->snd_nxt;
	tp->retrans_head = NULL;

	/* Input control flow will see that this was retransmitted
	 * and not use it for RTT calculation in the absence of
	 * the timestamp option.
	 */
	for (old_next_skb = skb = skb_peek(&sk->write_queue);
	     ((skb != tp->send_head) &&
	      (skb != (struct sk_buff *)&sk->write_queue));
	     skb = skb->next) {
		int resend_skb = 0;

		/* Our goal is to push out the packets which we
		 * sent already, but are being chopped up now to
		 * account for the PMTU information we have.
		 *
		 * As we resend the queue, packets are fragmented
		 * into two pieces, and when we try to send the
		 * second piece it may be collapsed together with
		 * a subsequent packet, and so on.  -DaveM
		 */
		if (old_next_skb != skb || skb->len > mss)
			resend_skb = 1;
		old_next_skb = skb->next;
		if (resend_skb != 0)
			tcp_retransmit_skb(sk, skb);
	}
}

static __inline__ void update_retrans_head(struct sock *sk)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

	tp->retrans_head = tp->retrans_head->next;
	if((tp->retrans_head == tp->send_head) ||
	   (tp->retrans_head == (struct sk_buff *) &sk->write_queue)) {
		tp->retrans_head = NULL;
		tp->rexmt_done = 1;
	}
}

/* This retransmits one SKB.  Policy decisions and retransmit queue
 * state updates are done by the caller.  Returns non-zero if an
 * error occurred which prevented the send.
 */
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	unsigned int cur_mss = tcp_current_mss(sk);

	if(skb->len > cur_mss) {
		if(tcp_fragment(sk, skb, cur_mss))
			return 1; /* We'll try again later. */

		/* New SKB created, account for it. */
		tp->packets_out++;
	}

	/* Collapse two adjacent packets if worthwhile and we can. */
	if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
	   (skb->len < (cur_mss >> 1)) &&
	   (skb->next != tp->send_head) &&
	   (skb->next != (struct sk_buff *)&sk->write_queue) &&
	   (sysctl_tcp_retrans_collapse != 0))
		tcp_retrans_try_collapse(sk, skb, cur_mss);

	if(tp->af_specific->rebuild_header(sk))
		return 1; /* Routing failure or similar. */

	/* Some Solaris stacks overoptimize and ignore the FIN on a
	 * retransmit when old data is attached.  So strip it off
	 * since it is cheap to do so and saves bytes on the network.
	 */
	if(skb->len > 0 &&
	   (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
	   tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
		TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
		skb_trim(skb, 0);
		skb->csum = 0;
	}

	/* Ok, we're gonna send it out, update state. */
	TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_RETRANS;
	tp->retrans_out++;

	/* Make a copy, if the first transmission SKB clone we made
	 * is still in somebody's hands, else make a clone.
	 */
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	if(skb_cloned(skb))
		skb = skb_copy(skb, GFP_ATOMIC);
	else
		skb = skb_clone(skb, GFP_ATOMIC);

	tcp_transmit_skb(sk, skb);

	/* Update global TCP statistics and return success. */
	sk->prot->retransmits++;
	tcp_statistics.TcpRetransSegs++;
	return 0;
}

/* This gets called after a retransmit timeout, and the initially
 * retransmitted data is acknowledged.  It tries to continue
 * resending the rest of the retransmit queue, until either
 * we've sent it all or the congestion window limit is reached.
 * If doing SACK, the first ACK which comes back for a timeout
 * based retransmit packet might feed us FACK information again.
 * If so, we use it to avoid unnecessary retransmissions.
 */
void tcp_xmit_retransmit_queue(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sk_buff *skb;

	if (tp->retrans_head == NULL &&
	    tp->rexmt_done == 0)
		tp->retrans_head = skb_peek(&sk->write_queue);
	if (tp->retrans_head == tp->send_head)
		tp->retrans_head = NULL;

	/* Each time, advance the retrans_head if we got
	 * a packet out or we skipped one because it was
	 * SACK'd.  -DaveM
	 */
	while ((skb = tp->retrans_head) != NULL) {
		/* If it has been ack'd by a SACK block, we don't
		 * retransmit it.
		 */
		if(!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
			/* Send it out, punt if error occurred. */
			if(tcp_retransmit_skb(sk, skb))
				break;

			update_retrans_head(sk);

			/* Stop retransmitting if we've hit the congestion
			 * window limit.
			 */
			if (tp->retrans_out >= tp->snd_cwnd)
				break;
		} else {
			update_retrans_head(sk);
		}
	}
}

/* Using FACK information, retransmit all missing frames at the receiver
 * up to the forward-most SACK'd packet (tp->fackets_out) if the packet
 * has not been retransmitted already.
 */
void tcp_fack_retransmit(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sk_buff *skb = skb_peek(&sk->write_queue);
	int packet_cnt = 0;

	while((skb != NULL) &&
	      (skb != tp->send_head) &&
	      (skb != (struct sk_buff *)&sk->write_queue)) {
		__u8 sacked = TCP_SKB_CB(skb)->sacked;

		if(sacked & (TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS))
			goto next_packet;

		/* Ok, retransmit it. */
		if(tcp_retransmit_skb(sk, skb))
			break;

		if(tcp_packets_in_flight(tp) >= tp->snd_cwnd)
			break;
next_packet:
		packet_cnt++;
		if(packet_cnt >= tp->fackets_out)
			break;
		skb = skb->next;
	}
}
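/* A note on the (struct sk_buff *)&sk->write_queue casts in the queue
 * walks above: struct sk_buff_head begins with the same next/prev
 * pointer pair as struct sk_buff, so the queue head itself can pose as
 * a sentinel node that terminates one full lap of the circular list.
 * A standalone layout sketch (field subset only, hypothetical names,
 * not the real structures):
 */
struct fake_skb {
	struct fake_skb *next;
	struct fake_skb *prev;
	int payload;			/* stand-in for the real fields */
};

struct fake_skb_head {
	struct fake_skb *next;		/* same offsets as fake_skb */
	struct fake_skb *prev;
};

/* Visit every real node; the cast head marks the end of the lap. */
static void walk(struct fake_skb_head *q)
{
	struct fake_skb *skb;

	for (skb = q->next; skb != (struct fake_skb *) q; skb = skb->next)
		(void) skb->payload;	/* visit node */
}

int main(void)
{
	struct fake_skb a = { 0 }, b = { 0 };
	struct fake_skb_head q;

	/* Circular queue: head <-> a <-> b <-> head. */
	q.next = &a;  a.prev = (struct fake_skb *) &q;
	a.next = &b;  b.prev = &a;
	b.next = (struct fake_skb *) &q;  q.prev = &b;

	walk(&q);			/* visits a and b, then stops */
	return 0;
}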
/* Send a fin.  The caller locks the socket for us.  This cannot be
 * allowed to fail queueing a FIN frame under any circumstances.
 */
void tcp_send_fin(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sk_buff *skb = skb_peek_tail(&sk->write_queue);
	unsigned int mss_now;

	/* Optimization, tack on the FIN if we have a queue of
	 * unsent frames.  But be careful about outgoing SACKS
	 * and IP options.
	 */
	mss_now = tcp_current_mss(sk);

	if((tp->send_head != NULL) && (skb->len < mss_now)) {
		/* tcp_write_xmit() takes care of the rest. */
		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
		TCP_SKB_CB(skb)->end_seq++;
		tp->write_seq++;

		/* Special case to avoid Nagle bogosity.  If this
		 * segment is the last segment, and it was queued
		 * due to Nagle/SWS-avoidance, send it out now.
		 */
		if(tp->send_head == skb &&
		   !sk->nonagle &&
		   skb->len < (tp->mss_cache >> 1) &&
		   tp->packets_out &&
		   !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_URG)) {
			update_send_head(sk);
			TCP_SKB_CB(skb)->when = tcp_time_stamp;
			tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
			tp->packets_out++;
			tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
			if(!tcp_timer_is_set(sk, TIME_RETRANS))
				tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
		}
	} else {
		/* Socket is locked, keep trying until memory is available. */
		do {
			skb = sock_wmalloc(sk,
					   (MAX_HEADER +
					    sk->prot->max_header),
					   1, GFP_KERNEL);
		} while (skb == NULL);

		/* Reserve space for headers and prepare control bits. */
		skb_reserve(skb, MAX_HEADER + sk->prot->max_header);
		skb->csum = 0;
		TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
		TCP_SKB_CB(skb)->sacked = 0;
		TCP_SKB_CB(skb)->urg_ptr = 0;

		/* FIN eats a sequence byte. */
		TCP_SKB_CB(skb)->seq = tp->write_seq;
		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
		tcp_send_skb(sk, skb, 0);
	}
}
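/* Worked example of the sequence accounting above (illustrative numbers
 * only): a FIN consumes exactly one sequence number, whether it is
 * piggybacked on queued data or carried by a fresh empty segment.
 */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int seq = 900, end_seq = 1000, write_seq = 1000;

	/* Piggyback path: the queued 100-byte segment grows by one
	 * sequence number for the FIN bit, with no extra payload byte.
	 */
	end_seq++;
	write_seq++;
	assert(end_seq == 1001 && write_seq == 1001);
	printf("FIN segment covers [%u,%u): 100 data bytes + FIN\n",
	       seq, end_seq);
	return 0;
}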