/* tcp_output.c */
/* Strictly speaking, keeping th->window fixed violates the receiver
 * side SWS prevention criteria.  The problem is that under this rule
 * a stream of single byte packets will cause the right side of the
 * window to always advance by a single byte.
 *
 * Of course, if the sender implements sender side SWS prevention
 * then this will not be a problem.
 *
 * BSD seems to make the following compromise:
 *
 *	If the free space is less than 1/4 of the maximum
 *	space available and the free space is less than 1/2 mss,
 *	then set the window to 0.
 *	Otherwise, just prevent the window from shrinking
 *	and from being larger than the largest representable value.
 *
 * This prevents incremental opening of the window in the regime
 * where TCP is limited by the speed of the reader side taking
 * data out of the TCP receive queue.  It does nothing about
 * those cases where the window is constrained on the sender side
 * because the pipeline is full.
 *
 * BSD also seems to "accidentally" limit itself to windows that are a
 * multiple of MSS, at least until the free space gets quite small.
 * This would appear to be a side effect of the mbuf implementation.
 * Combining these two algorithms results in the observed behavior
 * of having a fixed window size at almost all times.
 *
 * Below we obtain similar behavior by forcing the offered window to
 * a multiple of the mss when it is feasible to do so.
 *
 * Note, we don't "adjust" for TIMESTAMP or SACK option bytes.
 */
u32 __tcp_select_window(struct sock *sk)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	unsigned int mss = tp->mss_cache;
	int free_space;
	u32 window;

	/* Sometimes free_space can be < 0. */
	free_space = (sk->rcvbuf - atomic_read(&sk->rmem_alloc)) / 2;
	if (tp->window_clamp) {
		if (free_space > ((int) tp->window_clamp))
			free_space = tp->window_clamp;
		mss = min(tp->window_clamp, mss);
	} else {
		printk("tcp_select_window: tp->window_clamp == 0.\n");
	}

	if (mss < 1) {
		mss = 1;
		printk("tcp_select_window: sk->mss fell to 0.\n");
	}

	if ((free_space < (sk->rcvbuf/4)) &&
	    (free_space < ((int) (mss/2)))) {
		window = 0;
		tp->pred_flags = 0;
	} else {
		/* Get the largest window that is a nice multiple of mss.
		 * Window clamp already applied above.
		 * If our current window offering is within 1 mss of the
		 * free space we just keep it.  This prevents the divide
		 * and multiply from happening most of the time.
		 * We also don't do any window rounding when the free space
		 * is too small.
		 */
		window = tp->rcv_wnd;
		if ((((int) window) <= (free_space - ((int) mss))) ||
		    (((int) window) > free_space))
			window = (((unsigned int) free_space)/mss)*mss;
	}
	return window;
}
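/* The rounding above is easiest to see with concrete numbers.  The
 * following standalone userspace sketch (hypothetical values, not part
 * of the kernel source) mirrors just the mss-multiple arithmetic: keep
 * the current offer if it is within one mss below the free space and
 * not above it, otherwise round the free space down to a multiple of
 * the mss.
 */
#include <stdio.h>

static unsigned int sketch_select_window(int free_space, unsigned int mss,
					 unsigned int rcv_wnd)
{
	unsigned int window = rcv_wnd;

	if ((((int) window) <= (free_space - ((int) mss))) ||
	    (((int) window) > free_space))
		window = (((unsigned int) free_space) / mss) * mss;
	return window;
}

int main(void)
{
	/* With mss = 1460 and 10000 bytes free: a stale offer of 4096 is
	 * re-rounded to 8760 (6 * 1460); an offer of 9000 is kept because
	 * it is already within one mss of the free space.
	 */
	printf("%u\n", sketch_select_window(10000, 1460, 4096)); /* 8760 */
	printf("%u\n", sketch_select_window(10000, 1460, 9000)); /* 9000 */
	return 0;
}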
/* Attempt to collapse two adjacent SKB's during retransmission. */
static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
{
	struct sk_buff *next_skb = skb->next;

	/* The first test we must make is that neither of these two
	 * SKB's are still referenced by someone else.
	 */
	if(!skb_cloned(skb) && !skb_cloned(next_skb)) {
		int skb_size = skb->len, next_skb_size = next_skb->len;
		u16 flags = TCP_SKB_CB(skb)->flags;

		/* Punt if the first SKB has URG set. */
		if(flags & TCPCB_FLAG_URG)
			return;

		/* Also punt if next skb has been SACK'd. */
		if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
			return;

		/* Punt if not enough space exists in the first SKB for
		 * the data in the second, or the total combined payload
		 * would exceed the MSS.
		 */
		if ((next_skb_size > skb_tailroom(skb)) ||
		    ((skb_size + next_skb_size) > mss_now))
			return;

		/* Ok.  We will be able to collapse the packet. */
		__skb_unlink(next_skb, next_skb->list);

		if(skb->len % 4) {
			/* Must copy and rechecksum all data. */
			memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
			skb->csum = csum_partial(skb->data, skb->len, 0);
		} else {
			/* Optimize, actually we could also combine next_skb->csum
			 * to skb->csum using a single add w/carry operation too.
			 */
			skb->csum = csum_partial_copy(next_skb->data,
						      skb_put(skb, next_skb_size),
						      next_skb_size, skb->csum);
		}

		/* Update sequence range on original skb. */
		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;

		/* Merge over control information. */
		flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
		if(flags & TCPCB_FLAG_URG) {
			u16 urgptr = TCP_SKB_CB(next_skb)->urg_ptr;
			TCP_SKB_CB(skb)->urg_ptr = urgptr + skb_size;
		}
		TCP_SKB_CB(skb)->flags = flags;

		/* All done, get rid of second SKB and account for it so
		 * packet counting does not break.
		 */
		kfree_skb(next_skb);
		sk->tp_pinfo.af_tcp.packets_out--;
	}
}
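/* The "single add w/carry" optimization the comment above alludes to is
 * ordinary ones'-complement folding of two partial checksums (later
 * kernels provide csum_add() for exactly this).  It is valid here
 * because the fast path only runs when the first block's length is a
 * multiple of 4, so the appended block starts 16-bit aligned.  A
 * standalone userspace sketch of the fold (hypothetical helper name,
 * not kernel code):
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t sketch_csum_combine(uint32_t a, uint32_t b)
{
	uint32_t sum = a + b;

	/* Fold a 32-bit overflow carry back into the low bits so the
	 * result stays a valid ones'-complement partial checksum.
	 */
	if (sum < a)		/* overflow occurred */
		sum++;
	return sum;
}

int main(void)
{
	/* 0xffff0001 + 0x00020003 overflows once, so the carry is
	 * folded back in: result is 0x10005.
	 */
	printf("0x%x\n", sketch_csum_combine(0xffff0001u, 0x00020003u));
	return 0;
}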
/* Do a simple retransmit without using the backoff mechanisms in
 * tcp_timer.  This is used for path mtu discovery.
 * The socket is already locked here.
 */
void tcp_simple_retransmit(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sk_buff *skb, *old_next_skb;
	unsigned int mss = tcp_current_mss(sk);

	/* Don't muck with the congestion window here. */
	tp->dup_acks = 0;
	tp->high_seq = tp->snd_nxt;
	tp->retrans_head = NULL;

	/* Input control flow will see that this was retransmitted
	 * and not use it for RTT calculation in the absence of
	 * the timestamp option.
	 */
	for (old_next_skb = skb = skb_peek(&sk->write_queue);
	     ((skb != tp->send_head) &&
	      (skb != (struct sk_buff *)&sk->write_queue));
	     skb = skb->next) {
		int resend_skb = 0;

		/* Our goal is to push out the packets which we
		 * sent already, but are being chopped up now to
		 * account for the PMTU information we have.
		 *
		 * As we resend the queue, packets are fragmented
		 * into two pieces, and when we try to send the
		 * second piece it may be collapsed together with
		 * a subsequent packet, and so on.  -DaveM
		 */
		if (old_next_skb != skb || skb->len > mss)
			resend_skb = 1;
		old_next_skb = skb->next;
		if (resend_skb != 0)
			tcp_retransmit_skb(sk, skb);
	}
}

static __inline__ void update_retrans_head(struct sock *sk)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

	tp->retrans_head = tp->retrans_head->next;
	if((tp->retrans_head == tp->send_head) ||
	   (tp->retrans_head == (struct sk_buff *) &sk->write_queue)) {
		tp->retrans_head = NULL;
		tp->rexmt_done = 1;
	}
}

/* This retransmits one SKB.  Policy decisions and retransmit queue
 * state updates are done by the caller.  Returns non-zero if an
 * error occurred which prevented the send.
 */
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	unsigned int cur_mss = tcp_current_mss(sk);

	if(skb->len > cur_mss) {
		if(tcp_fragment(sk, skb, cur_mss))
			return 1; /* We'll try again later. */

		/* New SKB created, account for it. */
		tp->packets_out++;
	}

	/* Collapse two adjacent packets if worthwhile and we can. */
	if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
	   (skb->len < (cur_mss >> 1)) &&
	   (skb->next != tp->send_head) &&
	   (skb->next != (struct sk_buff *)&sk->write_queue) &&
	   (sysctl_tcp_retrans_collapse != 0))
		tcp_retrans_try_collapse(sk, skb, cur_mss);

	if(tp->af_specific->rebuild_header(sk))
		return 1; /* Routing failure or similar. */

	/* Some Solaris stacks overoptimize and ignore the FIN on a
	 * retransmit when old data is attached.  So strip it off
	 * since it is cheap to do so and saves bytes on the network.
	 */
	if(skb->len > 0 &&
	   (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
	   tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
		TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
		skb_trim(skb, 0);
		skb->csum = 0;
	}

	/* Ok, we're gonna send it out, update state. */
	TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_RETRANS;
	tp->retrans_out++;

	/* Make a copy, if the first transmission SKB clone we made
	 * is still in somebody's hands, else make a clone.
	 */
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	if(skb_cloned(skb))
		skb = skb_copy(skb, GFP_ATOMIC);
	else
		skb = skb_clone(skb, GFP_ATOMIC);

	tcp_transmit_skb(sk, skb);

	/* Update global TCP statistics and return success. */
	sk->prot->retransmits++;
	tcp_statistics.TcpRetransSegs++;
	return 0;
}

/* This gets called after a retransmit timeout, and the initially
 * retransmitted data is acknowledged.  It tries to continue
 * resending the rest of the retransmit queue, until either
 * we've sent it all or the congestion window limit is reached.
 * If doing SACK, the first ACK which comes back for a timeout
 * based retransmit packet might feed us FACK information again.
 * If so, we use it to avoid unnecessary retransmissions.
 */
void tcp_xmit_retransmit_queue(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sk_buff *skb;

	if (tp->retrans_head == NULL &&
	    tp->rexmt_done == 0)
		tp->retrans_head = skb_peek(&sk->write_queue);
	if (tp->retrans_head == tp->send_head)
		tp->retrans_head = NULL;

	/* Each time, advance the retrans_head if we got
	 * a packet out or we skipped one because it was
	 * SACK'd.  -DaveM
	 */
	while ((skb = tp->retrans_head) != NULL) {
		/* If it has been ack'd by a SACK block, we don't
		 * retransmit it.
		 */
		if(!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
			/* Send it out, punt if error occurred. */
			if(tcp_retransmit_skb(sk, skb))
				break;

			update_retrans_head(sk);

			/* Stop retransmitting if we've hit the congestion
			 * window limit.
			 */
			if (tp->retrans_out >= tp->snd_cwnd)
				break;
		} else {
			update_retrans_head(sk);
		}
	}
}

/* Using FACK information, retransmit all missing frames at the receiver
 * up to the forward-most SACK'd packet (tp->fackets_out) if the packet
 * has not been retransmitted already.
 */
void tcp_fack_retransmit(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sk_buff *skb = skb_peek(&sk->write_queue);
	int packet_cnt = 0;

	while((skb != NULL) &&
	      (skb != tp->send_head) &&
	      (skb != (struct sk_buff *)&sk->write_queue)) {
		__u8 sacked = TCP_SKB_CB(skb)->sacked;

		if(sacked & (TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS))
			goto next_packet;

		/* Ok, retransmit it. */
		if(tcp_retransmit_skb(sk, skb))
			break;

		if(tcp_packets_in_flight(tp) >= tp->snd_cwnd)
			break;
next_packet:
		packet_cnt++;
		if(packet_cnt >= tp->fackets_out)
			break;
		skb = skb->next;
	}
}
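/* A note on the (struct sk_buff *)&sk->write_queue casts in the queue
 * walks above: struct sk_buff_head begins with the same next/prev
 * pointer pair as struct sk_buff, so the queue head itself can pose as
 * a sentinel node that terminates one full lap of the circular list.
 * A standalone layout sketch (field subset only, hypothetical names,
 * not the real structures):
 */
struct fake_skb {
	struct fake_skb *next;
	struct fake_skb *prev;
	int payload;			/* stand-in for the real fields */
};

struct fake_skb_head {
	struct fake_skb *next;		/* same offsets as fake_skb */
	struct fake_skb *prev;
};

/* Visit every real node; the cast head marks the end of the lap. */
static void walk(struct fake_skb_head *q)
{
	struct fake_skb *skb;

	for (skb = q->next; skb != (struct fake_skb *) q; skb = skb->next)
		(void) skb->payload;	/* visit node */
}

int main(void)
{
	struct fake_skb a = { 0 }, b = { 0 };
	struct fake_skb_head q;

	/* Circular queue: head <-> a <-> b <-> head. */
	q.next = &a;  a.prev = (struct fake_skb *) &q;
	a.next = &b;  b.prev = &a;
	b.next = (struct fake_skb *) &q;  q.prev = &b;

	walk(&q);			/* visits a and b, then stops */
	return 0;
}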
/* Send a fin.  The caller locks the socket for us.  This cannot be
 * allowed to fail queueing a FIN frame under any circumstances.
 */
void tcp_send_fin(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sk_buff *skb = skb_peek_tail(&sk->write_queue);
	unsigned int mss_now;

	/* Optimization, tack on the FIN if we have a queue of
	 * unsent frames.  But be careful about outgoing SACKS
	 * and IP options.
	 */
	mss_now = tcp_current_mss(sk);

	if((tp->send_head != NULL) && (skb->len < mss_now)) {
		/* tcp_write_xmit() takes care of the rest. */
		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
		TCP_SKB_CB(skb)->end_seq++;
		tp->write_seq++;

		/* Special case to avoid Nagle bogosity.  If this
		 * segment is the last segment, and it was queued
		 * due to Nagle/SWS-avoidance, send it out now.
		 */
		if(tp->send_head == skb &&
		   !sk->nonagle &&
		   skb->len < (tp->mss_cache >> 1) &&
		   tp->packets_out &&
		   !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_URG)) {
			update_send_head(sk);
			TCP_SKB_CB(skb)->when = tcp_time_stamp;
			tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
			tp->packets_out++;
			tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
			if(!tcp_timer_is_set(sk, TIME_RETRANS))
				tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
		}
	} else {
		/* Socket is locked, keep trying until memory is available. */
		do {
			skb = sock_wmalloc(sk,
					   (MAX_HEADER +
					    sk->prot->max_header),
					   1, GFP_KERNEL);
		} while (skb == NULL);

		/* Reserve space for headers and prepare control bits. */
		skb_reserve(skb, MAX_HEADER + sk->prot->max_header);
		skb->csum = 0;
		TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
		TCP_SKB_CB(skb)->sacked = 0;
		TCP_SKB_CB(skb)->urg_ptr = 0;

		/* FIN eats a sequence byte. */
		TCP_SKB_CB(skb)->seq = tp->write_seq;
		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
		tcp_send_skb(sk, skb, 0);
	}
}
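/* Worked example of the sequence accounting above (illustrative numbers
 * only): a FIN consumes exactly one sequence number, whether it is
 * piggybacked on queued data or carried by a fresh empty segment.
 */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int seq = 900, end_seq = 1000, write_seq = 1000;

	/* Piggyback path: the queued 100-byte segment grows by one
	 * sequence number for the FIN bit, with no extra payload byte.
	 */
	end_seq++;
	write_seq++;
	assert(end_seq == 1001 && write_seq == 1001);
	printf("FIN segment covers [%u,%u): 100 data bytes + FIN\n",
	       seq, end_seq);
	return 0;
}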