
tcp_output.c

C
Page 1 of 3
   tp->mss_clamp is mss negotiated at connection setup.
   It is the minimum of user_mss and mss received with SYN.
   It also does not include TCP options.

   tp->pmtu_cookie is last pmtu, seen by this function.

   tp->mss_cache is current effective sending mss, including
   all tcp options except for SACKs. It is evaluated,
   taking into account current pmtu, but never exceeds
   tp->mss_clamp.

   NOTE1. rfc1122 clearly states that advertised MSS
   DOES NOT include either tcp or ip options.

   NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside
   this function.			--ANK (980731) */
int tcp_sync_mss(struct sock *sk, u32 pmtu)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        int mss_now;

        /* Calculate base mss without TCP options:
           It is MMS_S - sizeof(tcphdr) of rfc1122
         */
        mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr);

        /* Clamp it (mss_clamp does not include tcp options) */
        if (mss_now > tp->mss_clamp)
                mss_now = tp->mss_clamp;

        /* Now subtract optional transport overhead */
        mss_now -= tp->ext_header_len;

        /* Then reserve room for full set of TCP options and 8 bytes of data */
        if (mss_now < 48)
                mss_now = 48;

        /* Now subtract TCP options size, not including SACKs */
        mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);

        /* Bound mss with half of window */
        if (tp->max_window && mss_now > (tp->max_window>>1))
                mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len);

        /* And store cached results */
        tp->pmtu_cookie = pmtu;
        tp->mss_cache = mss_now;

        return mss_now;
}

/* This routine writes packets to the network.  It advances the
 * send_head.  This happens as incoming acks open up the remote
 * window for us.
 *
 * Returns 1, if no segments are in flight and we have queued segments, but
 * cannot send anything now because of SWS or another problem.
 */
int tcp_write_xmit(struct sock *sk, int nonagle)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        unsigned int mss_now;

        /* If we are closed, the bytes will have to remain here.
         * In time closedown will finish, we empty the write queue and all
         * will be happy.
         */
        if (sk->state != TCP_CLOSE) {
                struct sk_buff *skb;
                int sent_pkts = 0;

                /* Account for SACKS, we may need to fragment due to this.
                 * It is just like the real MSS changing on us midstream.
                 * We also handle things correctly when the user adds some
                 * IP options mid-stream.  Silly to do, but cover it.
                 */
                mss_now = tcp_current_mss(sk);

                while ((skb = tp->send_head) &&
                       tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? nonagle : 1)) {
                        if (skb->len > mss_now) {
                                if (tcp_fragment(sk, skb, mss_now))
                                        break;
                        }

                        TCP_SKB_CB(skb)->when = tcp_time_stamp;
                        if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))
                                break;

                        /* Advance the send_head.  This one is sent out. */
                        update_send_head(sk, tp, skb);

                        tcp_minshall_update(tp, mss_now, skb);
                        sent_pkts = 1;
                }

                if (sent_pkts) {
                        tcp_cwnd_validate(sk, tp);
                        return 0;
                }

                return !tp->packets_out && tp->send_head;
        }
        return 0;
}
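/* A minimal userspace sketch of the arithmetic in tcp_sync_mss() above,
 * assuming IPv4 with a 20-byte IP header and a 20-byte bare TCP header.
 * The function and its plain-int parameters are hypothetical stand-ins
 * for the struct tcp_opt fields, not kernel API.
 */
#include <stdio.h>

static int sync_mss_sketch(int pmtu, int mss_clamp,
                           int ext_header_len, int tcp_header_len)
{
        int mss_now = pmtu - 20 - 20;           /* strip IP and TCP headers */

        if (mss_now > mss_clamp)                /* never exceed negotiated MSS */
                mss_now = mss_clamp;
        mss_now -= ext_header_len;              /* optional transport overhead */
        if (mss_now < 48)                       /* room for options + 8 data bytes */
                mss_now = 48;
        mss_now -= tcp_header_len - 20;         /* options in use, minus SACKs */
        return mss_now;
}

int main(void)
{
        /* Ethernet PMTU 1500, clamp 1460, timestamps on (32-byte TCP
         * header): 1500 - 40 = 1460, minus 12 bytes of options = 1448.
         */
        printf("%d\n", sync_mss_sketch(1500, 1460, 0, 32));
        return 0;
}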
/* This function returns the amount that we can raise the
 * usable window based on the following constraints
 *
 * 1. The window can never be shrunk once it is offered (RFC 793)
 * 2. We limit memory per socket
 *
 * RFC 1122:
 * "the suggested [SWS] avoidance algorithm for the receiver is to keep
 *  RECV.NEXT + RCV.WIN fixed until:
 *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
 *
 * i.e. don't raise the right edge of the window until you can raise
 * it at least MSS bytes.
 *
 * Unfortunately, the recommended algorithm breaks header prediction,
 * since header prediction assumes th->window stays fixed.
 *
 * Strictly speaking, keeping th->window fixed violates the receiver
 * side SWS prevention criteria. The problem is that under this rule
 * a stream of single byte packets will cause the right side of the
 * window to always advance by a single byte.
 *
 * Of course, if the sender implements sender side SWS prevention
 * then this will not be a problem.
 *
 * BSD seems to make the following compromise:
 *
 *	If the free space is less than the 1/4 of the maximum
 *	space available and the free space is less than 1/2 mss,
 *	then set the window to 0.
 *	[ Actually, bsd uses MSS and 1/4 of maximal _window_ ]
 *	Otherwise, just prevent the window from shrinking
 *	and from being larger than the largest representable value.
 *
 * This prevents incremental opening of the window in the regime
 * where TCP is limited by the speed of the reader side taking
 * data out of the TCP receive queue. It does nothing about
 * those cases where the window is constrained on the sender side
 * because the pipeline is full.
 *
 * BSD also seems to "accidentally" limit itself to windows that are a
 * multiple of MSS, at least until the free space gets quite small.
 * This would appear to be a side effect of the mbuf implementation.
 * Combining these two algorithms results in the observed behavior
 * of having a fixed window size at almost all times.
 *
 * Below we obtain similar behavior by forcing the offered window to
 * a multiple of the mss when it is feasible to do so.
 *
 * Note, we don't "adjust" for TIMESTAMP or SACK option bytes.
 * Regular options like TIMESTAMP are taken into account.
 */
u32 __tcp_select_window(struct sock *sk)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        /* MSS for the peer's data.  Previous versions used mss_clamp
         * here.  I don't know if the value based on our guesses
         * of peer's MSS is better for the performance.  It's more correct
         * but may be worse for the performance because of rcv_mss
         * fluctuations.  --SAW  1998/11/1
         */
        int mss = tp->ack.rcv_mss;
        int free_space = tcp_space(sk);
        int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
        int window;

        if (mss > full_space)
                mss = full_space;

        if (free_space < full_space/2) {
                tp->ack.quick = 0;

                if (tcp_memory_pressure)
                        tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);

                if (free_space < mss)
                        return 0;
        }

        if (free_space > tp->rcv_ssthresh)
                free_space = tp->rcv_ssthresh;

        /* Get the largest window that is a nice multiple of mss.
         * Window clamp already applied above.
         * If our current window offering is within 1 mss of the
         * free space we just keep it. This prevents the divide
         * and multiply from happening most of the time.
         * We also don't do any window rounding when the free space
         * is too small.
         */
        window = tp->rcv_wnd;
        if (window <= free_space - mss || window > free_space)
                window = (free_space/mss)*mss;

        return window;
}
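/* A self-contained illustration of the rounding rule above: keep the
 * current offer when it already lies within one MSS of the free space,
 * otherwise round the free space down to a whole multiple of the MSS.
 * Standalone hypothetical code, not kernel API.
 */
#include <stdio.h>

static int select_window_sketch(int cur_window, int free_space, int mss)
{
        int window = cur_window;

        if (window <= free_space - mss || window > free_space)
                window = (free_space / mss) * mss;
        return window;
}

int main(void)
{
        /* 10000 bytes free, mss 1448: largest multiple is 6*1448 = 8688 */
        printf("%d\n", select_window_sketch(5000, 10000, 1448));
        /* current offer within one mss of the free space: kept unchanged */
        printf("%d\n", select_window_sketch(8700, 10000, 1448));
        return 0;
}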
/* Attempt to collapse two adjacent SKB's during retransmission. */
static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        struct sk_buff *next_skb = skb->next;

        /* The first test we must make is that neither of these two
         * SKB's are still referenced by someone else.
         */
        if (!skb_cloned(skb) && !skb_cloned(next_skb)) {
                int skb_size = skb->len, next_skb_size = next_skb->len;
                u16 flags = TCP_SKB_CB(skb)->flags;

                /* Also punt if next skb has been SACK'd. */
                if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
                        return;

                /* Next skb is out of window. */
                if (after(TCP_SKB_CB(next_skb)->end_seq, tp->snd_una+tp->snd_wnd))
                        return;

                /* Punt if not enough space exists in the first SKB for
                 * the data in the second, or the total combined payload
                 * would exceed the MSS.
                 */
                if ((next_skb_size > skb_tailroom(skb)) ||
                    ((skb_size + next_skb_size) > mss_now))
                        return;

                /* Ok.  We will be able to collapse the packet. */
                __skb_unlink(next_skb, next_skb->list);

                if (next_skb->ip_summed == CHECKSUM_HW)
                        skb->ip_summed = CHECKSUM_HW;

                if (skb->ip_summed != CHECKSUM_HW) {
                        memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
                        skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
                }

                /* Update sequence range on original skb. */
                TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;

                /* Merge over control information. */
                flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
                TCP_SKB_CB(skb)->flags = flags;

                /* All done, get rid of second SKB and account for it so
                 * packet counting does not break.
                 */
                TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
                if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
                        tp->retrans_out--;
                if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) {
                        tp->lost_out--;
                        tp->left_out--;
                }
                /* Reno case is special. Sigh... */
                if (!tp->sack_ok && tp->sacked_out) {
                        tp->sacked_out--;
                        tp->left_out--;
                }

                /* Not quite right: it can be > snd.fack, but
                 * it is better to underestimate fackets.
                 */
                if (tp->fackets_out)
                        tp->fackets_out--;
                tcp_free_skb(sk, next_skb);
                tp->packets_out--;
        }
}

/* Do a simple retransmit without using the backoff mechanisms in
 * tcp_timer. This is used for path mtu discovery.
 * The socket is already locked here.
 */
void tcp_simple_retransmit(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff *skb;
        unsigned int mss = tcp_current_mss(sk);
        int lost = 0;

        for_retrans_queue(skb, sk, tp) {
                if (skb->len > mss &&
                    !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
                        if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
                                TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
                                tp->retrans_out--;
                        }
                        if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) {
                                TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
                                tp->lost_out++;
                                lost = 1;
                        }
                }
        }

        if (!lost)
                return;

        tcp_sync_left_out(tp);

        /* Don't muck with the congestion window here.
         * Reason is that we do not increase amount of _data_
         * in network, but units changed and effective
         * cwnd/ssthresh really reduced now.
         */
        if (tp->ca_state != TCP_CA_Loss) {
                tp->high_seq = tp->snd_nxt;
                tp->snd_ssthresh = tcp_current_ssthresh(tp);
                tp->prior_ssthresh = 0;
                tp->undo_marker = 0;
                tp->ca_state = TCP_CA_Loss;
        }
        tcp_xmit_retransmit_queue(sk);
}
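/* A toy model of the collapse preconditions in tcp_retrans_try_collapse
 * above: the second buffer must fit in the first one's spare tailroom,
 * and the combined payload must still fit in one MSS.  The struct and
 * function below are hypothetical, not kernel types.
 */
#include <stdio.h>
#include <string.h>

struct seg {
        char data[2048];        /* backing storage (fixed "truesize") */
        int len;                /* bytes currently used */
};

static int try_collapse_sketch(struct seg *a, const struct seg *b, int mss)
{
        int tailroom = (int)sizeof(a->data) - a->len;

        /* punt: not enough tailroom, or combined payload exceeds MSS */
        if (b->len > tailroom || a->len + b->len > mss)
                return -1;
        memcpy(a->data + a->len, b->data, b->len);      /* merge payloads */
        a->len += b->len;
        return 0;
}

int main(void)
{
        struct seg a = { "hello ", 6 };
        struct seg b = { "world", 5 };

        if (try_collapse_sketch(&a, &b, 1448) == 0)
                printf("%.*s (%d bytes)\n", a.len, a.data, a.len);
        return 0;
}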
/* This retransmits one SKB.  Policy decisions and retransmit queue
 * state updates are done by the caller.  Returns non-zero if an
 * error occurred which prevented the send.
 */
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        unsigned int cur_mss = tcp_current_mss(sk);
        int err;

        /* Do not send more than we queued. 1/4 is reserved for possible
         * copying overhead: fragmentation, tunneling, mangling etc.
         */
        if (atomic_read(&sk->wmem_alloc) > min(sk->wmem_queued+(sk->wmem_queued>>2),sk->sndbuf))
                return -EAGAIN;

        /* If receiver has shrunk his window, and skb is out of
         * new window, do not retransmit it. The exception is the
         * case, when window is shrunk to zero. In this case
         * our retransmit serves as a zero window probe.
         */
        if (!before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)
            && TCP_SKB_CB(skb)->seq != tp->snd_una)
                return -EAGAIN;

        if (skb->len > cur_mss) {
                if (tcp_fragment(sk, skb, cur_mss))
                        return -ENOMEM; /* We'll try again later. */

                /* New SKB created, account for it. */
                tp->packets_out++;
        }

        /* Collapse two adjacent packets if worthwhile and we can. */
        if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
            (skb->len < (cur_mss >> 1)) &&
            (skb->next != tp->send_head) &&
            (skb->next != (struct sk_buff *)&sk->write_queue) &&
            (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) &&
            (sysctl_tcp_retrans_collapse != 0))
                tcp_retrans_try_collapse(sk, skb, cur_mss);

        if (tp->af_specific->rebuild_header(sk))
                return -EHOSTUNREACH; /* Routing failure or similar. */

        /* Some Solaris stacks overoptimize and ignore the FIN on a
         * retransmit when old data is attached.  So strip it off
         * since it is cheap to do so and saves bytes on the network.
         */
        if (skb->len > 0 &&
            (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
            tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
                if (!pskb_trim(skb, 0)) {
                        TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
                        skb->ip_summed = CHECKSUM_NONE;
                        skb->csum = 0;
                }
        }

        /* Make a copy, if the first transmission SKB clone we made
         * is still in somebody's hands, else make a clone.
         */
        TCP_SKB_CB(skb)->when = tcp_time_stamp;
        err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
                                    pskb_copy(skb, GFP_ATOMIC) :
                                    skb_clone(skb, GFP_ATOMIC)));

        if (err == 0) {
                /* Update global TCP statistics. */
                TCP_INC_STATS(TcpRetransSegs);

#if FASTRETRANS_DEBUG > 0
                if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
                        if (net_ratelimit())
                                printk(KERN_DEBUG "retrans_out leaked.\n");
                }
#endif
                TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
                tp->retrans_out++;

                /* Save stamp of the first retransmit. */
                if (!tp->retrans_stamp)
                        tp->retrans_stamp = TCP_SKB_CB(skb)->when;

                tp->undo_retrans++;

                /* snd_nxt is stored to detect loss of retransmitted segment,
                 * see tcp_input.c tcp_sacktag_write_queue().
                 */
                TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
        }
        return err;
}
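/* The window test in tcp_retransmit_skb above relies on wraparound-safe
 * 32-bit sequence comparison.  These helpers mirror the kernel's
 * before()/after() macros, reimplemented here for illustration.
 */
#include <stdio.h>
#include <stdint.h>

static int seq_before(uint32_t seq1, uint32_t seq2)
{
        /* signed view of the difference handles wraparound correctly */
        return (int32_t)(seq1 - seq2) < 0;
}

static int seq_after(uint32_t seq1, uint32_t seq2)
{
        return seq_before(seq2, seq1);
}

int main(void)
{
        /* 0x00000005 comes after 0xfffffff0 once the sequence space wraps */
        printf("%d\n", seq_after(0x00000005u, 0xfffffff0u));    /* prints 1 */
        printf("%d\n", seq_before(0xfffffff0u, 0x00000005u));   /* prints 1 */
        return 0;
}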
/* This gets called after a retransmit timeout, and the initially
 * retransmitted data is acknowledged.  It tries to continue
 * resending the rest of the retransmit queue, until either
 * we've sent it all or the congestion window limit is reached.
 * If doing SACK, the first ACK which comes back for a timeout
 * based retransmit packet might feed us FACK information again.
 * If so, we use it to avoid unnecessary retransmissions.
 */
void tcp_xmit_retransmit_queue(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff *skb;
        int packet_cnt = tp->lost_out;

        /* First pass: retransmit lost packets. */
        if (packet_cnt) {
                for_retrans_queue(skb, sk, tp) {
                        __u8 sacked = TCP_SKB_CB(skb)->sacked;

                        if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
                                return;

                        if (sacked&TCPCB_LOST) {
                                if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
                                        if (tcp_retransmit_skb(sk, skb))
                                                return;
                                        if (tp->ca_state != TCP_CA_Loss)
                                                NET_INC_STATS_BH(TCPFastRetrans);
                                        else
                                                NET_INC_STATS_BH(TCPSlowStartRetrans);

                                        if (skb == skb_peek(&sk->write_queue))
                                                tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
                                }

                                if (--packet_cnt <= 0)
                                        break;
                        }
                }
        }

        /* OK, demanded retransmission is finished. */

        /* Forward retransmissions are possible only during Recovery. */
        if (tp->ca_state != TCP_CA_Recovery)
                return;

        /* No forward retransmissions in Reno are possible. */
        if (!tp->sack_ok)
                return;

        /* Yeah, we have to make difficult choice between forward transmission
         * and retransmission... Both ways have their merits...
         *
         * For now we do not retransmit anything, while we have some new
         * segments to send.
         */
        if (tcp_may_send_now(sk, tp))
                return;

        packet_cnt = 0;
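/* The first pass above stops as soon as the packets already in flight
 * fill the congestion window; in 2.4, tcp_packets_in_flight() is
 * packets_out - left_out + retrans_out.  A minimal sketch of that gate,
 * with hypothetical plain-int parameters standing in for the
 * struct tcp_opt fields.
 */
#include <stdio.h>

static int may_retransmit_sketch(int packets_out, int left_out,
                                 int retrans_out, int snd_cwnd)
{
        int in_flight = packets_out - left_out + retrans_out;

        return in_flight < snd_cwnd;    /* room left in the window? */
}

int main(void)
{
        printf("%d\n", may_retransmit_sketch(10, 4, 0, 8));     /* 6 < 8 -> 1 */
        printf("%d\n", may_retransmit_sketch(10, 1, 2, 8));     /* 11 >= 8 -> 0 */
        return 0;
}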
