
📄 tcp_input.c

📁 Linux kernel source code (compressed archive); this is the source code accompanying the book 《Linux内核》 (The Linux Kernel)
💻 C
📖 Page 1 of 5
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:	$Id: tcp_input.c,v 1.205 2000/12/13 18:31:48 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

/*
 * Changes:
 *		Pedro Roque	:	Fast Retransmit/Recovery.
 *					Two receive queues.
 *					Retransmit queue handled by TCP.
 *					Better retransmit timer handling.
 *					New congestion avoidance.
 *					Header prediction.
 *					Variable renaming.
 *
 *		Eric		:	Fast Retransmit.
 *		Randy Scott	:	MSS option defines.
 *		Eric Schenk	:	Fixes to slow start algorithm.
 *		Eric Schenk	:	Yet another double ACK bug.
 *		Eric Schenk	:	Delayed ACK bug fixes.
 *		Eric Schenk	:	Floyd style fast retrans war avoidance.
 *		David S. Miller	:	Don't allow zero congestion window.
 *		Eric Schenk	:	Fix retransmitter so that it sends
 *					next packet on ack of previous packet.
 *		Andi Kleen	:	Moved open_request checking here
 *					and process RSTs for open_requests.
 *		Andi Kleen	:	Better prune_queue, and other fixes.
 *		Andrey Savochkin:	Fix RTT measurements in the presence of
 *					timestamps.
 *		Andrey Savochkin:	Check sequence numbers correctly when
 *					removing SACKs due to in sequence incoming
 *					data segments.
 *		Andi Kleen:		Make sure we never ack data there is not
 *					enough room for. Also make this condition
 *					a fatal error if it might still happen.
 *		Andi Kleen:		Add tcp_measure_rcv_mss to make
 *					connections with MSS<min(MTU,ann. MSS)
 *					work without delayed acks.
 *		Andi Kleen:		Process packets with PSH set in the
 *					fast path.
 *		J Hadi Salim:		ECN support
 *	 	Andrei Gurtov,
 *		Pasi Sarolahti,
 *		Panu Kuhlberg:		Experimental audit of TCP (re)transmission
 *					engine. Lots of bugs are found.
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>

/* These are on by default so the code paths get tested.
 * For the final 2.2 this may be undone at our discretion. -DaveM
 */
int sysctl_tcp_timestamps = 1;
int sysctl_tcp_window_scaling = 1;
int sysctl_tcp_sack = 1;
int sysctl_tcp_fack = 1;
int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
#ifdef CONFIG_INET_ECN
int sysctl_tcp_ecn = 1;
#else
int sysctl_tcp_ecn = 0;
#endif
int sysctl_tcp_dsack = 1;
int sysctl_tcp_app_win = 31;
int sysctl_tcp_adv_win_scale = 2;

int sysctl_tcp_stdurg = 0;
int sysctl_tcp_rfc1337 = 0;
int sysctl_tcp_max_orphans = NR_FILE;

#define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
#define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
#define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data.		*/
#define FLAG_RETRANS_DATA_ACKED	0x08 /* "" "" some of which was retransmitted.	*/
#define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/
#define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
#define FLAG_ECE		0x40 /* ECE in this ACK				*/
#define FLAG_DATA_LOST		0x80 /* SACK detected data lossage.		*/
#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/

#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)

#define IsReno(tp) ((tp)->sack_ok == 0)
#define IsFack(tp) ((tp)->sack_ok & 2)
#define IsDSack(tp) ((tp)->sack_ok & 4)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)

/* Adapt the MSS value used to make delayed ack decision to the
 * real world.
 */
static __inline__ void tcp_measure_rcv_mss(struct tcp_opt *tp, struct sk_buff *skb)
{
	unsigned int len, lss;

	lss = tp->ack.last_seg_size;
	tp->ack.last_seg_size = 0;

	/* skb->len may jitter because of SACKs, even if peer
	 * sends good full-sized frames.
	 */
	len = skb->len;
	if (len >= tp->ack.rcv_mss) {
		tp->ack.rcv_mss = len;
		/* Dubious? Rather, it is final cut. 8) */
		if (tcp_flag_word(skb->h.th)&TCP_REMNANT)
			tp->ack.pending |= TCP_ACK_PUSHED;
	} else {
		/* Otherwise, we make more careful check taking into account,
		 * that SACKs block is variable.
		 *
		 * "len" is invariant segment length, including TCP header.
		 */
		len = skb->tail - skb->h.raw;
		if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) ||
		    /* If PSH is not set, packet should be
		     * full sized, provided peer TCP is not badly broken.
		     * This observation (if it is correct 8)) allows
		     * to handle super-low mtu links fairly.
		     */
		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
		     !(tcp_flag_word(skb->h.th)&TCP_REMNANT))) {
			/* Subtract also invariant (if peer is RFC compliant),
			 * tcp header plus fixed timestamp option length.
			 * Resulting "len" is MSS free of SACK jitter.
			 */
			len -= tp->tcp_header_len;
			tp->ack.last_seg_size = len;
			if (len == lss) {
				tp->ack.rcv_mss = len;
				return;
			}
		}
		tp->ack.pending |= TCP_ACK_PUSHED;
	}
}

static void tcp_incr_quickack(struct tcp_opt *tp)
{
	unsigned quickacks = tp->rcv_wnd/(2*tp->ack.rcv_mss);

	if (quickacks==0)
		quickacks=2;

	if (quickacks > tp->ack.quick)
		tp->ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
}

void tcp_enter_quickack_mode(struct tcp_opt *tp)
{
	tcp_incr_quickack(tp);
	tp->ack.pingpong = 0;
	tp->ack.ato = TCP_ATO_MIN;
}

/* Send ACKs quickly, if "quick" count is not exhausted
 * and the session is not interactive.
 */

static __inline__ int tcp_in_quickack_mode(struct tcp_opt *tp)
{
	return (tp->ack.quick && !tp->ack.pingpong);
}

/* Buffer size and advertised window tuning.
 *
 * 1. Tuning sk->sndbuf, when connection enters established state.
 */

static void tcp_fixup_sndbuf(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	int sndmem = tp->mss_clamp+MAX_TCP_HEADER+16+sizeof(struct sk_buff);

	if (sk->sndbuf < 3*sndmem)
		sk->sndbuf = min(3*sndmem, sysctl_tcp_wmem[2]);
}

/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
 *
 * All tcp_full_space() is split to two parts: "network" buffer, allocated
 * forward and advertised in receiver window (tp->rcv_wnd) and
 * "application buffer", required to isolate scheduling/application
 * latencies from network.
 * window_clamp is maximal advertised window. It can be less than
 * tcp_full_space(), in this case tcp_full_space() - window_clamp
 * is reserved for "application" buffer. The less window_clamp is
 * the smoother our behaviour from viewpoint of network, but the lower
 * throughput and the higher sensitivity of the connection to losses. 8)
 *
 * rcv_ssthresh is more strict window_clamp used at "slow start"
 * phase to predict further behaviour of this connection.
 * It is used for two goals:
 * - to enforce header prediction at sender, even when application
 *   requires some significant "application buffer". It is check #1.
 * - to prevent pruning of receive queue because of misprediction
 *   of receiver window. Check #2.
 *
 * The scheme does not work when sender sends good segments opening
 * window and then starts to feed us spaghetti. But it should work
 * in common situations. Otherwise, we have to rely on queue collapsing.
 */

/* Slow part of check#2. */
static int
__tcp_grow_window(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
{
	/* Optimize this! */
	int truesize = tcp_win_from_space(skb->truesize)/2;
	int window = tcp_full_space(sk)/2;

	while (tp->rcv_ssthresh <= window) {
		if (truesize <= skb->len)
			return 2*tp->ack.rcv_mss;

		truesize >>= 1;
		window >>= 1;
	}
	return 0;
}

static __inline__ void
tcp_grow_window(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
{
	/* Check #1 */
	if (tp->rcv_ssthresh < tp->window_clamp &&
	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
	    !tcp_memory_pressure) {
		int incr;

		/* Check #2. Increase window, if skb with such overhead
		 * will fit to rcvbuf in future.
		 */
		if (tcp_win_from_space(skb->truesize) <= skb->len)
			incr = 2*tp->advmss;
		else
			incr = __tcp_grow_window(sk, tp, skb);

		if (incr) {
			tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp);
			tp->ack.quick |= 1;
		}
	}
}

/* 3. Tuning rcvbuf, when connection enters established state. */

static void tcp_fixup_rcvbuf(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	int rcvmem = tp->advmss+MAX_TCP_HEADER+16+sizeof(struct sk_buff);

	/* Try to select rcvbuf so that 4 mss-sized segments
	 * will fit to window and corresponding skbs will fit to our rcvbuf.
	 * (was 3; 4 is minimum to allow fast retransmit to work.)
	 */
	while (tcp_win_from_space(rcvmem) < tp->advmss)
		rcvmem += 128;
	if (sk->rcvbuf < 4*rcvmem)
		sk->rcvbuf = min(4*rcvmem, sysctl_tcp_rmem[2]);
}

/* 4. Try to fixup all. It is made immediately after connection enters
 *    established state.
 */
static void tcp_init_buffer_space(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	int maxwin;

	if (!(sk->userlocks&SOCK_RCVBUF_LOCK))
		tcp_fixup_rcvbuf(sk);
	if (!(sk->userlocks&SOCK_SNDBUF_LOCK))
		tcp_fixup_sndbuf(sk);

	maxwin = tcp_full_space(sk);

	if (tp->window_clamp >= maxwin) {
		tp->window_clamp = maxwin;

		if (sysctl_tcp_app_win && maxwin>4*tp->advmss)
			tp->window_clamp = max(maxwin-(maxwin>>sysctl_tcp_app_win), 4*tp->advmss);
	}

	/* Force reservation of one segment. */
	if (sysctl_tcp_app_win &&
	    tp->window_clamp > 2*tp->advmss &&
	    tp->window_clamp + tp->advmss > maxwin)
		tp->window_clamp = max(2*tp->advmss, maxwin-tp->advmss);

	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
	tp->snd_cwnd_stamp = tcp_time_stamp;
}

/* 5. Recalculate window clamp after socket hit its memory bounds. */
static void tcp_clamp_window(struct sock *sk, struct tcp_opt *tp)
{
	struct sk_buff *skb;
	int app_win = tp->rcv_nxt - tp->copied_seq;
	int ofo_win = 0;

	tp->ack.quick = 0;

	skb_queue_walk(&tp->out_of_order_queue, skb) {
		ofo_win += skb->len;
	}

	/* If overcommit is due to out of order segments,
	 * do not clamp window. Try to expand rcvbuf instead.
	 */
	if (ofo_win) {
		if (sk->rcvbuf < sysctl_tcp_rmem[2] &&
		    !(sk->userlocks&SOCK_RCVBUF_LOCK) &&
		    !tcp_memory_pressure &&
		    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
			sk->rcvbuf = min(atomic_read(&sk->rmem_alloc), sysctl_tcp_rmem[2]);
	}
	if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) {
		app_win += ofo_win;
		if (atomic_read(&sk->rmem_alloc) >= 2*sk->rcvbuf)
			app_win >>= 1;
		if (app_win > tp->ack.rcv_mss)
			app_win -= tp->ack.rcv_mss;
		app_win = max(app_win, 2*tp->advmss);

		if (!ofo_win)
			tp->window_clamp = min(tp->window_clamp, app_win);
		tp->rcv_ssthresh = min(tp->window_clamp, 2*tp->advmss);
	}
}

/* There is something which you must keep in mind when you analyze the
 * behavior of the tp->ato delayed ack timeout interval.  When a
 * connection starts up, we want to ack as quickly as possible.  The
 * problem is that "good" TCP's do slow start at the beginning of data
 * transmission.  This means that until we send the first few ACK's the
 * sender will sit on his end and only queue most of his data, because
 * he can only send snd_cwnd unacked packets at any given time.  For
 * each ACK we send, he increments snd_cwnd and transmits more of his
 * queue.  -DaveM
 */
static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
{
	u32 now;

	tcp_schedule_ack(tp);

	tcp_measure_rcv_mss(tp, skb);

	now = tcp_time_stamp;

	if (!tp->ack.ato) {
		/* The _first_ data packet received, initialize
		 * delayed ACK engine.
		 */
		tcp_enter_quickack_mode(tp);
	} else {
		int m = now - tp->ack.lrcvtime;

		if (m <= TCP_ATO_MIN/2) {
			/* The fastest case is the first. */
			tp->ack.ato = (tp->ack.ato>>1) + TCP_ATO_MIN/2;
		} else if (m < tp->ack.ato) {
			tp->ack.ato = (tp->ack.ato>>1) + m;
			if (tp->ack.ato > tp->rto)
				tp->ack.ato = tp->rto;
		} else if (m > tp->rto) {
			/* Too long gap. Apparently sender failed to
			 * restart window, so that we send ACKs quickly.
			 */
			tcp_incr_quickack(tp);
			tcp_mem_reclaim(sk);
		}
	}
	tp->ack.lrcvtime = now;

	TCP_ECN_check_ce(tp, skb);

	if (skb->len >= 128)
		tcp_grow_window(sk, tp, skb);
}

/* Called to compute a smoothed rtt estimate. The data fed to this
 * routine either comes from timestamps, or from segments that were
 * known _not_ to have been retransmitted [see Karn/Partridge
 * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
 * piece by Van Jacobson.
 * NOTE: the next three routines used to be one big routine.
 * To save cycles in the RFC 1323 implementation it was better to break
 * it up into three procedures. -- erics
 */

static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
{
	long m = mrtt; /* RTT */

	/*	The following amusing code comes from Jacobson's
	 *	article in SIGCOMM '88.  Note that rtt and mdev
	 *	are scaled versions of rtt and mean deviation.
	 *	This is designed to be as fast as possible
	 *	m stands for "measurement".
	 *
	 *	On a 1990 paper the rto value is changed to:
	 *	RTO = rtt + 4 * mdev
	 *
	 * Funny. This algorithm seems to be very broken.
	 * These formulae increase RTO, when it should be decreased, increase
	 * too slowly, when it should be increased quickly, decrease too quickly,
	 * etc. I guess in BSD RTO takes ONE value, so that it absolutely
	 * does not matter how to _calculate_ it. Seems, it was a trap
	 * that VJ failed to avoid. 8)
	 */
	if(m == 0)
		m = 1;
	if (tp->srtt != 0) {
		m -= (tp->srtt >> 3);	/* m is now error in rtt est */
		tp->srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
		if (m < 0) {
			m = -m;		/* m is now abs(error) */
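
The comment above tcp_rtt_estimator() describes Jacobson's estimator kept in scaled fixed point: srtt is stored at 8 times the smoothed RTT, so "rtt = 7/8 rtt + 1/8 new" reduces to a shift and an add, and the RTO = rtt + 4 * mdev rule from the comment comes out as (srtt >> 3) + mdev when mdev is likewise stored at 4 times the mean deviation. The listing on this page is cut off mid-routine, so the sketch below is only a user-space illustration of that arithmetic under those scaling assumptions, not the kernel's code; struct rtt_est, rtt_sample() and rto_from() are hypothetical names.

/* Minimal user-space sketch of the scaled RTT/RTO arithmetic.
 * Assumes srtt is scaled by 8 and mdev by 4, as in the comment above.
 */
#include <stdio.h>

struct rtt_est {
	long srtt;	/* smoothed RTT, scaled by 8 */
	long mdev;	/* mean deviation, scaled by 4 */
};

/* Feed one new RTT measurement m (in clock ticks) into the estimator. */
static void rtt_sample(struct rtt_est *e, long m)
{
	if (m == 0)
		m = 1;
	if (e->srtt != 0) {
		m -= (e->srtt >> 3);	/* error against current estimate */
		e->srtt += m;		/* srtt = 7/8 srtt + 1/8 new */
		if (m < 0)
			m = -m;		/* |error| */
		m -= (e->mdev >> 2);
		e->mdev += m;		/* mdev = 3/4 mdev + 1/4 |error| */
	} else {
		/* First measurement: seed srtt with it, and seed the
		 * deviation so the first RTO comes out near 3 * rtt.
		 */
		e->srtt = m << 3;
		e->mdev = m << 1;
	}
}

/* RTO = rtt + 4 * mdev; with the scaling above this is (srtt >> 3) + mdev. */
static long rto_from(const struct rtt_est *e)
{
	return (e->srtt >> 3) + e->mdev;
}

int main(void)
{
	struct rtt_est e = { 0, 0 };
	long samples[] = { 100, 120, 80, 300, 100 };
	unsigned i;

	for (i = 0; i < sizeof(samples)/sizeof(samples[0]); i++) {
		rtt_sample(&e, samples[i]);
		printf("rtt=%ld srtt=%ld mdev=%ld rto=%ld\n",
		       samples[i], e.srtt >> 3, e.mdev >> 2, rto_from(&e));
	}
	return 0;
}

Keeping both terms pre-multiplied by a power of two is what lets the estimator run on every ACK with only shifts, adds and compares, with no division or floating point in the fast path.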
