tcp_input.c
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:	$Id: tcp_input.c,v 1.164.2.8 1999/09/23 19:21:23 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

/*
 * Changes:
 *		Pedro Roque	:	Fast Retransmit/Recovery.
 *					Two receive queues.
 *					Retransmit queue handled by TCP.
 *					Better retransmit timer handling.
 *					New congestion avoidance.
 *					Header prediction.
 *					Variable renaming.
 *
 *		Eric		:	Fast Retransmit.
 *		Randy Scott	:	MSS option defines.
 *		Eric Schenk	:	Fixes to slow start algorithm.
 *		Eric Schenk	:	Yet another double ACK bug.
 *		Eric Schenk	:	Delayed ACK bug fixes.
 *		Eric Schenk	:	Floyd style fast retrans war avoidance.
 *		David S. Miller	:	Don't allow zero congestion window.
 *		Eric Schenk	:	Fix retransmitter so that it sends
 *					next packet on ack of previous packet.
 *		Andi Kleen	:	Moved open_request checking here
 *					and process RSTs for open_requests.
 *		Andi Kleen	:	Better prune_queue, and other fixes.
 *		Andrey Savochkin:	Fix RTT measurements in the presence of
 *					timestamps.
 *		Andrey Savochkin:	Check sequence numbers correctly when
 *					removing SACKs due to in sequence incoming
 *					data segments.
 *		Andi Kleen:		Make sure we never ack data there is not
 *					enough room for. Also make this condition
 *					a fatal error if it might still happen.
 *		Andi Kleen:		Add tcp_measure_rcv_mss to make
 *					connections with MSS<min(MTU,ann. MSS)
 *					work without delayed acks.
 *		Andi Kleen:		Process packets with PSH set in the
 *					fast path.
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <net/tcp.h>
#include <linux/ipsec.h>

#ifdef CONFIG_SYSCTL
#define SYNC_INIT 0 /* let the user enable it */
#else
#define SYNC_INIT 1
#endif

extern int sysctl_tcp_fin_timeout;

/* These are on by default so the code paths get tested.
 * For the final 2.2 this may be undone at our discretion. -DaveM
 */
int sysctl_tcp_timestamps = 1;
int sysctl_tcp_window_scaling = 1;
int sysctl_tcp_sack = 1;
int sysctl_tcp_syncookies = SYNC_INIT;
int sysctl_tcp_stdurg;
int sysctl_tcp_rfc1337;

static int prune_queue(struct sock *sk);

/* There is something which you must keep in mind when you analyze the
 * behavior of the tp->ato delayed ack timeout interval.  When a
 * connection starts up, we want to ack as quickly as possible.  The
 * problem is that "good" TCP's do slow start at the beginning of data
 * transmission.  This means that until we send the first few ACK's the
 * sender will sit on his end and only queue most of his data, because
 * he can only send snd_cwnd unacked packets at any given time.  For
 * each ACK we send, he increments snd_cwnd and transmits more of his
 * queue.  -DaveM
 */
static void tcp_delack_estimator(struct tcp_opt *tp)
{
	if (tp->ato == 0) {
		tp->lrcvtime = tcp_time_stamp;

		/* Help sender leave slow start quickly,
		 * and also makes sure we do not take this
		 * branch ever again for this connection.
		 */
		tp->ato = 1;
		tcp_enter_quickack_mode(tp);
	} else {
		int m = tcp_time_stamp - tp->lrcvtime;

		tp->lrcvtime = tcp_time_stamp;
		if (m <= 0)
			m = 1;
		if (m > tp->rto)
			tp->ato = tp->rto;
		else {
			/* This funny shift makes sure we
			 * clear the "quick ack mode" bit.
			 */
			tp->ato = ((tp->ato << 1) >> 2) + m;
		}
	}
}

/*
 * Remember to send an ACK later.
 */
static __inline__ void tcp_remember_ack(struct tcp_opt *tp, struct tcphdr *th,
					struct sk_buff *skb)
{
	tp->delayed_acks++;

	/* Tiny-grams with PSH set artificially deflate our
	 * ato measurement, but with a lower bound.
	 */
	if (th->psh && (skb->len < (tp->mss_cache >> 1))) {
		/* Preserve the quickack state. */
		if ((tp->ato & 0x7fffffff) > HZ/50)
			tp->ato = ((tp->ato & 0x80000000) | (HZ/50));
	}
}

/* Called to compute a smoothed rtt estimate. The data fed to this
 * routine either comes from timestamps, or from segments that were
 * known _not_ to have been retransmitted [see Karn/Partridge
 * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
 * piece by Van Jacobson.
 * NOTE: the next three routines used to be one big routine.
 * To save cycles in the RFC 1323 implementation it was better to break
 * it up into three procedures. -- erics
 */
static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
{
	long m = mrtt; /* RTT */

	/* The following amusing code comes from Jacobson's
	 * article in SIGCOMM '88.  Note that rtt and mdev
	 * are scaled versions of rtt and mean deviation.
	 * This is designed to be as fast as possible.
	 * m stands for "measurement".
	 *
	 * In a 1990 paper the rto value is changed to:
	 * RTO = rtt + 4 * mdev
	 */
	if (m == 0)
		m = 1;
	if (tp->srtt != 0) {
		m -= (tp->srtt >> 3);	/* m is now error in rtt est */
		tp->srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
		if (m < 0)
			m = -m;		/* m is now abs(error) */
		m -= (tp->mdev >> 2);	/* similar update on mdev */
		tp->mdev += m;		/* mdev = 3/4 mdev + 1/4 new */
	} else {
		/* no previous measure. */
		tp->srtt = m << 3;	/* take the measured time to be rtt */
		tp->mdev = m << 2;	/* make sure rto = 3*rtt */
	}
}

/* Calculate rto without backoff.  This is the second half of Van Jacobson's
 * routine referred to above.
 */
static __inline__ void tcp_set_rto(struct tcp_opt *tp)
{
	tp->rto = (tp->srtt >> 3) + tp->mdev;
	tp->rto += (tp->rto >> 2) + (tp->rto >> (tp->snd_cwnd-1));
}

/* Keep the rto between HZ/5 and 120*HZ. 120*HZ is the upper bound
 * on packet lifetime in the internet.  We need the HZ/5 lower
 * bound to behave correctly against BSD stacks with a fixed
 * delayed ack.
 * FIXME: It's not entirely clear this lower bound is the best
 * way to avoid the problem. Is it possible to drop the lower
 * bound and still avoid trouble with BSD stacks? Perhaps
 * some modification to the RTO calculation that takes delayed
 * ack bias into account? This needs serious thought. -- erics
 */
static __inline__ void tcp_bound_rto(struct tcp_opt *tp)
{
	if (tp->rto > 120*HZ)
		tp->rto = 120*HZ;
	if (tp->rto < HZ/5)
		tp->rto = HZ/5;
}

/* WARNING: this must not be called if tp->saw_timestamp was false. */
extern __inline__ void tcp_replace_ts_recent(struct sock *sk, struct tcp_opt *tp,
					     __u32 start_seq, __u32 end_seq)
{
	/* It is start_seq <= last_ack_seq combined with an in-window check.
	 * If start_seq <= last_ack_seq <= rcv_nxt, then the segment is in
	 * window if end_seq >= rcv_nxt.
	 */
	if (!after(start_seq, tp->last_ack_sent) &&
	    !before(end_seq, tp->rcv_nxt)) {
		/* PAWS bug workaround wrt. ACK frames, the PAWS discard
		 * extra check below makes sure this can only happen
		 * for pure ACK frames.  -DaveM
		 *
		 * Plus: expired timestamps.
		 *
		 * Plus: resets failing PAWS.
		 */
		if ((s32)(tp->rcv_tsval - tp->ts_recent) >= 0) {
			tp->ts_recent = tp->rcv_tsval;
			tp->ts_recent_stamp = tcp_time_stamp;
		}
	}
}

#define PAWS_24DAYS	(HZ * 60 * 60 * 24 * 24)

extern __inline__ int tcp_paws_discard(struct tcp_opt *tp, struct tcphdr *th, unsigned len)
{
	return ((s32)(tp->rcv_tsval - tp->ts_recent) < 0 &&
		(s32)(tcp_time_stamp - tp->ts_recent_stamp) < PAWS_24DAYS &&
		/* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM */
		len != (th->doff * 4));
}

static int __tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
{
	u32 end_window = tp->rcv_wup + tp->rcv_wnd;

	if (tp->rcv_wnd &&
	    after(end_seq, tp->rcv_nxt) &&
	    before(seq, end_window))
		return 1;
	if (seq != end_window)
		return 0;
	return (seq == end_seq);
}

/* This function checks to see if the tcp header is actually acceptable. */
extern __inline__ int tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
{
	if (seq == tp->rcv_nxt)
		return (tp->rcv_wnd || (end_seq == seq));

	return __tcp_sequence(tp, seq, end_seq);
}

/* When we get a reset we do this. */
static void tcp_reset(struct sock *sk)
{
	sk->zapped = 1;

	/* We want the right error as BSD sees it (and indeed as we do). */
	switch (sk->state) {
	case TCP_SYN_SENT:
		sk->err = ECONNREFUSED;
		break;
	case TCP_CLOSE_WAIT:
		sk->err = EPIPE;
		break;
	default:
		sk->err = ECONNRESET;
	};
	tcp_set_state(sk, TCP_CLOSE);
	sk->shutdown = SHUTDOWN_MASK;
	if (!sk->dead)
		sk->state_change(sk);
}

/* This tags the retransmission queue when SACKs arrive. */
static void tcp_sacktag_write_queue(struct sock *sk, struct tcp_sack_block *sp, int nsacks)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	int i = nsacks;

	while (i--) {
		struct sk_buff *skb = skb_peek(&sk->write_queue);
		__u32 start_seq = ntohl(sp->start_seq);
		__u32 end_seq = ntohl(sp->end_seq);
		int fack_count = 0;

		while ((skb != NULL) &&
		       (skb != tp->send_head) &&
		       (skb != (struct sk_buff *)&sk->write_queue)) {
			/* The retransmission queue is always in order, so
			 * we can short-circuit the walk early.
			 */
			if (after(TCP_SKB_CB(skb)->seq, end_seq))
				break;

			/* We play conservative, we don't allow SACKS to partially
			 * tag a sequence space.
			 */
			fack_count++;
			if (!after(start_seq, TCP_SKB_CB(skb)->seq) &&
			    !before(end_seq, TCP_SKB_CB(skb)->end_seq)) {
				/* If this was a retransmitted frame, account for it. */
				if ((TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) &&
				    tp->retrans_out)
					tp->retrans_out--;
				TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;

				/* RULE: All new SACKs will either decrease retrans_out
				 *       or advance fackets_out.
				 */
				if (fack_count > tp->fackets_out)
					tp->fackets_out = fack_count;
			}
			skb = skb->next;
		}
		sp++; /* Move on to the next SACK block. */
	}
}

/* Look for tcp options. Normally only called on SYN and SYNACK packets.
 * But, this can also be called on packets in the established flow when
 * the fast version below fails.
 */
void tcp_parse_options(struct sock *sk, struct tcphdr *th, struct tcp_opt *tp, int no_fancy)
{
	unsigned char *ptr;
	int length = (th->doff*4) - sizeof(struct tcphdr);
	int saw_mss = 0;

	ptr = (unsigned char *)(th + 1);
	tp->saw_tstamp = 0;

	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		default:
			opsize = *ptr++;
			if (opsize < 2)	/* "silly options" */
				return;
			if (opsize > length)
				break;	/* don't parse partial options */
			switch (opcode) {
			case TCPOPT_MSS:
				if (opsize == TCPOLEN_MSS && th->syn) {
					u16 in_mss = ntohs(*(__u16 *)ptr);
					if (in_mss == 0)
						in_mss = 536;
					if (tp->mss_clamp > in_mss)
						tp->mss_clamp = in_mss;
					saw_mss = 1;
				}
				break;
			case TCPOPT_WINDOW:
				if (opsize == TCPOLEN_WINDOW && th->syn)
					if (!no_fancy && sysctl_tcp_window_scaling) {
						tp->wscale_ok = 1;
						tp->snd_wscale = *(__u8 *)ptr;
						if (tp->snd_wscale > 14) {
							if (net_ratelimit())
								printk("tcp_parse_options: Illegal window "
								       "scaling value %d >14 received.",
								       tp->snd_wscale);
							tp->snd_wscale = 14;
						}
					}
				break;
			case TCPOPT_TIMESTAMP:
				if (opsize == TCPOLEN_TIMESTAMP) {
					if (sysctl_tcp_timestamps && !no_fancy) {
						tp->tstamp_ok = 1;
						tp->saw_tstamp = 1;
						tp->rcv_tsval = ntohl(*(__u32 *)ptr);
						tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4));
					}
				}
				break;
			case TCPOPT_SACK_PERM:
				if (opsize == TCPOLEN_SACK_PERM && th->syn) {
					if (sysctl_tcp_sack && !no_fancy) {
						tp->sack_ok = 1;
						tp->num_sacks = 0;
					}
				}
				break;
			case TCPOPT_SACK:
				if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
				    sysctl_tcp_sack && (sk != NULL) && !th->syn) {
					int sack_bytes = opsize - TCPOLEN_SACK_BASE;

					if (!(sack_bytes % TCPOLEN_SACK_PERBLOCK)) {
						int num_sacks = sack_bytes >> 3;
						struct tcp_sack_block *sackp;

						sackp = (struct tcp_sack_block *)ptr;
						tcp_sacktag_write_queue(sk, sackp, num_sacks);
					}
				}
			};
			ptr += opsize-2;
			length -= opsize;
		};
	}
	if (th->syn && saw_mss == 0)
		tp->mss_clamp = 536;
}

/* Fast parse options. This hopes to only see timestamps.
 * If it is wrong it falls back on tcp_parse_options().
 */
static __inline__ int tcp_fast_parse_options(struct sock *sk, struct tcphdr *th, struct tcp_opt *tp)
{
	/* If we didn't send out any options ignore them all. */
	if (tp->tcp_header_len == sizeof(struct tcphdr))
		return 0;
	if (th->doff == sizeof(struct tcphdr)>>2) {
		tp->saw_tstamp = 0;
		return 0;
	} else if (th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
		__u32 *ptr = (__u32 *)(th + 1);
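
/* Illustrative, standalone user-space sketch (not part of this file or of
 * the kernel build): it mirrors the fixed-point arithmetic used by
 * tcp_rtt_estimator() and tcp_set_rto() above, where srtt is stored scaled
 * by 8 and mdev scaled by 4, so that (srtt >> 3) + mdev approximates
 * RTT + 4 * mean deviation.  It deliberately omits the extra snd_cwnd term
 * and the HZ/5..120*HZ clamp; the tick values in main() are made up.
 */
#if 0
#include <stdio.h>

struct rtt_state { long srtt; long mdev; long rto; };

static void demo_rtt_sample(struct rtt_state *s, long m)
{
	if (m == 0)
		m = 1;
	if (s->srtt != 0) {
		m -= (s->srtt >> 3);	/* error vs. current estimate */
		s->srtt += m;		/* srtt = 7/8 srtt + 1/8 new   */
		if (m < 0)
			m = -m;		/* absolute error              */
		m -= (s->mdev >> 2);
		s->mdev += m;		/* mdev = 3/4 mdev + 1/4 |err| */
	} else {
		s->srtt = m << 3;	/* first sample seeds the estimate */
		s->mdev = m << 2;	/* so that initial rto = 3*rtt     */
	}
	s->rto = (s->srtt >> 3) + s->mdev;	/* ~ RTT + 4*mdev */
}

int main(void)
{
	struct rtt_state s = { 0, 0, 0 };
	long samples[] = { 100, 120, 80, 300 };	/* hypothetical ticks */
	int i;

	for (i = 0; i < 4; i++) {
		demo_rtt_sample(&s, samples[i]);
		printf("m=%3ld  srtt=%3ld  mdev/4=%3ld  rto=%3ld\n",
		       samples[i], s.srtt >> 3, s.mdev >> 2, s.rto);
	}
	return 0;
}
#endif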
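
/* A second standalone sketch (again, not kernel code): the PAWS test in
 * tcp_paws_discard() and the before()/after() helpers used throughout this
 * file compare 32-bit values via a signed difference, (s32)(a - b) < 0, so
 * ordering stays correct across sequence-number or timestamp wraparound as
 * long as the two values are within 2^31 of each other.  The numbers below
 * are arbitrary demonstration values.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

static int demo_before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;	/* 1 if a precedes b, modulo 2^32 */
}

int main(void)
{
	uint32_t near_wrap = 0xfffffff0u;
	uint32_t wrapped   = 0x00000010u;	/* 0x20 later, past the wrap */

	/* Plain "<" would get this backwards; serial arithmetic does not. */
	printf("before(near_wrap, wrapped) = %d\n",
	       demo_before(near_wrap, wrapped));	/* prints 1 */
	printf("before(wrapped, near_wrap) = %d\n",
	       demo_before(wrapped, near_wrap));	/* prints 0 */
	return 0;
}
#endif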
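
/* A final standalone sketch (not kernel code): the kind/length walk that
 * tcp_parse_options() performs over the bytes between the fixed header and
 * doff*4.  EOL ends parsing, NOP is a one-byte pad, and every other option
 * carries a length byte that covers the kind and length bytes themselves.
 * The option names and the hand-built option block are demonstration
 * values only.
 */
#if 0
#include <stdio.h>

#define DEMO_TCPOPT_EOL	0
#define DEMO_TCPOPT_NOP	1
#define DEMO_TCPOPT_MSS	2

static void demo_walk_options(const unsigned char *ptr, int length)
{
	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case DEMO_TCPOPT_EOL:
			return;
		case DEMO_TCPOPT_NOP:
			length--;
			continue;
		default:
			opsize = *ptr++;
			if (opsize < 2)		/* length must cover kind+len */
				return;
			if (opsize > length)	/* truncated option */
				return;
			if (opcode == DEMO_TCPOPT_MSS && opsize == 4)
				printf("MSS = %u\n",
				       (unsigned)((ptr[0] << 8) | ptr[1]));
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}

int main(void)
{
	/* NOP, NOP, MSS(1460), EOL */
	unsigned char opts[] = { 1, 1, 2, 4, 0x05, 0xb4, 0 };

	demo_walk_options(opts, sizeof(opts));
	return 0;
}
#endif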