
tcp_input.c

Linux kernel source code, from the compressed source archive for the book 《Linux内核》 (The Linux Kernel)
Language: C
Page 1 of 5
			m -= (tp->mdev >> 2);   /* similar update on mdev */
			/* This is similar to one of Eifel findings.
			 * Eifel blocks mdev updates when rtt decreases.
			 * This solution is a bit different: we use finer gain
			 * for mdev in this case (alpha*beta).
			 * Like Eifel it also prevents growth of rto,
			 * but also it limits too fast rto decreases,
			 * happening in pure Eifel.
			 */
			if (m > 0)
				m >>= 3;
		} else {
			m -= (tp->mdev >> 2);   /* similar update on mdev */
		}
		tp->mdev += m;	    	/* mdev = 3/4 mdev + 1/4 new */
		if (tp->mdev > tp->mdev_max) {
			tp->mdev_max = tp->mdev;
			if (tp->mdev_max > tp->rttvar)
				tp->rttvar = tp->mdev_max;
		}
		if (after(tp->snd_una, tp->rtt_seq)) {
			if (tp->mdev_max < tp->rttvar)
				tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2;
			tp->rtt_seq = tp->snd_una;
			tp->mdev_max = TCP_RTO_MIN;
		}
	} else {
		/* no previous measure. */
		tp->srtt = m<<3;	/* take the measured time to be rtt */
		tp->mdev = m<<2;	/* make sure rto = 3*rtt */
		tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
		tp->rtt_seq = tp->snd_nxt;
	}
}

/* Calculate rto without backoff.  This is the second half of Van Jacobson's
 * routine referred to above.
 */
static __inline__ void tcp_set_rto(struct tcp_opt *tp)
{
	/* Old crap is replaced with new one. 8)
	 *
	 * More seriously:
	 * 1. If rtt variance happened to be less than 50msec, it is hallucination.
	 *    It cannot be less due to utterly erratic ACK generation made
	 *    at least by solaris and freebsd. "Erratic ACKs" has _nothing_
	 *    to do with delayed acks, because at cwnd>2 true delack timeout
	 *    is invisible. Actually, Linux-2.4 also generates erratic
	 *    ACKs in some circumstances.
	 */
	tp->rto = (tp->srtt >> 3) + tp->rttvar;

	/* 2. Fixups made earlier cannot be right.
	 *    If we do not estimate RTO correctly without them,
	 *    all the algo is pure shit and should be replaced
	 *    with correct one. That is exactly what we pretend to do.
	 */
}

/* NOTE: clamping at TCP_RTO_MIN is not required, current algo
 * guarantees that rto is higher.
 */
static __inline__ void tcp_bound_rto(struct tcp_opt *tp)
{
	if (tp->rto > TCP_RTO_MAX)
		tp->rto = TCP_RTO_MAX;
}
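/* [Editor's sketch: not part of tcp_input.c.  example_rtt_update() and its
 * parameter names are invented for illustration.]
 *
 * The fixed-point scaling above is easy to lose track of: srtt is stored
 * <<3 (8 * SRTT) and mdev/rttvar <<2 (4 * MDEV), so
 *
 *	rto = (srtt >> 3) + rttvar
 *
 * is the classic Van Jacobson RTO = SRTT + 4*MDEV.  The same update with
 * the scaling spelled out, using the kernel's gains (1/8 for srtt, 1/4 for
 * mdev) but ignoring the Eifel refinement and the mdev_max/rttvar
 * bookkeeping:
 */
static void example_rtt_update(long *srtt8, long *mdev4, long m)
{
	long err;

	if (m == 0)			/* never feed a zero sample */
		m = 1;
	err = m - (*srtt8 >> 3);	/* m - SRTT, in real time units */
	*srtt8 += err;			/* srtt = 7/8 srtt + 1/8 m (scaled <<3) */
	if (err < 0)
		err = -err;
	err -= (*mdev4 >> 2);		/* |err| - MDEV */
	*mdev4 += err;			/* mdev = 3/4 mdev + 1/4 |err| (scaled <<2) */
}
/* Worked example: SRTT = 100 ticks (srtt8 == 800), MDEV = 5 (mdev4 == 20),
 * new sample m = 120.  Then err = 20, srtt8 -> 820 (SRTT 102.5), and
 * |err| - MDEV = 15, mdev4 -> 35 (MDEV 8.75), giving
 * rto = (820 >> 3) + 35 = 137 ticks.
 */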
/* Save metrics learned by this TCP session.
   This function is called only when TCP finishes successfully,
   i.e. when it enters TIME-WAIT or goes from LAST-ACK to CLOSE.
 */
void tcp_update_metrics(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct dst_entry *dst = __sk_dst_get(sk);

	dst_confirm(dst);

	if (dst && (dst->flags&DST_HOST)) {
		int m;

		if (tp->backoff || !tp->srtt) {
			/* This session failed to estimate rtt. Why?
			 * Probably, no packets returned in time.
			 * Reset our results.
			 */
			if (!(dst->mxlock&(1<<RTAX_RTT)))
				dst->rtt = 0;
			return;
		}

		m = dst->rtt - tp->srtt;

		/* If the newly calculated rtt is larger than the stored one,
		 * store the new one. Otherwise, use EWMA. Remember,
		 * rtt overestimation is always better than underestimation.
		 */
		if (!(dst->mxlock&(1<<RTAX_RTT))) {
			if (m <= 0)
				dst->rtt = tp->srtt;
			else
				dst->rtt -= (m>>3);
		}

		if (!(dst->mxlock&(1<<RTAX_RTTVAR))) {
			if (m < 0)
				m = -m;

			/* Scale deviation to rttvar fixed point */
			m >>= 1;
			if (m < tp->mdev)
				m = tp->mdev;

			if (m >= dst->rttvar)
				dst->rttvar = m;
			else
				dst->rttvar -= (dst->rttvar - m)>>2;
		}

		if (tp->snd_ssthresh >= 0xFFFF) {
			/* Slow start still did not finish. */
			if (dst->ssthresh &&
			    !(dst->mxlock&(1<<RTAX_SSTHRESH)) &&
			    (tp->snd_cwnd>>1) > dst->ssthresh)
				dst->ssthresh = (tp->snd_cwnd>>1);
			if (!(dst->mxlock&(1<<RTAX_CWND)) &&
			    tp->snd_cwnd > dst->cwnd)
				dst->cwnd = tp->snd_cwnd;
		} else if (tp->snd_cwnd > tp->snd_ssthresh &&
			   tp->ca_state == TCP_CA_Open) {
			/* Cong. avoidance phase, cwnd is reliable. */
			if (!(dst->mxlock&(1<<RTAX_SSTHRESH)))
				dst->ssthresh = max(tp->snd_cwnd>>1, tp->snd_ssthresh);
			if (!(dst->mxlock&(1<<RTAX_CWND)))
				dst->cwnd = (dst->cwnd + tp->snd_cwnd)>>1;
		} else {
			/* Else slow start did not finish, cwnd is nonsense,
			   ssthresh may also be invalid.
			 */
			if (!(dst->mxlock&(1<<RTAX_CWND)))
				dst->cwnd = (dst->cwnd + tp->snd_ssthresh)>>1;
			if (dst->ssthresh &&
			    !(dst->mxlock&(1<<RTAX_SSTHRESH)) &&
			    tp->snd_ssthresh > dst->ssthresh)
				dst->ssthresh = tp->snd_ssthresh;
		}

		if (!(dst->mxlock&(1<<RTAX_REORDERING))) {
			if (dst->reordering < tp->reordering &&
			    tp->reordering != sysctl_tcp_reordering)
				dst->reordering = tp->reordering;
		}
	}
}

/* Increase initial CWND conservatively: if estimated
 * RTT is low enough (<20msec) or if we have some preset ssthresh.
 *
 * Numbers are taken from RFC2414.
 */
__u32 tcp_init_cwnd(struct tcp_opt *tp)
{
	__u32 cwnd;

	if (tp->mss_cache > 1460)
		return 2;

	cwnd = (tp->mss_cache > 1095) ? 3 : 4;

	if (!tp->srtt || (tp->snd_ssthresh >= 0xFFFF && tp->srtt > ((HZ/50)<<3)))
		cwnd = 2;
	else if (cwnd > tp->snd_ssthresh)
		cwnd = tp->snd_ssthresh;

	return min(cwnd, tp->snd_cwnd_clamp);
}
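/* [Editor's note.]  The thresholds above fall out of RFC 2414's initial
 * window formula
 *
 *	IW = min(4*MSS, max(2*MSS, 4380 bytes))
 *
 * with 4380 = 3 * 1460 (three Ethernet-sized segments):
 *
 *	MSS <= 1095 (4380/4)	-> 4 segments
 *	1095 < MSS <= 1460	-> 3 segments
 *	MSS > 1460		-> 2 segments
 *
 * tcp_init_cwnd() then falls back to 2 segments when the RTT estimate is
 * missing or too high, exactly as the comment above it says.
 */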
/* Initialize metrics on socket. */
static void tcp_init_metrics(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct dst_entry *dst = __sk_dst_get(sk);

	if (dst == NULL)
		goto reset;

	dst_confirm(dst);

	if (dst->mxlock&(1<<RTAX_CWND))
		tp->snd_cwnd_clamp = dst->cwnd;
	if (dst->ssthresh) {
		tp->snd_ssthresh = dst->ssthresh;
		if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
			tp->snd_ssthresh = tp->snd_cwnd_clamp;
	}
	if (dst->reordering && tp->reordering != dst->reordering) {
		tp->sack_ok &= ~2;
		tp->reordering = dst->reordering;
	}

	if (dst->rtt == 0)
		goto reset;

	if (!tp->srtt && dst->rtt < (TCP_TIMEOUT_INIT<<3))
		goto reset;

	/* Initial rtt is determined from SYN,SYN-ACK.
	 * The segment is small and rtt may appear much
	 * less than the real one. Use per-dst memory
	 * to make it more realistic.
	 *
	 * A bit of theory. RTT is the time passed from when a "normal" sized
	 * packet is sent until it is ACKed. In normal circumstances sending
	 * small packets forces the peer to delay ACKs and the calculation is
	 * correct too. The algorithm is adaptive and, provided we follow specs,
	 * it NEVER underestimates RTT. BUT! If the peer tries clever tricks
	 * sort of "quick acks" for long enough to decrease RTT to a low value,
	 * and then abruptly stops doing it and starts to delay ACKs,
	 * wait for troubles.
	 */
	if (dst->rtt > tp->srtt)
		tp->srtt = dst->rtt;
	if (dst->rttvar > tp->mdev) {
		tp->mdev = dst->rttvar;
		tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
	}
	tcp_set_rto(tp);
	tcp_bound_rto(tp);
	if (tp->rto < TCP_TIMEOUT_INIT && !tp->saw_tstamp)
		goto reset;
	tp->snd_cwnd = tcp_init_cwnd(tp);
	tp->snd_cwnd_stamp = tcp_time_stamp;
	return;

reset:
	/* Play conservative. If timestamps are not
	 * supported, TCP will fail to recalculate correct
	 * rtt, if initial rto is too small. FORGET ALL AND RESET!
	 */
	if (!tp->saw_tstamp && tp->srtt) {
		tp->srtt = 0;
		tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
		tp->rto = TCP_TIMEOUT_INIT;
	}
}

static void tcp_update_reordering(struct tcp_opt *tp, int metric, int ts)
{
	if (metric > tp->reordering) {
		tp->reordering = min(TCP_MAX_REORDERING, metric);

		/* This exciting event is worth remembering. 8) */
		if (ts)
			NET_INC_STATS_BH(TCPTSReorder);
		else if (IsReno(tp))
			NET_INC_STATS_BH(TCPRenoReorder);
		else if (IsFack(tp))
			NET_INC_STATS_BH(TCPFACKReorder);
		else
			NET_INC_STATS_BH(TCPSACKReorder);
#if FASTRETRANS_DEBUG > 1
		printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
		       tp->sack_ok, tp->ca_state,
		       tp->reordering, tp->fackets_out, tp->sacked_out,
		       tp->undo_marker ? tp->undo_retrans : 0);
#endif
		/* Disable FACK for now. */
		tp->sack_ok &= ~2;
	}
}

/* This procedure tags the retransmission queue when SACKs arrive.
 *
 * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
 * Packets in queue with these bits set are counted in variables
 * sacked_out, retrans_out and lost_out, correspondingly.
 *
 * Valid combinations are:
 * Tag  InFlight	Description
 * 0	1		- orig segment is in flight.
 * S	0		- nothing flies, orig reached receiver.
 * L	0		- nothing flies, orig lost by net.
 * R	2		- both orig and retransmit are in flight.
 * L|R	1		- orig is lost, retransmit is in flight.
 * S|R	1		- orig reached receiver, retrans is still in flight.
 * (L|S|R is logically valid, it could occur when L|R is sacked,
 *  but it is equivalent to plain S and the code short-circuits it to S.
 *  L|S is logically invalid, it would mean -1 packet in flight 8))
 *
 * These 6 states form a finite state machine, controlled by the following events:
 * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
 * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
 * 3. Loss detection event of one of three flavors:
 *	A. Scoreboard estimator decided the packet is lost.
 *	   A'. Reno "three dupacks" marks head of queue lost.
 *	   A''. Its FACK modification, head until snd.fack is lost.
 *	B. SACK arrives sacking data transmitted after a never-retransmitted
 *	   hole was sent out.
 *	C. SACK arrives sacking SND.NXT at the moment, when the
 *	   segment was retransmitted.
 * 4. D-SACK added a new rule: D-SACK changes any tag to S.
 *
 * It is pleasant to note that the state diagram turns out to be commutative,
 * so that we are allowed not to be bothered by the order of our actions
 * when multiple events arrive simultaneously. (see the function below).
 *
 * Reordering detection.
 * --------------------
 * The reordering metric is the maximal distance by which a packet can be
 * displaced in the packet stream. With SACKs we can estimate it:
 *
 * 1. SACK fills an old hole and the corresponding segment was not
 *    ever retransmitted -> reordering. Alas, we cannot use it
 *    when the segment was retransmitted.
 * 2. The last flaw is solved with D-SACK. D-SACK arrives
 *    for a retransmitted and already SACKed segment -> reordering.
 * Both of these heuristics are not used in Loss state, when we cannot
 * account for retransmits accurately.
 */
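/* [Editor's sketch: not part of tcp_input.c.  example_in_flight() and the
 * TAG_* constants are invented for illustration; the real tags are the
 * TCPCB_* bits used below.]
 *
 * The InFlight column of the table above is plain counter arithmetic:
 * each segment contributes 1 for the original transmission, -1 per S or L
 * bit (accumulated in sacked_out/lost_out), and +1 per R bit
 * (retrans_out), which is why packets in flight can be computed as
 * packets_out - (sacked_out + lost_out) + retrans_out.
 */
enum { TAG_S = 1, TAG_R = 2, TAG_L = 4 };

static int example_in_flight(int tag)
{
	int n = 1;		/* the original segment */

	if (tag & TAG_S)	/* SACKed: original reached the receiver */
		n--;
	if (tag & TAG_L)	/* marked lost by the scoreboard */
		n--;
	if (tag & TAG_R)	/* a retransmit is also in the air */
		n++;
	return n;		/* 0:1  S:0  L:0  R:2  L|R:1  S|R:1 */
}
/* tcp_sacktag_write_queue() below is one of the places that keeps these
 * counters consistent with the tag bits.
 */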
static int
tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
	struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
	int reord = tp->packets_out;
	int prior_fackets;
	u32 lost_retrans = 0;
	int flag = 0;
	int i;

	if (!tp->sacked_out)
		tp->fackets_out = 0;
	prior_fackets = tp->fackets_out;

	for (i=0; i<num_sacks; i++, sp++) {
		struct sk_buff *skb;
		__u32 start_seq = ntohl(sp->start_seq);
		__u32 end_seq = ntohl(sp->end_seq);
		int fack_count = 0;
		int dup_sack = 0;

		/* Check for D-SACK. */
		if (i == 0) {
			u32 ack = TCP_SKB_CB(ack_skb)->ack_seq;

			if (before(start_seq, ack)) {
				dup_sack = 1;
				tp->sack_ok |= 4;
				NET_INC_STATS_BH(TCPDSACKRecv);
			} else if (num_sacks > 1 &&
				   !after(end_seq, ntohl(sp[1].end_seq)) &&
				   !before(start_seq, ntohl(sp[1].start_seq))) {
				dup_sack = 1;
				tp->sack_ok |= 4;
				NET_INC_STATS_BH(TCPDSACKOfoRecv);
			}

			/* D-SACK for already forgotten data...
			 * Do dumb counting. */
			if (dup_sack &&
			    !after(end_seq, prior_snd_una) &&
			    after(end_seq, tp->undo_marker))
				tp->undo_retrans--;

			/* Eliminate too old ACKs, but take into
			 * account more or less fresh ones, they can
			 * contain valid SACK info.
			 */
			if (before(ack, prior_snd_una-tp->max_window))
				return 0;
		}

		/* Event "B" in the comment above. */
		if (after(end_seq, tp->high_seq))
			flag |= FLAG_DATA_LOST;

		for_retrans_queue(skb, sk, tp) {
			u8 sacked = TCP_SKB_CB(skb)->sacked;
			int in_sack;

			/* The retransmission queue is always in order, so
			 * we can short-circuit the walk early.
			 */
			if(!before(TCP_SKB_CB(skb)->seq, end_seq))
				break;

			fack_count++;

			in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
				!before(end_seq, TCP_SKB_CB(skb)->end_seq);

			/* Account D-SACK for retransmitted packet. */
			if ((dup_sack && in_sack) &&
			    (sacked & TCPCB_RETRANS) &&
			    after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
				tp->undo_retrans--;

			/* The frame is ACKed. */
			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) {
				if (sacked&TCPCB_RETRANS) {
					if ((dup_sack && in_sack) &&
					    (sacked&TCPCB_SACKED_ACKED))
						reord = min(fack_count, reord);
				} else {
					/* If it was in a hole, we detected reordering. */
					if (fack_count < prior_fackets &&
					    !(sacked&TCPCB_SACKED_ACKED))
						reord = min(fack_count, reord);
				}

				/* Nothing to do; acked frame is about to be dropped. */
				continue;
			}

			if ((sacked&TCPCB_SACKED_RETRANS) &&
			    after(end_seq, TCP_SKB_CB(skb)->ack_seq) &&
			    (!lost_retrans || after(end_seq, lost_retrans)))
				lost_retrans = end_seq;

			if (!in_sack)
				continue;

			if (!(sacked&TCPCB_SACKED_ACKED)) {
				if (sacked & TCPCB_SACKED_RETRANS) {
					/* If the segment is not tagged as lost,
					 * we do not clear RETRANS, believing
					 * that retransmission is still in flight.
					 */
					if (sacked & TCPCB_LOST) {
						TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
						tp->lost_out--;
						tp->retrans_out--;
					}
				} else {
					/* New sack for not retransmitted frame,
					 * which was in hole. It is reordering.
					 */
					if (!(sacked & TCPCB_RETRANS) &&
					    fack_count < prior_fackets)
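/* [Editor's note: a minimal sketch, not part of tcp_input.c.  The
 * example_* names are invented; the kernel's own before()/after() live in
 * include/net/tcp.h.]
 *
 * Every sequence comparison above (before(), after(), and their negations)
 * must be wraparound-safe: TCP sequence numbers are modulo 2^32, so they
 * are compared via the sign of the 32-bit difference rather than with raw
 * < and >.  The definitions amount to:
 */
static inline int example_before(__u32 seq1, __u32 seq2)
{
	/* true iff seq1 precedes seq2, even across a 2^32 wrap */
	return (__s32)(seq1 - seq2) < 0;
}
#define example_after(seq2, seq1)	example_before(seq1, seq2)

/* E.g. example_before(0xfffffff0, 0x10) is true: 0x10 is 0x20 bytes later
 * in the stream even though it is numerically smaller.
 */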
