📄 tcp_input.c
						reord = min(fack_count, reord);

					if (sacked & TCPCB_LOST) {
						TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
						tp->lost_out--;
					}
				}

				TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
				flag |= FLAG_DATA_SACKED;
				tp->sacked_out++;

				if (fack_count > tp->fackets_out)
					tp->fackets_out = fack_count;
			} else {
				if (dup_sack && (sacked&TCPCB_RETRANS))
					reord = min(fack_count, reord);
			}

			/* D-SACK. We can detect redundant retransmission
			 * in S|R and plain R frames and clear it.
			 * undo_retrans is decreased above, L|R frames
			 * are accounted above as well.
			 */
			if (dup_sack &&
			    (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
				tp->retrans_out--;
			}
		}
	}

	/* Check for lost retransmit. This superb idea is
	 * borrowed from "ratehalving". Event "C".
	 * Later note: FACK people cheated me again 8),
	 * we have to account for reordering! Ugly,
	 * but should help.
	 */
	if (lost_retrans && tp->ca_state == TCP_CA_Recovery) {
		struct sk_buff *skb;

		for_retrans_queue(skb, sk, tp) {
			if (after(TCP_SKB_CB(skb)->seq, lost_retrans))
				break;
			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
				continue;
			if ((TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) &&
			    after(lost_retrans, TCP_SKB_CB(skb)->ack_seq) &&
			    (IsFack(tp) ||
			     !before(lost_retrans,
				     TCP_SKB_CB(skb)->ack_seq+tp->reordering*tp->mss_cache))) {
				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
				tp->retrans_out--;

				if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
					tp->lost_out++;
					TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
					flag |= FLAG_DATA_SACKED;
					NET_INC_STATS_BH(TCPLostRetransmit);
				}
			}
		}
	}

	tp->left_out = tp->sacked_out + tp->lost_out;

	if (reord < tp->fackets_out && tp->ca_state != TCP_CA_Loss)
		tcp_update_reordering(tp, (tp->fackets_out+1)-reord, 0);

#if FASTRETRANS_DEBUG > 0
	BUG_TRAP((int)tp->sacked_out >= 0);
	BUG_TRAP((int)tp->lost_out >= 0);
	BUG_TRAP((int)tp->retrans_out >= 0);
	BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0);
#endif
	return flag;
}

void tcp_clear_retrans(struct tcp_opt *tp)
{
	tp->left_out = 0;
	tp->retrans_out = 0;

	tp->fackets_out = 0;
	tp->sacked_out = 0;
	tp->lost_out = 0;

	tp->undo_marker = 0;
	tp->undo_retrans = 0;
}
/* Enter Loss state. If "how" is not zero, forget all SACK information
 * and reset tags completely, otherwise preserve SACKs. If receiver
 * dropped its ofo queue, we will know this due to reneging detection.
 */
void tcp_enter_loss(struct sock *sk, int how)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	struct sk_buff *skb;
	int cnt = 0;

	/* Reduce ssthresh if it has not yet been made inside this window. */
	if (tp->ca_state <= TCP_CA_Disorder ||
	    tp->snd_una == tp->high_seq ||
	    (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) {
		tp->prior_ssthresh = tcp_current_ssthresh(tp);
		tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
	}
	tp->snd_cwnd = 1;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_time_stamp;

	tcp_clear_retrans(tp);

	/* Push undo marker, if it was plain RTO and nothing
	 * was retransmitted.
	 */
	if (!how)
		tp->undo_marker = tp->snd_una;

	for_retrans_queue(skb, sk, tp) {
		cnt++;
		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
			tp->undo_marker = 0;
		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
			tp->lost_out++;
		} else {
			tp->sacked_out++;
			tp->fackets_out = cnt;
		}
	}
	tp->left_out = tp->sacked_out + tp->lost_out;

	tp->reordering = min(tp->reordering, sysctl_tcp_reordering);
	tp->ca_state = TCP_CA_Loss;
	tp->high_seq = tp->snd_nxt;
	TCP_ECN_queue_cwr(tp);
}

static int tcp_check_sack_reneging(struct sock *sk, struct tcp_opt *tp)
{
	struct sk_buff *skb;

	/* If ACK arrived pointing to a remembered SACK,
	 * it means that our remembered SACKs do not reflect
	 * real state of receiver i.e.
	 * receiver _host_ is heavily congested (or buggy).
	 * Do processing similar to RTO timeout.
	 */
	if ((skb = skb_peek(&sk->write_queue)) != NULL &&
	    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
		NET_INC_STATS_BH(TCPSACKReneging);

		tcp_enter_loss(sk, 1);
		tp->retransmits++;
		tcp_retransmit_skb(sk, skb_peek(&sk->write_queue));
		tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
		return 1;
	}
	return 0;
}

static inline int tcp_fackets_out(struct tcp_opt *tp)
{
	return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out;
}

/* Linux NewReno/SACK/FACK/ECN state machine.
 * --------------------------------------
 *
 * "Open"	Normal state, no dubious events, fast path.
 * "Disorder"	In all the respects it is "Open",
 *		but requires a bit more attention. It is entered when
 *		we see some SACKs or dupacks. It is split of "Open"
 *		mainly to move some processing from fast path to slow one.
 * "CWR"	CWND was reduced due to some Congestion Notification event.
 *		It can be ECN, ICMP source quench, local device congestion.
 * "Recovery"	CWND was reduced, we are fast-retransmitting.
 * "Loss"	CWND was reduced due to RTO timeout or SACK reneging.
 *
 * tcp_fastretrans_alert() is entered:
 * - each incoming ACK, if state is not "Open"
 * - when arrived ACK is unusual, namely:
 *	* SACK
 *	* Duplicate ACK.
 *	* ECN ECE.
 *
 * Counting packets in flight is pretty simple.
 *
 *	in_flight = packets_out - left_out + retrans_out
 *
 *	packets_out is SND.NXT-SND.UNA counted in packets.
 *
 *	retrans_out is number of retransmitted segments.
 *
 *	left_out is number of segments left network, but not ACKed yet.
 *
 *		left_out = sacked_out + lost_out
 *
 *	sacked_out: Packets, which arrived to receiver out of order
 *		and hence not ACKed. With SACKs this number is simply
 *		amount of SACKed data. Even without SACKs
 *		it is easy to give pretty reliable estimate of this number,
 *		counting duplicate ACKs.
 *
 *	lost_out: Packets lost by network. TCP has no explicit
 *		"loss notification" feedback from network (for now).
 *		It means that this number can be only _guessed_.
 *		Actually, it is the heuristics to predict lossage that
 *		distinguishes different algorithms.
 *
 *	F.e. after RTO, when all the queue is considered as lost,
 *	lost_out = packets_out and in_flight = retrans_out.
 *
 *		Essentially, we have now two algorithms counting
 *		lost packets.
 *
 *		FACK: It is the simplest heuristics. As soon as we decided
 *		that something is lost, we decide that _all_ not SACKed
 *		packets until the most forward SACK are lost. I.e.
 *		lost_out = fackets_out - sacked_out and left_out = fackets_out.
 *		It is absolutely correct estimate, if network does not reorder
 *		packets. And it loses any connection to reality when reordering
 *		takes place. We use FACK by default until reordering
 *		is suspected on the path to this destination.
 *
 *		NewReno: when Recovery is entered, we assume that one segment
 *		is lost (classic Reno). While we are in Recovery and
 *		a partial ACK arrives, we assume that one more packet
 *		is lost (NewReno). This heuristics are the same in NewReno
 *		and SACK.
 *
 *  Imagine, that's all! Forget about all this shamanism about CWND inflation
 *  deflation etc. CWND is real congestion window, never inflated, changes
 *  only according to classic VJ rules.
 *
 *  Really tricky (and requiring careful tuning) part of algorithm
 *  is hidden in functions tcp_time_to_recover() and tcp_xmit_retransmit_queue().
 *  The first determines the moment _when_ we should reduce CWND and,
 *  hence, slow down forward transmission. In fact, it determines the moment
 *  when we decide that hole is caused by loss, rather than by a reorder.
 *
 *  tcp_xmit_retransmit_queue() decides, _what_ we should retransmit to fill
 *  holes, caused by lost packets.
 *
 *  And the most logically complicated part of algorithm is undo
 *  heuristics. We detect false retransmits due to both too early
 *  fast retransmit (reordering) and underestimated RTO, analyzing
 *  timestamps and D-SACKs. When we detect that some segments were
 *  retransmitted by mistake and CWND reduction was wrong, we undo
 *  window reduction and abort recovery phase. This logic is hidden
 *  inside several functions named tcp_try_undo_<something>.
 */
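/* [Illustrative sketch -- not part of tcp_input.c.]  The block comment above
 * defines the in-flight accounting:
 *
 *	left_out  = sacked_out + lost_out
 *	in_flight = packets_out - left_out + retrans_out
 *
 * The stand-alone user-space snippet below only restates that arithmetic with
 * concrete numbers; "flight_counters", "left_out()" and "in_flight()" are
 * hypothetical stand-ins for the tcp_opt fields and for the kernel helper
 * tcp_packets_in_flight() used by the BUG_TRAPs earlier in this file.
 */
#include <stdio.h>

struct flight_counters {
	unsigned int packets_out;	/* SND.NXT - SND.UNA, counted in packets */
	unsigned int sacked_out;	/* SACKed, or dupack-estimated, segments */
	unsigned int lost_out;		/* segments the heuristics guess are lost */
	unsigned int retrans_out;	/* retransmitted segments */
};

static unsigned int left_out(const struct flight_counters *c)
{
	return c->sacked_out + c->lost_out;	/* left network, but not ACKed yet */
}

static unsigned int in_flight(const struct flight_counters *c)
{
	return c->packets_out - left_out(c) + c->retrans_out;
}

int main(void)
{
	/* 10 segments outstanding: 3 SACKed, 2 presumed lost, 1 retransmitted. */
	struct flight_counters c = { 10, 3, 2, 1 };

	printf("left_out  = %u\n", left_out(&c));	/* prints 5 */
	printf("in_flight = %u\n", in_flight(&c));	/* prints 10 - 5 + 1 = 6 */
	return 0;
}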
/* This function decides, when we should leave Disordered state
 * and enter Recovery phase, reducing congestion window.
 *
 * Main question: may we further continue forward transmission
 * with the same cwnd?
 */
static int
tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp)
{
	/* Trick#1: The loss is proven. */
	if (tp->lost_out)
		return 1;

	/* Not-A-Trick#2 : Classic rule... */
	if (tcp_fackets_out(tp) > tp->reordering)
		return 1;

	/* Trick#3: It is still not OK... But will it be useful to delay
	 * recovery more?
	 */
	if (tp->packets_out <= tp->reordering &&
	    tp->sacked_out >= max(tp->packets_out/2, sysctl_tcp_reordering) &&
	    !tcp_may_send_now(sk, tp)) {
		/* We have nothing to send. This connection is limited
		 * either by receiver window or by application.
		 */
		return 1;
	}

	return 0;
}

/* If we receive more dupacks than we expected counting segments
 * in assumption of absent reordering, interpret this as reordering.
 * The only another reason could be bug in receiver TCP.
 */
static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend)
{
	if (tp->sacked_out + 1 > tp->packets_out) {
		tp->sacked_out = tp->packets_out ? tp->packets_out - 1 : 0;
		tcp_update_reordering(tp, tp->packets_out+addend, 0);
	}
}

/* Emulate SACKs for SACKless connection: account for a new dupack. */
static void tcp_add_reno_sack(struct tcp_opt *tp)
{
	++tp->sacked_out;
	tcp_check_reno_reordering(tp, 0);
	tp->left_out = tp->sacked_out + tp->lost_out;
}

/* Account for ACK, ACKing some data in Reno Recovery phase. */
static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked)
{
	if (acked > 0) {
		/* One ACK eated lost packet. Must eat! */
		BUG_TRAP(tp->lost_out == 0);

		/* The rest eat duplicate ACKs. */
		if (acked-1 >= tp->sacked_out)
			tp->sacked_out = 0;
		else
			tp->sacked_out -= acked-1;
	}
	tcp_check_reno_reordering(tp, acked);
	tp->left_out = tp->sacked_out + tp->lost_out;
}

static inline void tcp_reset_reno_sack(struct tcp_opt *tp)
{
	tp->sacked_out = 0;
	tp->left_out = tp->lost_out;
}
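/* [Illustrative sketch -- not part of tcp_input.c.]  A user-space walk-through
 * of the SACK emulation above: on a SACKless connection each duplicate ACK is
 * counted as one segment that left the network, and a later cumulative ACK
 * consumes those emulated SACKs again.  "reno_counters", "reno_dupack()" and
 * "reno_cumulative_ack()" are hypothetical stand-ins that mirror the update
 * rules of tcp_add_reno_sack() and tcp_remove_reno_sacks() on plain integers.
 */
#include <stdio.h>

struct reno_counters {
	unsigned int packets_out;
	unsigned int sacked_out;	/* emulated: one per duplicate ACK */
};

/* A new duplicate ACK: assume one more segment reached the receiver. */
static void reno_dupack(struct reno_counters *c)
{
	c->sacked_out++;
	/* More dupacks than outstanding segments would mean reordering
	 * (or a buggy receiver); clamp as tcp_check_reno_reordering() does.
	 */
	if (c->sacked_out + 1 > c->packets_out)
		c->sacked_out = c->packets_out ? c->packets_out - 1 : 0;
}

/* A cumulative ACK covering "acked" segments: the first one accounts for
 * the retransmitted hole, the rest eat emulated SACKs.
 */
static void reno_cumulative_ack(struct reno_counters *c, unsigned int acked)
{
	if (acked > 0) {
		if (acked - 1 >= c->sacked_out)
			c->sacked_out = 0;
		else
			c->sacked_out -= acked - 1;
	}
	c->packets_out -= acked;
}

int main(void)
{
	struct reno_counters c = { 8, 0 };	/* 8 segments outstanding */
	int i;

	for (i = 0; i < 3; i++)			/* three duplicate ACKs arrive */
		reno_dupack(&c);
	printf("after 3 dupacks: sacked_out=%u\n", c.sacked_out);	/* 3 */

	reno_cumulative_ack(&c, 4);		/* ACK advances over 4 segments */
	printf("after that ACK:  sacked_out=%u packets_out=%u\n",
	       c.sacked_out, c.packets_out);	/* 0 and 4 */
	return 0;
}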
/* Mark head of queue up as lost.
 */
static void
tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_seq)
{
	struct sk_buff *skb;
	int cnt = packets;

	BUG_TRAP(cnt <= tp->packets_out);

	for_retrans_queue(skb, sk, tp) {
		if (--cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
			break;
		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
			tp->lost_out++;
		}
	}
	tp->left_out = tp->sacked_out + tp->lost_out;
}

/* Account newly detected lost packet(s) */
static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp)
{
	if (IsFack(tp)) {
		int lost = tp->fackets_out - tp->reordering;
		if (lost <= 0)
			lost = 1;
		tcp_mark_head_lost(sk, tp, lost, tp->high_seq);
	} else {
		tcp_mark_head_lost(sk, tp, 1, tp->high_seq);
	}
}

/* CWND moderation, preventing bursts due to too big ACKs
 * in dubious situations.
 */
static __inline__ void tcp_moderate_cwnd(struct tcp_opt *tp)
{
	tp->snd_cwnd = min(tp->snd_cwnd,
			   tcp_packets_in_flight(tp)+tcp_max_burst(tp));
	tp->snd_cwnd_stamp = tcp_time_stamp;
}

/* Decrease cwnd each second ack. */
static void tcp_cwnd_down(struct tcp_opt *tp)
{
	int decr = tp->snd_cwnd_cnt + 1;

	tp->snd_cwnd_cnt = decr&1;
	decr >>= 1;

	if (decr && tp->snd_cwnd > tp->snd_ssthresh/2)
		tp->snd_cwnd -= decr;

	tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1);
	tp->snd_cwnd_stamp = tcp_time_stamp;
}

/* Nothing was retransmitted or returned timestamp is less
 * than timestamp of the first retransmission.
 */
static __inline__ int tcp_packet_delayed(struct tcp_opt *tp)
{
	return !tp->retrans_stamp ||
		(tp->saw_tstamp && tp->rcv_tsecr &&
		 (__s32)(tp->rcv_tsecr - tp->retrans_stamp) < 0);
}

/* Undo procedures. */

#if FASTRETRANS_DEBUG > 1
static void DBGUNDO(struct sock *sk, struct tcp_opt *tp, const char *msg)
{
	printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
	       msg,
	       NIPQUAD(sk->daddr), ntohs(sk->dport),
	       tp->snd_cwnd, tp->left_out,
	       tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out);
}
#else
#define DBGUNDO(x...) do { } while (0)
#endif

static void tcp_undo_cwr(struct tcp_opt *tp, int undo)
{
	if (tp->prior_ssthresh) {
		tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);

		if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
			tp->snd_ssthresh = tp->prior_ssthresh;
			TCP_ECN_withdraw_cwr(tp);
		}
	} else {
		tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
	}
	tcp_moderate_cwnd(tp);
	tp->snd_cwnd_stamp = tcp_time_stamp;
}

static inline int tcp_may_undo(struct tcp_opt *tp)
{
	return tp->undo_marker &&
		(!tp->undo_retrans || tcp_packet_delayed(tp));
}

/* People celebrate: "We love our President!" */
static int tcp_try_undo_recovery(struct sock *sk, struct tcp_opt *tp)
{
	if (tcp_may_undo(tp)) {