📄 tcp_input.c

📁 linux 内核源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
			tp->snd_cwnd = 2;			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);			tp->snd_cwnd = stored_cwnd;		} else {			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);		}		/* ... in theory, cong.control module could do "any tricks" in		 * ssthresh(), which means that ca_state, lost bits and lost_out		 * counter would have to be faked before the call occurs. We		 * consider that too expensive, unlikely and hacky, so modules		 * using these in ssthresh() must deal these incompatibility		 * issues if they receives CA_EVENT_FRTO and frto_counter != 0		 */		tcp_ca_event(sk, CA_EVENT_FRTO);	}	tp->undo_marker = tp->snd_una;	tp->undo_retrans = 0;	skb = tcp_write_queue_head(sk);	if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)		tp->undo_marker = 0;	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;		tp->retrans_out -= tcp_skb_pcount(skb);	}	tcp_verify_left_out(tp);	/* Too bad if TCP was application limited */	tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);	/* Earlier loss recovery underway (see RFC4138; Appendix B).	 * The last condition is necessary at least in tp->frto_counter case.	 */	if (IsSackFrto() && (tp->frto_counter ||	    ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&	    after(tp->high_seq, tp->snd_una)) {		tp->frto_highmark = tp->high_seq;	} else {		tp->frto_highmark = tp->snd_nxt;	}	tcp_set_ca_state(sk, TCP_CA_Disorder);	tp->high_seq = tp->snd_nxt;	tp->frto_counter = 1;}/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO, * which indicates that we should follow the traditional RTO recovery, * i.e. mark everything lost and do go-back-N retransmission. 
*/
/* Parameters, as used in the body below:
 * @sk:               socket whose write queue is walked and re-tagged.
 * @allowed_segments: added to tcp_packets_in_flight() to form the new
 *                    snd_cwnd once the queue has been re-tagged.
 * @flag:             ACK flags; only FLAG_DATA_ACKED is tested here. The
 *                    local copy is modified so that the "first segment"
 *                    branch in the loop is taken at most once.
 */
static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	/* Both counters are rebuilt from scratch by the queue walk below. */
	tp->lost_out = 0;
	tp->retrans_out = 0;
	if (tcp_is_reno(tp))
		tcp_reset_reno_sack(tp);

	tcp_for_write_queue(skb, sk) {
		/* Only skbs in front of the send head are examined. */
		if (skb == tcp_send_head(sk))
			break;

		TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
		/*
		 * Count the retransmission made on RTO correctly (only when
		 * waiting for the first ACK and did not get it)...
		 */
		if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) {
			/* For some reason this R-bit might get cleared? */
			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
				tp->retrans_out += tcp_skb_pcount(skb);
			/* ...enter this if branch just for the first segment */
			flag |= FLAG_DATA_ACKED;
		} else {
			/* Any skb carrying a retransmission tag disables undo. */
			if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
				tp->undo_marker = 0;
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
		}

		/* Don't lost mark skbs that were fwd transmitted after RTO */
		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) &&
		    !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) {
			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
			tp->lost_out += tcp_skb_pcount(skb);
		}
	}
	tcp_verify_left_out(tp);

	/* cwnd = what is in flight after re-tagging + the caller's allowance. */
	tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_time_stamp;
	/* F-RTO is over once the Loss state is entered. */
	tp->frto_counter = 0;
	tp->bytes_acked = 0;

	/* Clamp the reordering metric to the sysctl value. */
	tp->reordering = min_t(unsigned int, tp->reordering,
					     sysctl_tcp_reordering);
	tcp_set_ca_state(sk, TCP_CA_Loss);
	/* Recovery point is the F-RTO highmark rather than snd_nxt. */
	tp->high_seq = tp->frto_highmark;
	TCP_ECN_queue_cwr(tp);

	tcp_clear_retrans_hints_partial(tp);
}

/* Zero the retransmitted/lost packet counters and the undo state.
 * fackets_out and sacked_out are deliberately left untouched here.
 */
static void tcp_clear_retrans_partial(struct tcp_sock *tp)
{
	tp->retrans_out = 0;
	tp->lost_out = 0;

	tp->undo_marker = 0;
	tp->undo_retrans = 0;
}

/* Same as tcp_clear_retrans_partial(), and additionally zero
 * fackets_out and sacked_out.
 */
void tcp_clear_retrans(struct tcp_sock *tp)
{
	tcp_clear_retrans_partial(tp);

	tp->fackets_out = 0;
	tp->sacked_out = 0;
}

/* Enter Loss state. If "how" is not zero, forget all SACK information
 * and reset tags completely, otherwise preserve SACKs. 
If receiver
 * dropped its ofo queue, we will know this due to reneging detection.
 */
/* Steps performed below, in order: optionally reduce ssthresh, set
 * snd_cwnd to 1, clear the retransmission counters, walk the write queue
 * up to the send head marking skbs LOST (every skb when how != 0,
 * otherwise only those not tagged SACKED_ACKED), switch the congestion
 * state to TCP_CA_Loss and finally cancel any F-RTO in progress.
 */
void tcp_enter_loss(struct sock *sk, int how)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	/* Reduce ssthresh if it has not yet been made inside this window. */
	if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
		/* Remember the pre-reduction value before overwriting it. */
		tp->prior_ssthresh = tcp_current_ssthresh(sk);
		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
		tcp_ca_event(sk, CA_EVENT_LOSS);
	}
	tp->snd_cwnd	   = 1;
	tp->snd_cwnd_cnt   = 0;
	tp->snd_cwnd_stamp = tcp_time_stamp;

	tp->bytes_acked = 0;
	/* Zeroes retrans_out, lost_out, undo_marker and undo_retrans. */
	tcp_clear_retrans_partial(tp);

	if (tcp_is_reno(tp))
		tcp_reset_reno_sack(tp);

	if (!how) {
		/* Push undo marker, if it was plain RTO and nothing
		 * was retransmitted. */
		tp->undo_marker = tp->snd_una;
		tcp_clear_retrans_hints_partial(tp);
	} else {
		/* SACK information is being discarded entirely. */
		tp->sacked_out = 0;
		tp->fackets_out = 0;
		tcp_clear_all_retrans_hints(tp);
	}

	tcp_for_write_queue(skb, sk) {
		/* Only skbs in front of the send head are examined. */
		if (skb == tcp_send_head(sk))
			break;

		/* A retransmission tag on any skb cancels the undo marker
		 * that may have been set above.
		 */
		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
			tp->undo_marker = 0;
		/* Drop every tag bit except SACKED_ACKED. */
		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
			tp->lost_out += tcp_skb_pcount(skb);
		}
	}
	tcp_verify_left_out(tp);

	/* Clamp the reordering metric to the sysctl value. */
	tp->reordering = min_t(unsigned int, tp->reordering,
					     sysctl_tcp_reordering);
	tcp_set_ca_state(sk, TCP_CA_Loss);
	tp->high_seq = tp->snd_nxt;
	TCP_ECN_queue_cwr(tp);
	/* Abort F-RTO algorithm if one is in progress */
	tp->frto_counter = 0;
}

/* Detect SACK reneging by inspecting the head of the write queue.
 *
 * Returns 1 when the head skb is still tagged SACKED_ACKED, in which case
 * Loss state has been entered with all SACK state dropped, the head has
 * been retransmitted and the retransmit timer re-armed; returns 0
 * otherwise (including when the write queue has no head).
 */
static int tcp_check_sack_reneging(struct sock *sk)
{
	struct sk_buff *skb;

	/* If ACK arrived pointing to a remembered SACK,
	 * it means that our remembered SACKs do not reflect
	 * real state of receiver i.e.
	 * receiver _host_ is heavily congested (or buggy).
	 * Do processing similar to RTO timeout.
	 */
	if ((skb = tcp_write_queue_head(sk)) != NULL &&
	    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
		struct inet_connection_sock *icsk = inet_csk(sk);
		NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);

		/* how == 1: forget all SACK information. */
		tcp_enter_loss(sk, 1);
		icsk->icsk_retransmits++;
		tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  icsk->icsk_rto, TCP_RTO_MAX);
		return 1;
	}
	return 0;
}

/* Returns sacked_out + 1 when tcp_is_reno(tp) is true, otherwise the
 * fackets_out counter.
 */
static inline int tcp_fackets_out(struct tcp_sock *tp)
{
	return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out;
}

/* Nonzero when the time elapsed since the skb's "when" stamp exceeds the
 * socket's current icsk_rto.
 */
static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
{
	return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
}

/* Nonzero when packets_out is nonzero and the head of the write queue has
 * timed out according to tcp_skb_timedout().
 */
static inline int tcp_head_timedout(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* packets_out is tested first, so the queue head is only looked at
	 * when something is outstanding.
	 */
	return tp->packets_out &&
	       tcp_skb_timedout(sk, tcp_write_queue_head(sk));
}

/* Linux NewReno/SACK/FACK/ECN state machine.
 * --------------------------------------
 *
 * "Open"	Normal state, no dubious events, fast path.
 * "Disorder"   In all the respects it is "Open",
 *		but requires a bit more attention. It is entered when
 *		we see some SACKs or dupacks. It is split of "Open"
 *		mainly to move some processing from fast path to slow one.
 * "CWR"	CWND was reduced due to some Congestion Notification event.
 *		It can be ECN, ICMP source quench, local device congestion.
 * "Recovery"	CWND was reduced, we are fast-retransmitting.
 * "Loss"	CWND was reduced due to RTO timeout or SACK reneging.
 *
 * tcp_fastretrans_alert() is entered:
 * - each incoming ACK, if state is not "Open"
 * - when arrived ACK is unusual, namely:
 *	* SACK
 *	* Duplicate ACK.
 *	* ECN ECE.
 *
 * Counting packets in flight is pretty simple.
 *
 *	in_flight = packets_out - left_out + retrans_out
 *
 *	packets_out is SND.NXT-SND.UNA counted in packets.
 *
 *	retrans_out is number of retransmitted segments.
 *
 *	left_out is number of segments left network, but not ACKed yet. 
 *
 *		left_out = sacked_out + lost_out
 *
 *     sacked_out: Packets which arrived at the receiver out of order
 *		   and hence are not ACKed. With SACKs this number is simply
 *		   the amount of SACKed data. Even without SACKs
 *		   it is easy to give a pretty reliable estimate of this
 *		   number by counting duplicate ACKs.
 *
 *       lost_out: Packets lost by the network. TCP has no explicit
 *		   "loss notification" feedback from the network (for now).
 *		   It means that this number can only be _guessed_.
 *		   Actually, it is the heuristic used to predict loss that
 *		   distinguishes different algorithms.
 *
 *	E.g. after RTO, when the whole queue is considered lost,
 *	lost_out = packets_out and in_flight = retrans_out.
 *
 *		Essentially, we now have two algorithms for counting
 *		lost packets.
 *
 *		FACK: It is the simplest heuristic. As soon as we have decided
 *		that something is lost, we decide that _all_ not SACKed
 *		packets up to the most forward SACK are lost. I.e.
 *		lost_out = fackets_out - sacked_out and left_out = fackets_out.
 *		It is an absolutely correct estimate if the network does not
 *		reorder packets. And it loses any connection to reality when
 *		reordering takes place. We use FACK by default until reordering
 *		is suspected on the path to this destination.
 *
 *		NewReno: when Recovery is entered, we assume that one segment
 *		is lost (classic Reno). While we are in Recovery and
 *		a partial ACK arrives, we assume that one more packet
 *		is lost (NewReno). These heuristics are the same in NewReno
 *		and SACK.
 *
 *  Imagine, that's all! Forget about all this shamanism about CWND inflation,
 *  deflation etc. CWND is the real congestion window, never inflated; it changes
 *  only according to classic VJ rules.
 *
 * The really tricky (and requiring careful tuning) part of the algorithm
 * is hidden in functions tcp_time_to_recover() and tcp_xmit_retransmit_queue().
 * The first determines the moment _when_ we should reduce CWND and,
 * hence, slow down forward transmission. 
In fact, it determines the moment * when we decide that hole is caused by loss, rather than by a reorder. * * tcp_xmit_retransmit_queue() decides, _what_ we should retransmit to fill * holes, caused by lost packets. * * And the most logically complicated part of algorithm is undo * heuristics. We detect false retransmits due to both too early * fast retransmit (reordering) and underestimated RTO, analyzing * timestamps and D-SACKs. When we detect that some segments were * retransmitted by mistake and CWND reduction was wrong, we undo * window reduction and abort recovery phase. This logic is hidden * inside several functions named tcp_try_undo_<something>. *//* This function decides, when we should leave Disordered state * and enter Recovery phase, reducing congestion window. * * Main question: may we further continue forward transmission * with the same cwnd? */static int tcp_time_to_recover(struct sock *sk){	struct tcp_sock *tp = tcp_sk(sk);	__u32 packets_out;	/* Do not perform any recovery during F-RTO algorithm */	if (tp->frto_counter)		return 0;	/* Trick#1: The loss is proven. */	if (tp->lost_out)		return 1;	/* Not-A-Trick#2 : Classic rule... */	if (tcp_fackets_out(tp) > tp->reordering)		return 1;	/* Trick#3 : when we use RFC2988 timer restart, fast	 * retransmit can be triggered by timeout of queue head.	 */	if (tcp_head_timedout(sk))		return 1;	/* Trick#4: It is still not OK... But will it be useful to delay	 * recovery more?	 */	packets_out = tp->packets_out;	if (packets_out <= tp->reordering &&	    tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&	    !tcp_may_send_now(sk)) {		/* We have nothing to send. This connection is limited		 * either by receiver window or by application.		 */		return 1;	}	return 0;}/* RFC: This is from the original, I doubt that this is necessary at all: * clear xmit_retrans hint if seq of this skb is beyond hint. How could we * retransmitted past LOST markings in the first place? 
I'm not fully sure * about undo and end of connection cases, which can cause R without L? */static void tcp_verify_retransmit_hint(struct tcp_sock *tp,				       struct sk_buff *skb){	if ((tp->retransmit_skb_hint != NULL) &&	    before(TCP_SKB_CB(skb)->seq,	    TCP_SKB_CB(tp->retransmit_skb_hint)->seq))		tp->retransmit_skb_hint = NULL;}/* Mark head of queue up as lost. */static void tcp_mark_head_lost(struct sock *sk, int packets){	struct tcp_sock *tp = tcp_sk(sk);	struct sk_buff *skb;	int cnt;	BUG_TRAP(packets <= tp->packets_out);	if (tp->lost_skb_hint) {		skb = tp->lost_skb_hint;		cnt = tp->lost_cnt_hint;	} else {		skb = tcp_write_queue_head(sk);		cnt = 0;	}	tcp_for_write_queue_from(skb, sk) {		if (skb == tcp_send_head(sk))			break;		/* TODO: do this better */		/* this is not the most efficient way to do this... */		tp->lost_skb_hint = skb;		tp->lost_cnt_hint = cnt;		cnt += tcp_skb_pcount(skb);		if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))			break;		if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;			tp->lost_out += tcp_skb_pcount(skb);			tcp_verify_retransmit_hint(tp, skb);		}	}	tcp_verify_left_out(tp);}/* Account newly detected
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -