📄 tcp_input.c
static int
tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned char *ptr = (skb_transport_header(ack_skb) +
			      TCP_SKB_CB(ack_skb)->sacked);
	struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
	struct sk_buff *cached_skb;
	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
	int reord = tp->packets_out;
	int prior_fackets;
	u32 highest_sack_end_seq = tp->lost_retrans_low;
	int flag = 0;
	int found_dup_sack = 0;
	int cached_fack_count;
	int i;
	int first_sack_index;
	int force_one_sack;

	if (!tp->sacked_out) {
		if (WARN_ON(tp->fackets_out))
			tp->fackets_out = 0;
		tp->highest_sack = tp->snd_una;
	}
	prior_fackets = tp->fackets_out;

	found_dup_sack = tcp_check_dsack(tp, ack_skb, sp, num_sacks,
					 prior_snd_una);
	if (found_dup_sack)
		flag |= FLAG_DSACKING_ACK;

	/* Eliminate too old ACKs, but take into
	 * account more or less fresh ones, they can
	 * contain valid SACK info.
	 */
	if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
		return 0;

	if (!tp->packets_out)
		goto out;

	/* SACK fastpath:
	 * if the only SACK change is the increase of the end_seq of
	 * the first block, then only apply that SACK block
	 * and use retrans queue hinting; otherwise take the slowpath.
	 */
	force_one_sack = 1;
	for (i = 0; i < num_sacks; i++) {
		__be32 start_seq = sp[i].start_seq;
		__be32 end_seq = sp[i].end_seq;

		if (i == 0) {
			if (tp->recv_sack_cache[i].start_seq != start_seq)
				force_one_sack = 0;
		} else {
			if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
			    (tp->recv_sack_cache[i].end_seq != end_seq))
				force_one_sack = 0;
		}
		tp->recv_sack_cache[i].start_seq = start_seq;
		tp->recv_sack_cache[i].end_seq = end_seq;
	}
	/* Clear the rest of the cache sack blocks so they won't match mistakenly. */
	for (; i < ARRAY_SIZE(tp->recv_sack_cache); i++) {
		tp->recv_sack_cache[i].start_seq = 0;
		tp->recv_sack_cache[i].end_seq = 0;
	}

	first_sack_index = 0;
	if (force_one_sack)
		num_sacks = 1;
	else {
		int j;
		tp->fastpath_skb_hint = NULL;

		/* order SACK blocks to allow in order walk of the retrans queue */
		for (i = num_sacks-1; i > 0; i--) {
			for (j = 0; j < i; j++) {
				if (after(ntohl(sp[j].start_seq),
					  ntohl(sp[j+1].start_seq))) {
					struct tcp_sack_block_wire tmp;

					tmp = sp[j];
					sp[j] = sp[j+1];
					sp[j+1] = tmp;

					/* Track where the first SACK block goes to */
					if (j == first_sack_index)
						first_sack_index = j+1;
				}
			}
		}
	}

	/* Use SACK fastpath hint if valid */
	cached_skb = tp->fastpath_skb_hint;
	cached_fack_count = tp->fastpath_cnt_hint;
	if (!cached_skb) {
		cached_skb = tcp_write_queue_head(sk);
		cached_fack_count = 0;
	}

	for (i = 0; i < num_sacks; i++) {
		struct sk_buff *skb;
		__u32 start_seq = ntohl(sp->start_seq);
		__u32 end_seq = ntohl(sp->end_seq);
		int fack_count;
		int dup_sack = (found_dup_sack && (i == first_sack_index));
		int next_dup = (found_dup_sack && (i+1 == first_sack_index));

		sp++;

		if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq)) {
			if (dup_sack) {
				if (!tp->undo_marker)
					NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO);
				else
					NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD);
			} else {
				/* Don't count olds caused by ACK reordering */
				if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
				    !after(end_seq, tp->snd_una))
					continue;
				NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD);
			}
			continue;
		}

		skb = cached_skb;
		fack_count = cached_fack_count;
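		/* Illustrative note (not part of the original source): because
		 * sp[] was sorted by start_seq above, e.g. wire order
		 * {4000-5000}{1000-2000}{2500-3000} becomes
		 * {1000-2000}{2500-3000}{4000-5000}, the walk below can resume
		 * from cached_skb/cached_fack_count instead of rewinding to
		 * the queue head for every block; first_sack_index still
		 * remembers where the block that came first on the wire (the
		 * D-SACK candidate) ended up after sorting.
		 */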
		/* Event "B" in the comment above. */
		if (after(end_seq, tp->high_seq))
			flag |= FLAG_DATA_LOST;

		tcp_for_write_queue_from(skb, sk) {
			int in_sack = 0;
			u8 sacked;

			if (skb == tcp_send_head(sk))
				break;

			cached_skb = skb;
			cached_fack_count = fack_count;
			if (i == first_sack_index) {
				tp->fastpath_skb_hint = skb;
				tp->fastpath_cnt_hint = fack_count;
			}

			/* The retransmission queue is always in order, so
			 * we can short-circuit the walk early.
			 */
			if (!before(TCP_SKB_CB(skb)->seq, end_seq))
				break;

			dup_sack = (found_dup_sack && (i == first_sack_index));

			/* Due to sorting DSACK may reside within this SACK block! */
			if (next_dup) {
				u32 dup_start = ntohl(sp->start_seq);
				u32 dup_end = ntohl(sp->end_seq);

				if (before(TCP_SKB_CB(skb)->seq, dup_end)) {
					in_sack = tcp_match_skb_to_sack(sk, skb,
									dup_start, dup_end);
					if (in_sack > 0)
						dup_sack = 1;
				}
			}

			/* DSACK info lost if out-of-mem, try SACK still */
			if (in_sack <= 0)
				in_sack = tcp_match_skb_to_sack(sk, skb,
								start_seq, end_seq);
			if (unlikely(in_sack < 0))
				break;

			sacked = TCP_SKB_CB(skb)->sacked;

			/* Account D-SACK for retransmitted packet. */
			if ((dup_sack && in_sack) &&
			    (sacked & TCPCB_RETRANS) &&
			    after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
				tp->undo_retrans--;

			/* The frame is ACKed. */
			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) {
				if (sacked&TCPCB_RETRANS) {
					if ((dup_sack && in_sack) &&
					    (sacked&TCPCB_SACKED_ACKED))
						reord = min(fack_count, reord);
				}

				/* Nothing to do; acked frame is about to be dropped. */
				fack_count += tcp_skb_pcount(skb);
				continue;
			}

			if (!in_sack) {
				fack_count += tcp_skb_pcount(skb);
				continue;
			}

			if (!(sacked&TCPCB_SACKED_ACKED)) {
				if (sacked & TCPCB_SACKED_RETRANS) {
					/* If the segment is not tagged as lost,
					 * we do not clear RETRANS, believing
					 * that retransmission is still in flight.
					 */
					if (sacked & TCPCB_LOST) {
						TCP_SKB_CB(skb)->sacked &=
							~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
						tp->lost_out -= tcp_skb_pcount(skb);
						tp->retrans_out -= tcp_skb_pcount(skb);

						/* clear lost hint */
						tp->retransmit_skb_hint = NULL;
					}
				} else {
					if (!(sacked & TCPCB_RETRANS)) {
						/* New sack for not retransmitted frame,
						 * which was in hole. It is reordering.
						 */
						if (fack_count < prior_fackets)
							reord = min(fack_count, reord);

						/* SACK enhanced F-RTO (RFC4138; Appendix B) */
						if (!after(TCP_SKB_CB(skb)->end_seq,
							   tp->frto_highmark))
							flag |= FLAG_ONLY_ORIG_SACKED;
					}

					if (sacked & TCPCB_LOST) {
						TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
						tp->lost_out -= tcp_skb_pcount(skb);

						/* clear lost hint */
						tp->retransmit_skb_hint = NULL;
					}
				}

				TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
				flag |= FLAG_DATA_SACKED;
				tp->sacked_out += tcp_skb_pcount(skb);

				fack_count += tcp_skb_pcount(skb);
				if (fack_count > tp->fackets_out)
					tp->fackets_out = fack_count;

				if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) {
					tp->highest_sack = TCP_SKB_CB(skb)->seq;
					highest_sack_end_seq = TCP_SKB_CB(skb)->end_seq;
				}
			} else {
				if (dup_sack && (sacked&TCPCB_RETRANS))
					reord = min(fack_count, reord);
				fack_count += tcp_skb_pcount(skb);
			}
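			/* Recap of the tag bookkeeping above (illustrative
			 * note, not in the original source): a newly SACKed
			 * skb gains TCPCB_SACKED_ACKED and leaves lost_out if
			 * it carried TCPCB_LOST (leaving retrans_out too when
			 * it was tagged L|R); a plain R tag is kept since that
			 * retransmission may still be in flight.  Reordering
			 * example: if a never-retransmitted skb is SACKed at
			 * fack_count 3 while prior_fackets was already 7, it
			 * sat in a hole below earlier SACKed data, so reord
			 * drops to 3 and tcp_update_reordering() later sees
			 * fackets_out - reord.
			 */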
			/* D-SACK. We can detect redundant retransmission
			 * in S|R and plain R frames and clear it.
			 * undo_retrans is decreased above, L|R frames
			 * are accounted above as well.
			 */
			if (dup_sack &&
			    (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
				tp->retrans_out -= tcp_skb_pcount(skb);
				tp->retransmit_skb_hint = NULL;
			}
		}

		/* SACK enhanced FRTO (RFC4138, Appendix B): clearing is
		 * correct due to the in-order walk.
		 */
		if (after(end_seq, tp->frto_highmark))
			flag &= ~FLAG_ONLY_ORIG_SACKED;
	}

	if (tp->retrans_out &&
	    after(highest_sack_end_seq, tp->lost_retrans_low) &&
	    icsk->icsk_ca_state == TCP_CA_Recovery)
		flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq);

	tcp_verify_left_out(tp);

	if ((reord < tp->fackets_out) &&
	    icsk->icsk_ca_state != TCP_CA_Loss &&
	    (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
		tcp_update_reordering(sk, tp->fackets_out - reord, 0);

out:

#if FASTRETRANS_DEBUG > 0
	BUG_TRAP((int)tp->sacked_out >= 0);
	BUG_TRAP((int)tp->lost_out >= 0);
	BUG_TRAP((int)tp->retrans_out >= 0);
	BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0);
#endif
	return flag;
}

/* If we receive more dupacks than we expected counting segments
 * in assumption of absent reordering, interpret this as reordering.
 * The only other reason could be a bug in the receiver TCP.
 */
static void tcp_check_reno_reordering(struct sock *sk, const int addend)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 holes;

	holes = max(tp->lost_out, 1U);
	holes = min(holes, tp->packets_out);

	if ((tp->sacked_out + holes) > tp->packets_out) {
		tp->sacked_out = tp->packets_out - holes;
		tcp_update_reordering(sk, tp->packets_out + addend, 0);
	}
}

/* Emulate SACKs for SACKless connection: account for a new dupack. */
static void tcp_add_reno_sack(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	tp->sacked_out++;
	tcp_check_reno_reordering(sk, 0);
	tcp_verify_left_out(tp);
}

/* Account for ACK, ACKing some data in Reno Recovery phase. */
static void tcp_remove_reno_sacks(struct sock *sk, int acked)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (acked > 0) {
		/* One ACK acked the hole. The rest eat duplicate ACKs. */
		if (acked-1 >= tp->sacked_out)
			tp->sacked_out = 0;
		else
			tp->sacked_out -= acked-1;
	}
	tcp_check_reno_reordering(sk, acked);
	tcp_verify_left_out(tp);
}

static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
{
	tp->sacked_out = 0;
}
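/* Worked example for tcp_check_reno_reordering() (illustrative, not in the
 * original source): with packets_out = 10 and lost_out = 0 it assumes
 * holes = max(0, 1) = 1, i.e. at most one genuinely missing segment.  Each
 * duplicate ACK bumps sacked_out via tcp_add_reno_sack(), so once
 * sacked_out + 1 > 10 more dupacks have been counted than the nine segments
 * that could sit behind a single hole; sacked_out is then clamped to
 * packets_out - holes = 9 and tcp_update_reordering() is fed a reordering
 * metric of packets_out + addend.
 */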
/* F-RTO can only be used if TCP has never retransmitted anything other than
 * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
 */
int tcp_use_frto(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	if (!sysctl_tcp_frto)
		return 0;

	if (IsSackFrto())
		return 1;

	/* Avoid expensive walking of rexmit queue if possible */
	if (tp->retrans_out > 1)
		return 0;

	skb = tcp_write_queue_head(sk);
	skb = tcp_write_queue_next(sk, skb);	/* Skips head */
	tcp_for_write_queue_from(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;
		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
			return 0;
		/* Short-circuit when first non-SACKed skb has been checked */
		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED))
			break;
	}
	return 1;
}

/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
 * recovery a bit and use heuristics in tcp_process_frto() to detect if
 * the RTO was spurious. Only clear SACKED_RETRANS of the head here to
 * keep retrans_out counting accurate (with SACK F-RTO, other than head
 * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS
 * bits are handled if the Loss state is really to be entered (in
 * tcp_enter_frto_loss).
 *
 * Do like tcp_enter_loss() would; when RTO expires the second time it
 * does:
 *   "Reduce ssthresh if it has not yet been made inside this window."
 */
void tcp_enter_frto(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
	    tp->snd_una == tp->high_seq ||
	    ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
	     !icsk->icsk_retransmits)) {
		tp->prior_ssthresh = tcp_current_ssthresh(sk);
		/* Our state is too optimistic in ssthresh() call because cwnd
		 * is not reduced until tcp_enter_frto_loss() when previous F-RTO
		 * recovery has not yet completed. Pattern would be this: RTO,
		 * Cumulative ACK, RTO (2xRTO for the same segment does not end
		 * up here twice).
		 * RFC4138 should be more specific on what to do, even though
		 * RTO is quite unlikely to occur after the first Cumulative ACK
		 * due to back-off and complexity of triggering events ...
		 */
		if (tp->frto_counter) {
			u32 stored_cwnd;
			stored_cwnd = tp->snd_cwnd;