📄 tcp_input.c
static int
tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned char *ptr = (skb_transport_header(ack_skb) +
			      TCP_SKB_CB(ack_skb)->sacked);
	struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
	struct sk_buff *cached_skb;
	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
	int reord = tp->packets_out;
	int prior_fackets;
	u32 highest_sack_end_seq = tp->lost_retrans_low;
	int flag = 0;
	int found_dup_sack = 0;
	int cached_fack_count;
	int i;
	int first_sack_index;
	int force_one_sack;

	if (!tp->sacked_out) {
		if (WARN_ON(tp->fackets_out))
			tp->fackets_out = 0;
		tp->highest_sack = tp->snd_una;
	}
	prior_fackets = tp->fackets_out;

	found_dup_sack = tcp_check_dsack(tp, ack_skb, sp, num_sacks,
					 prior_snd_una);
	if (found_dup_sack)
		flag |= FLAG_DSACKING_ACK;

	/* Eliminate too old ACKs, but take into
	 * account more or less fresh ones, they can
	 * contain valid SACK info.
	 */
	if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
		return 0;

	if (!tp->packets_out)
		goto out;

	/* SACK fastpath:
	 * if the only SACK change is the increase of the end_seq of
	 * the first block, then only apply that SACK block
	 * and use retrans queue hinting; otherwise take the slowpath.
	 */
	force_one_sack = 1;
	for (i = 0; i < num_sacks; i++) {
		__be32 start_seq = sp[i].start_seq;
		__be32 end_seq = sp[i].end_seq;

		if (i == 0) {
			if (tp->recv_sack_cache[i].start_seq != start_seq)
				force_one_sack = 0;
		} else {
			if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
			    (tp->recv_sack_cache[i].end_seq != end_seq))
				force_one_sack = 0;
		}
		tp->recv_sack_cache[i].start_seq = start_seq;
		tp->recv_sack_cache[i].end_seq = end_seq;
	}
	/* Clear the rest of the cache sack blocks so they won't match mistakenly. */
	for (; i < ARRAY_SIZE(tp->recv_sack_cache); i++) {
		tp->recv_sack_cache[i].start_seq = 0;
		tp->recv_sack_cache[i].end_seq = 0;
	}

	first_sack_index = 0;
	if (force_one_sack)
		num_sacks = 1;
	else {
		int j;
		tp->fastpath_skb_hint = NULL;

		/* order SACK blocks to allow in order walk of the retrans queue */
		for (i = num_sacks-1; i > 0; i--) {
			for (j = 0; j < i; j++) {
				if (after(ntohl(sp[j].start_seq),
					  ntohl(sp[j+1].start_seq))) {
					struct tcp_sack_block_wire tmp;

					tmp = sp[j];
					sp[j] = sp[j+1];
					sp[j+1] = tmp;

					/* Track where the first SACK block goes to */
					if (j == first_sack_index)
						first_sack_index = j+1;
				}
			}
		}
	}

	/* Use SACK fastpath hint if valid */
	cached_skb = tp->fastpath_skb_hint;
	cached_fack_count = tp->fastpath_cnt_hint;
	if (!cached_skb) {
		cached_skb = tcp_write_queue_head(sk);
		cached_fack_count = 0;
	}

	for (i = 0; i < num_sacks; i++) {
		struct sk_buff *skb;
		__u32 start_seq = ntohl(sp->start_seq);
		__u32 end_seq = ntohl(sp->end_seq);
		int fack_count;
		int dup_sack = (found_dup_sack && (i == first_sack_index));
		int next_dup = (found_dup_sack && (i+1 == first_sack_index));

		sp++;

		if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq)) {
			if (dup_sack) {
				if (!tp->undo_marker)
					NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO);
				else
					NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD);
			} else {
				/* Don't count olds caused by ACK reordering */
				if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
				    !after(end_seq, tp->snd_una))
					continue;
				NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD);
			}
			continue;
		}

		skb = cached_skb;
		fack_count = cached_fack_count;
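		/* Illustrative note (not part of the original source): because
		 * sp[] was sorted by start_seq above, e.g. wire order
		 * {4000-5000}{1000-2000}{2500-3000} becomes
		 * {1000-2000}{2500-3000}{4000-5000}, the walk below can resume
		 * from cached_skb/cached_fack_count instead of rewinding to
		 * the queue head for every block; first_sack_index still
		 * remembers where the block that came first on the wire (the
		 * D-SACK candidate) ended up after sorting.
		 */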
		/* Event "B" in the comment above. */
		if (after(end_seq, tp->high_seq))
			flag |= FLAG_DATA_LOST;

		tcp_for_write_queue_from(skb, sk) {
			int in_sack = 0;
			u8 sacked;

			if (skb == tcp_send_head(sk))
				break;

			cached_skb = skb;
			cached_fack_count = fack_count;
			if (i == first_sack_index) {
				tp->fastpath_skb_hint = skb;
				tp->fastpath_cnt_hint = fack_count;
			}

			/* The retransmission queue is always in order, so
			 * we can short-circuit the walk early.
			 */
			if (!before(TCP_SKB_CB(skb)->seq, end_seq))
				break;

			dup_sack = (found_dup_sack && (i == first_sack_index));

			/* Due to sorting DSACK may reside within this SACK block! */
			if (next_dup) {
				u32 dup_start = ntohl(sp->start_seq);
				u32 dup_end = ntohl(sp->end_seq);

				if (before(TCP_SKB_CB(skb)->seq, dup_end)) {
					in_sack = tcp_match_skb_to_sack(sk, skb,
									dup_start, dup_end);
					if (in_sack > 0)
						dup_sack = 1;
				}
			}

			/* DSACK info lost if out-of-mem, try SACK still */
			if (in_sack <= 0)
				in_sack = tcp_match_skb_to_sack(sk, skb,
								start_seq, end_seq);
			if (unlikely(in_sack < 0))
				break;

			sacked = TCP_SKB_CB(skb)->sacked;

			/* Account D-SACK for retransmitted packet. */
			if ((dup_sack && in_sack) &&
			    (sacked & TCPCB_RETRANS) &&
			    after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
				tp->undo_retrans--;

			/* The frame is ACKed. */
			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) {
				if (sacked&TCPCB_RETRANS) {
					if ((dup_sack && in_sack) &&
					    (sacked&TCPCB_SACKED_ACKED))
						reord = min(fack_count, reord);
				}

				/* Nothing to do; acked frame is about to be dropped. */
				fack_count += tcp_skb_pcount(skb);
				continue;
			}

			if (!in_sack) {
				fack_count += tcp_skb_pcount(skb);
				continue;
			}

			if (!(sacked&TCPCB_SACKED_ACKED)) {
				if (sacked & TCPCB_SACKED_RETRANS) {
					/* If the segment is not tagged as lost,
					 * we do not clear RETRANS, believing
					 * that retransmission is still in flight.
					 */
					if (sacked & TCPCB_LOST) {
						TCP_SKB_CB(skb)->sacked &=
							~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
						tp->lost_out -= tcp_skb_pcount(skb);
						tp->retrans_out -= tcp_skb_pcount(skb);

						/* clear lost hint */
						tp->retransmit_skb_hint = NULL;
					}
				} else {
					if (!(sacked & TCPCB_RETRANS)) {
						/* New sack for not retransmitted frame,
						 * which was in hole. It is reordering.
						 */
						if (fack_count < prior_fackets)
							reord = min(fack_count, reord);

						/* SACK enhanced F-RTO (RFC4138; Appendix B) */
						if (!after(TCP_SKB_CB(skb)->end_seq,
							   tp->frto_highmark))
							flag |= FLAG_ONLY_ORIG_SACKED;
					}

					if (sacked & TCPCB_LOST) {
						TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
						tp->lost_out -= tcp_skb_pcount(skb);

						/* clear lost hint */
						tp->retransmit_skb_hint = NULL;
					}
				}

				TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
				flag |= FLAG_DATA_SACKED;
				tp->sacked_out += tcp_skb_pcount(skb);

				fack_count += tcp_skb_pcount(skb);
				if (fack_count > tp->fackets_out)
					tp->fackets_out = fack_count;

				if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) {
					tp->highest_sack = TCP_SKB_CB(skb)->seq;
					highest_sack_end_seq = TCP_SKB_CB(skb)->end_seq;
				}
			} else {
				if (dup_sack && (sacked&TCPCB_RETRANS))
					reord = min(fack_count, reord);
				fack_count += tcp_skb_pcount(skb);
			}
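			/* Recap of the tag bookkeeping above (illustrative
			 * note, not in the original source): a newly SACKed
			 * skb gains TCPCB_SACKED_ACKED and leaves lost_out if
			 * it carried TCPCB_LOST (leaving retrans_out too when
			 * it was tagged L|R); a plain R tag is kept since that
			 * retransmission may still be in flight.  Reordering
			 * example: if a never-retransmitted skb is SACKed at
			 * fack_count 3 while prior_fackets was already 7, it
			 * sat in a hole below earlier SACKed data, so reord
			 * drops to 3 and tcp_update_reordering() later sees
			 * fackets_out - reord.
			 */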
			/* D-SACK. We can detect redundant retransmission
			 * in S|R and plain R frames and clear it.
			 * undo_retrans is decreased above, L|R frames
			 * are accounted above as well.
			 */
			if (dup_sack &&
			    (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
				tp->retrans_out -= tcp_skb_pcount(skb);
				tp->retransmit_skb_hint = NULL;
			}
		}

		/* SACK enhanced FRTO (RFC4138, Appendix B): clearing is
		 * correct due to the in-order walk.
		 */
		if (after(end_seq, tp->frto_highmark))
			flag &= ~FLAG_ONLY_ORIG_SACKED;
	}

	if (tp->retrans_out &&
	    after(highest_sack_end_seq, tp->lost_retrans_low) &&
	    icsk->icsk_ca_state == TCP_CA_Recovery)
		flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq);

	tcp_verify_left_out(tp);

	if ((reord < tp->fackets_out) &&
	    icsk->icsk_ca_state != TCP_CA_Loss &&
	    (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
		tcp_update_reordering(sk, tp->fackets_out - reord, 0);

out:

#if FASTRETRANS_DEBUG > 0
	BUG_TRAP((int)tp->sacked_out >= 0);
	BUG_TRAP((int)tp->lost_out >= 0);
	BUG_TRAP((int)tp->retrans_out >= 0);
	BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0);
#endif
	return flag;
}

/* If we receive more dupacks than we expected counting segments
 * in assumption of absent reordering, interpret this as reordering.
 * The only other reason could be a bug in the receiver TCP.
 */
static void tcp_check_reno_reordering(struct sock *sk, const int addend)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 holes;

	holes = max(tp->lost_out, 1U);
	holes = min(holes, tp->packets_out);

	if ((tp->sacked_out + holes) > tp->packets_out) {
		tp->sacked_out = tp->packets_out - holes;
		tcp_update_reordering(sk, tp->packets_out + addend, 0);
	}
}

/* Emulate SACKs for SACKless connection: account for a new dupack. */
static void tcp_add_reno_sack(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	tp->sacked_out++;
	tcp_check_reno_reordering(sk, 0);
	tcp_verify_left_out(tp);
}

/* Account for ACK, ACKing some data in Reno Recovery phase. */
static void tcp_remove_reno_sacks(struct sock *sk, int acked)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (acked > 0) {
		/* One ACK acked the hole. The rest eat duplicate ACKs. */
		if (acked-1 >= tp->sacked_out)
			tp->sacked_out = 0;
		else
			tp->sacked_out -= acked-1;
	}
	tcp_check_reno_reordering(sk, acked);
	tcp_verify_left_out(tp);
}

static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
{
	tp->sacked_out = 0;
}
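/* Worked example for tcp_check_reno_reordering() (illustrative, not in the
 * original source): with packets_out = 10 and lost_out = 0 it assumes
 * holes = max(0, 1) = 1, i.e. at most one genuinely missing segment.  Each
 * duplicate ACK bumps sacked_out via tcp_add_reno_sack(), so once
 * sacked_out + 1 > 10 more dupacks have been counted than the nine segments
 * that could sit behind a single hole; sacked_out is then clamped to
 * packets_out - holes = 9 and tcp_update_reordering() is fed a reordering
 * metric of packets_out + addend.
 */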
/* F-RTO can only be used if TCP has never retransmitted anything other than
 * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
 */
int tcp_use_frto(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	if (!sysctl_tcp_frto)
		return 0;

	if (IsSackFrto())
		return 1;

	/* Avoid expensive walking of rexmit queue if possible */
	if (tp->retrans_out > 1)
		return 0;

	skb = tcp_write_queue_head(sk);
	skb = tcp_write_queue_next(sk, skb);	/* Skips head */
	tcp_for_write_queue_from(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;
		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
			return 0;
		/* Short-circuit when first non-SACKed skb has been checked */
		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED))
			break;
	}
	return 1;
}

/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
 * recovery a bit and use heuristics in tcp_process_frto() to detect if
 * the RTO was spurious. Only clear SACKED_RETRANS of the head here to
 * keep retrans_out counting accurate (with SACK F-RTO, other than head
 * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS
 * bits are handled if the Loss state is really to be entered (in
 * tcp_enter_frto_loss).
 *
 * Do like tcp_enter_loss() would; when RTO expires the second time it
 * does:
 *   "Reduce ssthresh if it has not yet been made inside this window."
 */
void tcp_enter_frto(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
	    tp->snd_una == tp->high_seq ||
	    ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
	     !icsk->icsk_retransmits)) {
		tp->prior_ssthresh = tcp_current_ssthresh(sk);
		/* Our state is too optimistic in ssthresh() call because cwnd
		 * is not reduced until tcp_enter_frto_loss() when previous F-RTO
		 * recovery has not yet completed. Pattern would be this: RTO,
		 * Cumulative ACK, RTO (2xRTO for the same segment does not end
		 * up here twice).
		 * RFC4138 should be more specific on what to do, even though
		 * RTO is quite unlikely to occur after the first Cumulative ACK
		 * due to back-off and complexity of triggering events ...
		 */
		if (tp->frto_counter) {
			u32 stored_cwnd;
			stored_cwnd = tp->snd_cwnd;