📄 tcp_input.c
字号:
 * to low value, and then abruptly stops to do it and starts to delay
 * ACKs, wait for troubles.
 */
	if (dst_metric(dst, RTAX_RTT) > tp->srtt) {
		tp->srtt = dst_metric(dst, RTAX_RTT);
		tp->rtt_seq = tp->snd_nxt;
	}
	if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) {
		tp->mdev = dst_metric(dst, RTAX_RTTVAR);
		tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
	}
	tcp_set_rto(tp);
	tcp_bound_rto(tp);
	if (tp->rto < TCP_TIMEOUT_INIT && !tp->saw_tstamp)
		goto reset;
	tp->snd_cwnd = tcp_init_cwnd(tp, dst);
	tp->snd_cwnd_stamp = tcp_time_stamp;
	return;

reset:
	/* Play conservative. If timestamps are not
	 * supported, TCP will fail to recalculate correct
	 * rtt, if initial rto is too small. FORGET ALL AND RESET!
	 */
	if (!tp->saw_tstamp && tp->srtt) {
		tp->srtt = 0;
		tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
		tp->rto = TCP_TIMEOUT_INIT;
	}
}

/* Raise the connection's reordering metric to @metric (clamped to
 * TCP_MAX_REORDERING) when a larger packet displacement than currently
 * recorded has been observed.  @ts is nonzero when the reordering was
 * detected via timestamps; otherwise the SNMP counter bumped depends on
 * whether the connection is Reno, FACK or plain SACK.  Since genuine
 * reordering has now been seen, the FACK bit in sack_ok is cleared --
 * FACK assumes in-order delivery and would misfire here.
 */
static void tcp_update_reordering(struct tcp_opt *tp, int metric, int ts)
{
	if (metric > tp->reordering) {
		tp->reordering = min(TCP_MAX_REORDERING, metric);

		/* This exciting event is worth to be remembered. 8) */
		if (ts)
			NET_INC_STATS_BH(LINUX_MIB_TCPTSREORDER);
		else if (IsReno(tp))
			NET_INC_STATS_BH(LINUX_MIB_TCPRENOREORDER);
		else if (IsFack(tp))
			NET_INC_STATS_BH(LINUX_MIB_TCPFACKREORDER);
		else
			NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER);
#if FASTRETRANS_DEBUG > 1
		printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
		       tp->sack_ok, tp->ca_state,
		       tp->reordering,
		       tcp_get_pcount(&tp->fackets_out),
		       tcp_get_pcount(&tp->sacked_out),
		       tp->undo_marker ? tp->undo_retrans : 0);
#endif
		/* Disable FACK yet. */
		tp->sack_ok &= ~2;
	}
}

/* This procedure tags the retransmission queue when SACKs arrive.
 *
 * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
 * Packets in queue with these bits set are counted in variables
 * sacked_out, retrans_out and lost_out, correspondingly.
 *
 * Valid combinations are:
 *	Tag  InFlight	Description
 *	0    1		- orig segment is in flight.
 *	S    0		- nothing flies, orig reached receiver.
 *	L    0		- nothing flies, orig lost by net.
 *	R    2		- both orig and retransmit are in flight.
 *	L|R  1		- orig is lost, retransmit is in flight.
 *	S|R  1		- orig reached receiver, retrans is still in flight.
 *	(L|S|R is logically valid, it could occur when L|R is sacked,
 *	 but it is equivalent to plain S and code short-circuits it to S.
 *	 L|S is logically invalid, it would mean -1 packet in flight 8))
 *
 * These 6 states form finite state machine, controlled by the following events:
 * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
 * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
 * 3. Loss detection event of one of three flavors:
 *	A. Scoreboard estimator decided the packet is lost.
 *	   A'. Reno "three dupacks" marks head of queue lost.
 *	   A''. Its FACK modification, head until snd.fack is lost.
 *	B. SACK arrives sacking data transmitted after never retransmitted
 *	   hole was sent out.
 *	C. SACK arrives sacking SND.NXT at the moment, when the
 *	   segment was retransmitted.
 * 4. D-SACK added new rule: D-SACK changes any tag to S.
 *
 * It is pleasant to note, that state diagram turns out to be commutative,
 * so that we are allowed not to be bothered by order of our actions,
 * when multiple events arrive simultaneously. (see the function below).
 *
 * Reordering detection.
 * --------------------
 * Reordering metric is maximal distance, which a packet can be displaced
 * in packet stream. With SACKs we can estimate it:
 *
 * 1. SACK fills old hole and the corresponding segment was not
 *    ever retransmitted -> reordering. Alas, we cannot use it
 *    when segment was retransmitted.
 * 2. The last flaw is solved with D-SACK. D-SACK arrives
 *    for retransmitted and already SACKed segment -> reordering..
 * Both of these heuristics are not used in Loss state, when we cannot
 * account for retransmits accurately.
 */

/* Walk the retransmission queue against the SACK blocks carried in
 * @ack_skb and update the S/R/L scoreboard described above.
 * @prior_snd_una is snd_una before this ACK was processed.
 * Returns a FLAG_* bitmask (FLAG_DATA_SACKED, FLAG_DATA_LOST), or 0
 * when the ACK is older than prior_snd_una - max_window and is dropped.
 */
static int
tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
{
	struct tcp_opt *tp = tcp_sk(sk);
	/* Raw pointer to the SACK option inside the ACK; sacked holds its
	 * offset from the start of the TCP header here. */
	unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
	struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
	/* Lowest fack_count at which we saw evidence of reordering;
	 * starts at packets_out, i.e. "no reordering seen". */
	int reord = tcp_get_pcount(&tp->packets_out);
	int prior_fackets;
	/* Highest end_seq of a retransmitted-and-SACKed segment: candidate
	 * for the "lost retransmit" detection (event "C") below. */
	u32 lost_retrans = 0;
	int flag = 0;
	int i;

	/* So, SACKs for already sent large segments will be lost.
	 * Not good, but alternative is to resegment the queue. */
	if (sk->sk_route_caps & NETIF_F_TSO) {
		sk->sk_route_caps &= ~NETIF_F_TSO;
		sk->sk_no_largesend = 1;
		tp->mss_cache = tp->mss_cache_std;
	}

	if (!tcp_get_pcount(&tp->sacked_out))
		tcp_set_pcount(&tp->fackets_out, 0);
	prior_fackets = tcp_get_pcount(&tp->fackets_out);

	for (i=0; i<num_sacks; i++, sp++) {
		struct sk_buff *skb;
		__u32 start_seq = ntohl(sp->start_seq);
		__u32 end_seq = ntohl(sp->end_seq);
		int fack_count = 0;
		int dup_sack = 0;

		/* Check for D-SACK: only the first block can be one, either
		 * below the cumulative ACK or duplicating the second block. */
		if (i == 0) {
			u32 ack = TCP_SKB_CB(ack_skb)->ack_seq;

			if (before(start_seq, ack)) {
				dup_sack = 1;
				tp->sack_ok |= 4;
				NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
			} else if (num_sacks > 1 &&
				   !after(end_seq, ntohl(sp[1].end_seq)) &&
				   !before(start_seq, ntohl(sp[1].start_seq))) {
				dup_sack = 1;
				tp->sack_ok |= 4;
				NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV);
			}

			/* D-SACK for already forgotten data...
			 * Do dumb counting. */
			if (dup_sack &&
			    !after(end_seq, prior_snd_una) &&
			    after(end_seq, tp->undo_marker))
				tp->undo_retrans--;

			/* Eliminate too old ACKs, but take into
			 * account more or less fresh ones, they can
			 * contain valid SACK info.
			 */
			if (before(ack, prior_snd_una - tp->max_window))
				return 0;
		}

		/* Event "B" in the comment above. */
		if (after(end_seq, tp->high_seq))
			flag |= FLAG_DATA_LOST;

		sk_stream_for_retrans_queue(skb, sk) {
			u8 sacked = TCP_SKB_CB(skb)->sacked;
			int in_sack;

			/* The retransmission queue is always in order, so
			 * we can short-circuit the walk early.
			 */
			if(!before(TCP_SKB_CB(skb)->seq, end_seq))
				break;

			fack_count += tcp_skb_pcount(skb);

			/* in_sack: this skb lies entirely inside the block. */
			in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
				!before(end_seq, TCP_SKB_CB(skb)->end_seq);

			/* Account D-SACK for retransmitted packet. */
			if ((dup_sack && in_sack) &&
			    (sacked & TCPCB_RETRANS) &&
			    after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
				tp->undo_retrans--;

			/* The frame is ACKed. */
			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) {
				if (sacked&TCPCB_RETRANS) {
					if ((dup_sack && in_sack) &&
					    (sacked&TCPCB_SACKED_ACKED))
						reord = min(fack_count, reord);
				} else {
					/* If it was in a hole, we detected reordering. */
					if (fack_count < prior_fackets &&
					    !(sacked&TCPCB_SACKED_ACKED))
						reord = min(fack_count, reord);
				}

				/* Nothing to do; acked frame is about to be dropped. */
				continue;
			}

			/* Remember the highest retransmit that got SACKed
			 * past its own ack_seq -- event "C" candidate. */
			if ((sacked&TCPCB_SACKED_RETRANS) &&
			    after(end_seq, TCP_SKB_CB(skb)->ack_seq) &&
			    (!lost_retrans || after(end_seq, lost_retrans)))
				lost_retrans = end_seq;

			if (!in_sack)
				continue;

			if (!(sacked&TCPCB_SACKED_ACKED)) {
				if (sacked & TCPCB_SACKED_RETRANS) {
					/* If the segment is not tagged as lost,
					 * we do not clear RETRANS, believing
					 * that retransmission is still in flight.
					 */
					if (sacked & TCPCB_LOST) {
						TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
						tcp_dec_pcount(&tp->lost_out, skb);
						tcp_dec_pcount(&tp->retrans_out, skb);
					}
				} else {
					/* New sack for not retransmitted frame,
					 * which was in hole. It is reordering.
					 */
					if (!(sacked & TCPCB_RETRANS) &&
					    fack_count < prior_fackets)
						reord = min(fack_count, reord);

					if (sacked & TCPCB_LOST) {
						TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
						tcp_dec_pcount(&tp->lost_out, skb);
					}
				}

				TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
				flag |= FLAG_DATA_SACKED;
				tcp_inc_pcount(&tp->sacked_out, skb);

				if (fack_count > tcp_get_pcount(&tp->fackets_out))
					tcp_set_pcount(&tp->fackets_out, fack_count);
			} else {
				if (dup_sack && (sacked&TCPCB_RETRANS))
					reord = min(fack_count, reord);
			}

			/* D-SACK. We can detect redundant retransmission
			 * in S|R and plain R frames and clear it.
			 * undo_retrans is decreased above, L|R frames
			 * are accounted above as well.
			 */
			if (dup_sack &&
			    (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
				tcp_dec_pcount(&tp->retrans_out, skb);
			}
		}
	}

	/* Check for lost retransmit. This superb idea is
	 * borrowed from "ratehalving". Event "C".
	 * Later note: FACK people cheated me again 8),
	 * we have to account for reordering! Ugly,
	 * but should help.
	 */
	if (lost_retrans && tp->ca_state == TCP_CA_Recovery) {
		struct sk_buff *skb;

		sk_stream_for_retrans_queue(skb, sk) {
			if (after(TCP_SKB_CB(skb)->seq, lost_retrans))
				break;
			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
				continue;
			if ((TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) &&
			    after(lost_retrans, TCP_SKB_CB(skb)->ack_seq) &&
			    (IsFack(tp) ||
			     !before(lost_retrans,
				     TCP_SKB_CB(skb)->ack_seq + tp->reordering *
				     tp->mss_cache_std))) {
				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
				tcp_dec_pcount(&tp->retrans_out, skb);

				if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
					tcp_inc_pcount(&tp->lost_out, skb);
					TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
					flag |= FLAG_DATA_SACKED;
					NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
				}
			}
		}
	}

	/* left_out = sacked_out + lost_out (the tcp_sync_left_out invariant). */
	tcp_set_pcount(&tp->left_out,
		       (tcp_get_pcount(&tp->sacked_out) +
			tcp_get_pcount(&tp->lost_out)));

	/* Reordering heuristics are not used in Loss state (see comment
	 * above): we cannot account for retransmits accurately there. */
	if ((reord < tcp_get_pcount(&tp->fackets_out)) &&
	    tp->ca_state != TCP_CA_Loss)
		tcp_update_reordering(tp,
				      ((tcp_get_pcount(&tp->fackets_out) + 1) -
				       reord), 0);

#if FASTRETRANS_DEBUG > 0
	BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0);
	BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0);
	BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0);
	BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0);
#endif
	return flag;
}

/* RTO occurred, but do not yet enter loss state. Instead, transmit two new
 * segments to see from the next ACKs whether any data was really missing.
 * If the RTO was spurious, new ACKs should arrive.
 */
void tcp_enter_frto(struct sock *sk)
{
	struct tcp_opt *tp = tcp_sk(sk);
	struct sk_buff *skb;

	tp->frto_counter = 1;

	/* Reduce ssthresh only if we have not already done so in the
	 * current recovery episode (states up to Disorder, a completed
	 * recovery, or a Loss state entered without retransmissions). */
	if (tp->ca_state <= TCP_CA_Disorder ||
	    tp->snd_una == tp->high_seq ||
	    (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) {
		tp->prior_ssthresh = tcp_current_ssthresh(tp);
		if (!tcp_westwood_ssthresh(tp))
			tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
	}

	/* Have to clear retransmission markers here to keep the bookkeeping
	 * in shape, even though we are not yet in Loss state.
	 * If something was really lost, it is eventually caught up
	 * in tcp_enter_frto_loss.
	 */
	tcp_set_pcount(&tp->retrans_out, 0);
	tp->undo_marker = tp->snd_una;
	tp->undo_retrans = 0;

	sk_stream_for_retrans_queue(skb, sk) {
		TCP_SKB_CB(skb)->sacked &= ~TCPCB_RETRANS;
	}
	tcp_sync_left_out(tp);

	tcp_set_ca_state(tp, TCP_CA_Open);
	/* Segments beyond this mark were (re)sent after the RTO;
	 * tcp_enter_frto_loss will not tag them lost. */
	tp->frto_highmark = tp->snd_nxt;
}

/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
 * which indicates that we should follow the traditional RTO recovery,
 * i.e. mark everything lost and do go-back-N retransmission.
 */
static void tcp_enter_frto_loss(struct sock *sk)
{
	struct tcp_opt *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int cnt = 0;	/* running pcount while walking the queue */

	/* Rebuild sacked_out/lost_out/fackets_out from scratch below. */
	tcp_set_pcount(&tp->sacked_out, 0);
	tcp_set_pcount(&tp->lost_out, 0);
	tcp_set_pcount(&tp->fackets_out, 0);

	sk_stream_for_retrans_queue(skb, sk) {
		cnt += tcp_skb_pcount(skb);
		TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {

			/* Do not mark those segments lost that were
			 * forward transmitted after RTO
			 */
			if (!after(TCP_SKB_CB(skb)->end_seq,
				   tp->frto_highmark)) {
				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
				tcp_inc_pcount(&tp->lost_out, skb);
			}
		} else {
			tcp_inc_pcount(&tp->sacked_out, skb);
			tcp_set_pcount(&tp->fackets_out, cnt);
		}
	}
	tcp_sync_left_out(tp);

	tp->snd_cwnd = tp->frto_counter + tcp_packets_in_flight(tp)+1;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_time_stamp;
	tp->undo_marker = 0;
	tp->frto_counter = 0;

	tp->reordering = min_t(unsigned int, tp->reordering,
			       sysctl_tcp_reordering);
	tcp_set_ca_state(tp, TCP_CA_Loss);
	tp->high_seq = tp->frto_highmark;
	TCP_ECN_queue_cwr(tp);

	init_bictcp(tp);
}

/* Reset all retransmission bookkeeping: the left_out/retrans_out/
 * fackets_out/sacked_out/lost_out counters and the recovery undo state.
 */
void tcp_clear_retrans(struct tcp_opt *tp)
{
	tcp_set_pcount(&tp->left_out, 0);
	tcp_set_pcount(&tp->retrans_out, 0);

	tcp_set_pcount(&tp->fackets_out, 0);
	tcp_set_pcount(&tp->sacked_out, 0);
	tcp_set_pcount(&tp->lost_out, 0);

	tp->undo_marker = 0;
	tp->undo_retrans = 0;
}

/* Enter Loss state. If "how" is not zero, forget all SACK information
 * and reset tags completely, otherwise preserve SACKs. If receiver
 * dropped its ofo queue, we will know this due to reneging detection.
 */
void tcp_enter_loss(struct sock *sk, int how)
{
	struct tcp_opt *tp = tcp_sk(sk);
	struct sk_buff *skb;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -