tcp_input.c
                /* Happy end! We did not retransmit anything
                 * or our original transmission succeeded.
                 */
                DBGUNDO(sk, tp, tp->ca_state == TCP_CA_Loss ? "loss" : "retrans");
                tcp_undo_cwr(tp, 1);
                if (tp->ca_state == TCP_CA_Loss)
                        NET_INC_STATS_BH(TCPLossUndo);
                else
                        NET_INC_STATS_BH(TCPFullUndo);
                tp->undo_marker = 0;
        }
        if (tp->snd_una == tp->high_seq && IsReno(tp)) {
                /* Hold old state until something *above* high_seq
                 * is ACKed. For Reno it is a MUST to prevent false
                 * fast retransmits (RFC2582). SACK TCP is safe. */
                tcp_moderate_cwnd(tp);
                return 1;
        }
        tp->ca_state = TCP_CA_Open;
        return 0;
}

/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
static void tcp_try_undo_dsack(struct sock *sk, struct tcp_opt *tp)
{
        if (tp->undo_marker && !tp->undo_retrans) {
                DBGUNDO(sk, tp, "D-SACK");
                tcp_undo_cwr(tp, 1);
                tp->undo_marker = 0;
                NET_INC_STATS_BH(TCPDSACKUndo);
        }
}
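/* Illustrative sketch (not kernel code): the undo logic above rests on
 * two counters kept in struct tcp_opt. undo_marker records snd_una when
 * a cwnd reduction begins, and undo_retrans counts retransmissions not
 * yet proven spurious; each D-SACK covering a retransmitted segment
 * decrements it. Once undo_retrans reaches zero, every retransmission
 * was unnecessary and tcp_try_undo_dsack() restores cwnd. A minimal
 * model with hypothetical names (the guard macro is ours, not the
 * kernel's):
 */
#ifdef TCP_INPUT_ILLUSTRATIVE_SKETCHES
struct undo_bookkeeping {
        __u32 undo_marker;      /* snd_una when the reduction started */
        __u32 undo_retrans;     /* retransmits not yet proven spurious */
};

static void sketch_note_dsack_for_retransmit(struct undo_bookkeeping *u)
{
        if (u->undo_marker && u->undo_retrans)
                u->undo_retrans--;      /* one retransmit proven spurious */
        /* tcp_try_undo_dsack() fires once undo_retrans reaches zero. */
}
#endif /* TCP_INPUT_ILLUSTRATIVE_SKETCHES */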
/* Undo during fast recovery after partial ACK. */
static int tcp_try_undo_partial(struct sock *sk, struct tcp_opt *tp, int acked)
{
        /* Partial ACK arrived. Force Hoe's retransmit. */
        int failed = IsReno(tp) || tp->fackets_out > tp->reordering;

        if (tcp_may_undo(tp)) {
                /* Plain luck! Hole is filled with delayed
                 * packet, rather than with a retransmit.
                 */
                if (tp->retrans_out == 0)
                        tp->retrans_stamp = 0;

                tcp_update_reordering(tp, tcp_fackets_out(tp) + acked, 1);

                DBGUNDO(sk, tp, "Hoe");
                tcp_undo_cwr(tp, 0);
                NET_INC_STATS_BH(TCPPartialUndo);

                /* So... Do not make Hoe's retransmit yet.
                 * If the first packet was delayed, the remaining
                 * ones are most probably delayed as well.
                 */
                failed = 0;
        }
        return failed;
}

/* Undo during loss recovery after partial ACK. */
static int tcp_try_undo_loss(struct sock *sk, struct tcp_opt *tp)
{
        if (tcp_may_undo(tp)) {
                struct sk_buff *skb;

                for_retrans_queue(skb, sk, tp) {
                        TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
                }
                DBGUNDO(sk, tp, "partial loss");
                tp->lost_out = 0;
                tp->left_out = tp->sacked_out;
                tcp_undo_cwr(tp, 1);
                NET_INC_STATS_BH(TCPLossUndo);
                tp->retransmits = 0;
                tp->undo_marker = 0;
                if (!IsReno(tp))
                        tp->ca_state = TCP_CA_Open;
                return 1;
        }
        return 0;
}

static __inline__ void tcp_complete_cwr(struct tcp_opt *tp)
{
        tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
        tp->snd_cwnd_stamp = tcp_time_stamp;
}

static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag)
{
        tp->left_out = tp->sacked_out;

        if (tp->retrans_out == 0)
                tp->retrans_stamp = 0;

        if (flag & FLAG_ECE)
                tcp_enter_cwr(tp);

        if (tp->ca_state != TCP_CA_CWR) {
                int state = TCP_CA_Open;

                if (tp->left_out || tp->retrans_out || tp->undo_marker)
                        state = TCP_CA_Disorder;

                if (tp->ca_state != state) {
                        tp->ca_state = state;
                        tp->high_seq = tp->snd_nxt;
                }
                tcp_moderate_cwnd(tp);
        } else {
                tcp_cwnd_down(tp);
        }
}

/* Process an event, which can update packets-in-flight not trivially.
 * The main goal of this function is to calculate a new estimate for
 * left_out, taking into account both packets sitting in the receiver's
 * buffer and packets lost by the network.
 *
 * Besides that, it does CWND reduction when packet loss is detected,
 * and changes the state of the machine.
 *
 * It does _not_ decide what to send; that is done in
 * tcp_xmit_retransmit_queue().
 */
static void
tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
                      int prior_packets, int flag)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int is_dupack = (tp->snd_una == prior_snd_una && !(flag & FLAG_NOT_DUP));

        /* Some technical things:
         * 1. Reno does not count dupacks (sacked_out) automatically. */
        if (!tp->packets_out)
                tp->sacked_out = 0;
        /* 2. SACK counts snd_fack in packets inaccurately. */
        if (tp->sacked_out == 0)
                tp->fackets_out = 0;

        /* Now the state machine starts.
         * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
        if (flag & FLAG_ECE)
                tp->prior_ssthresh = 0;

        /* B. In all the states check for reneging SACKs. */
        if (tp->sacked_out && tcp_check_sack_reneging(sk, tp))
                return;

        /* C. Process data loss notification, provided it is valid. */
        if ((flag & FLAG_DATA_LOST) &&
            before(tp->snd_una, tp->high_seq) &&
            tp->ca_state != TCP_CA_Open &&
            tp->fackets_out > tp->reordering) {
                tcp_mark_head_lost(sk, tp, tp->fackets_out - tp->reordering, tp->high_seq);
                NET_INC_STATS_BH(TCPLoss);
        }

        /* D. Synchronize left_out to current state. */
        tp->left_out = tp->sacked_out + tp->lost_out;

        /* E. Check state exit conditions. State can be terminated
         *    when high_seq is ACKed. */
        if (tp->ca_state == TCP_CA_Open) {
                BUG_TRAP(tp->retrans_out == 0);
                tp->retrans_stamp = 0;
        } else if (!before(tp->snd_una, tp->high_seq)) {
                switch (tp->ca_state) {
                case TCP_CA_Loss:
                        tp->retransmits = 0;
                        if (tcp_try_undo_recovery(sk, tp))
                                return;
                        break;

                case TCP_CA_CWR:
                        /* CWR is to be held until something *above* high_seq
                         * is ACKed, so that the CWR bit reaches the receiver. */
                        if (tp->snd_una != tp->high_seq) {
                                tcp_complete_cwr(tp);
                                tp->ca_state = TCP_CA_Open;
                        }
                        break;

                case TCP_CA_Disorder:
                        tcp_try_undo_dsack(sk, tp);
                        tp->undo_marker = 0;
                        tp->ca_state = TCP_CA_Open;
                        break;

                case TCP_CA_Recovery:
                        if (IsReno(tp))
                                tcp_reset_reno_sack(tp);
                        if (tcp_try_undo_recovery(sk, tp))
                                return;
                        tcp_complete_cwr(tp);
                        break;
                }
        }

        /* F. Process state. */
        switch (tp->ca_state) {
        case TCP_CA_Recovery:
                if (prior_snd_una == tp->snd_una) {
                        if (IsReno(tp) && is_dupack)
                                tcp_add_reno_sack(tp);
                } else {
                        int acked = prior_packets - tp->packets_out;

                        if (IsReno(tp))
                                tcp_remove_reno_sacks(sk, tp, acked);
                        is_dupack = tcp_try_undo_partial(sk, tp, acked);
                }
                break;
        case TCP_CA_Loss:
                if (flag & FLAG_ACKED)
                        tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
                if (!tcp_try_undo_loss(sk, tp)) {
                        tcp_moderate_cwnd(tp);
                        tcp_xmit_retransmit_queue(sk);
                        return;
                }
                if (tp->ca_state != TCP_CA_Open)
                        return;
                /* Loss is undone; fall through to processing in Open state. */
        default:
                if (IsReno(tp)) {
                        if (tp->snd_una != prior_snd_una)
                                tcp_reset_reno_sack(tp);
                        if (is_dupack)
                                tcp_add_reno_sack(tp);
                }

                if (tp->ca_state == TCP_CA_Disorder)
                        tcp_try_undo_dsack(sk, tp);

                if (!tcp_time_to_recover(sk, tp)) {
                        tcp_try_to_open(sk, tp, flag);
                        return;
                }

                /* Otherwise enter Recovery state. */
                if (IsReno(tp))
                        NET_INC_STATS_BH(TCPRenoRecovery);
                else
                        NET_INC_STATS_BH(TCPSackRecovery);

                tp->high_seq = tp->snd_nxt;
                tp->prior_ssthresh = 0;
                tp->undo_marker = tp->snd_una;
                tp->undo_retrans = tp->retrans_out;

                if (tp->ca_state < TCP_CA_CWR) {
                        if (!(flag & FLAG_ECE))
                                tp->prior_ssthresh = tcp_current_ssthresh(tp);
                        tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
                        TCP_ECN_queue_cwr(tp);
                }

                tp->snd_cwnd_cnt = 0;
                tp->ca_state = TCP_CA_Recovery;
        }

        if (is_dupack)
                tcp_update_scoreboard(sk, tp);
        tcp_cwnd_down(tp);
        tcp_xmit_retransmit_queue(sk);
}
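/* Illustrative sketch (not kernel code): the before()/after() checks
 * used throughout the state machine above compare 32-bit sequence
 * numbers modulo 2^32, so the ordering stays correct across sequence
 * wraparound. The real helpers are before() and after() from
 * include/net/tcp.h; the names and guard below are ours. Classic
 * signed-difference form:
 */
#ifdef TCP_INPUT_ILLUSTRATIVE_SKETCHES
static __inline__ int sketch_before(__u32 seq1, __u32 seq2)
{
        return (__s32)(seq1 - seq2) < 0;        /* seq1 precedes seq2 */
}
#define sketch_after(seq2, seq1)        sketch_before(seq1, seq2)
#endif /* TCP_INPUT_ILLUSTRATIVE_SKETCHES */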
/* Read draft-ietf-tcplw-high-performance before mucking
 * with this code. (Supersedes RFC1323)
 */
static void tcp_ack_saw_tstamp(struct tcp_opt *tp, int flag)
{
        __u32 seq_rtt;

        /* RTTM Rule: A TSecr value received in a segment is used to
         * update the averaged RTT measurement only if the segment
         * acknowledges some new data, i.e., only if it advances the
         * left edge of the send window.
         *
         * See draft-ietf-tcplw-high-performance-00, section 3.3.
         * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
         */
        seq_rtt = tcp_time_stamp - tp->rcv_tsecr;
        tcp_rtt_estimator(tp, seq_rtt);
        tcp_set_rto(tp);
        if (tp->backoff) {
                if (!tp->retransmits || !(flag & FLAG_RETRANS_DATA_ACKED))
                        tp->backoff = 0;
                else
                        tp->rto <<= tp->backoff;
        }
        tcp_bound_rto(tp);
}

static void tcp_ack_no_tstamp(struct tcp_opt *tp, u32 seq_rtt, int flag)
{
        /* We don't have a timestamp. Can only use
         * packets that are not retransmitted to determine
         * rtt estimates. Also, we must not reset the
         * backoff for rto until we get a non-retransmitted
         * packet. This allows us to deal with a situation
         * where the network delay has increased suddenly.
         * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
         */
        if (flag & FLAG_RETRANS_DATA_ACKED)
                return;

        tcp_rtt_estimator(tp, seq_rtt);
        tcp_set_rto(tp);
        if (tp->backoff) {
                /* Could this be relaxed? We have a valid sample as soon
                 * as we get here. Why not clear backoff right away? */
                if (!tp->retransmits)
                        tp->backoff = 0;
                else
                        tp->rto <<= tp->backoff;
        }
        tcp_bound_rto(tp);
}

static __inline__ void
tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt)
{
        /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
        if (tp->saw_tstamp && tp->rcv_tsecr)
                tcp_ack_saw_tstamp(tp, flag);
        else if (seq_rtt >= 0)
                tcp_ack_no_tstamp(tp, seq_rtt, flag);
}

/* This is Jacobson's slow start and congestion avoidance.
 * SIGCOMM '88, p. 328.
 */
static __inline__ void tcp_cong_avoid(struct tcp_opt *tp)
{
        if (tp->snd_cwnd <= tp->snd_ssthresh) {
                /* In "safe" area, increase. */
                if (tp->snd_cwnd < tp->snd_cwnd_clamp)
                        tp->snd_cwnd++;
        } else {
                /* In dangerous area, increase slowly.
                 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
                 */
                if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
                        if (tp->snd_cwnd < tp->snd_cwnd_clamp)
                                tp->snd_cwnd++;
                        tp->snd_cwnd_cnt = 0;
                } else
                        tp->snd_cwnd_cnt++;
        }
}
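/* Illustrative sketch (not kernel code): how snd_cwnd evolves under
 * tcp_cong_avoid() above. At or below ssthresh, cwnd gains one segment
 * per ACK (slow start, doubling each RTT); above it, snd_cwnd_cnt
 * spreads the gain so cwnd grows by about one segment per RTT. The
 * demo below compiles standalone as userspace C when the hypothetical
 * guard is defined; it omits snd_cwnd_clamp for brevity.
 */
#ifdef TCP_INPUT_ILLUSTRATIVE_SKETCHES
#include <stdio.h>

int main(void)
{
        unsigned int cwnd = 1, ssthresh = 8, cwnd_cnt = 0, ack;

        for (ack = 0; ack < 64; ack++) {
                if (cwnd <= ssthresh) {
                        cwnd++;                 /* exponential: +1 per ACK */
                } else if (cwnd_cnt >= cwnd) {
                        cwnd++;                 /* linear: +1 per cwnd ACKs */
                        cwnd_cnt = 0;
                } else
                        cwnd_cnt++;
                printf("ack %2u -> cwnd %u\n", ack, cwnd);
        }
        return 0;
}
#endif /* TCP_INPUT_ILLUSTRATIVE_SKETCHES */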
/* Restart timer after forward progress on connection.
 * RFC2988 recommends (and BSD does) to restart the timer to now+rto,
 * which is certainly wrong and effectively means that
 * rto includes one more _full_ rtt.
 *
 * For details see:
 * ftp://ftp.inr.ac.ru:/ip-routing/README.rto
 */
static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
{
        if (tp->packets_out == 0) {
                tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS);
        } else {
                struct sk_buff *skb = skb_peek(&sk->write_queue);
                __u32 when = tp->rto + tp->rttvar -
                             (tcp_time_stamp - TCP_SKB_CB(skb)->when);

                if ((__s32)when < (__s32)tp->rttvar)
                        when = tp->rttvar;
                tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, when);
        }
}

/* Remove acknowledged frames from the retransmission queue. */
static int tcp_clean_rtx_queue(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff *skb;
        __u32 now = tcp_time_stamp;
        int acked = 0;
        __s32 seq_rtt = -1;

        while ((skb = skb_peek(&sk->write_queue)) && (skb != tp->send_head)) {
                struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
                __u8 sacked = scb->sacked;

                /* If our packet is before the ack sequence we can
                 * discard it as it's confirmed to have arrived at
                 * the other end.
                 */
                if (after(scb->end_seq, tp->snd_una))
                        break;

                /* Initial outgoing SYNs get put onto the write_queue
                 * just like anything else we transmit. It is not
                 * true data, and if we misinform our callers that
                 * this ACK acks real data, we will erroneously exit
                 * connection startup slow start one packet too
                 * quickly. This is severely frowned upon behavior.
                 */
                if (!(scb->flags & TCPCB_FLAG_SYN)) {
                        acked |= FLAG_DATA_ACKED;
                } else {
                        acked |= FLAG_SYN_ACKED;
                }

                if (sacked) {
                        if (sacked & TCPCB_RETRANS) {
                                if (sacked & TCPCB_SACKED_RETRANS)
                                        tp->retrans_out--;
                                acked |= FLAG_RETRANS_DATA_ACKED;
                                seq_rtt = -1;
                        } else if (seq_rtt < 0)
                                seq_rtt = now - scb->when;
                        if (sacked & TCPCB_SACKED_ACKED)
                                tp->sacked_out--;
                        if (sacked & TCPCB_LOST)
                                tp->lost_out--;
                        if (sacked & TCPCB_URG) {
                                if (tp->urg_mode &&
                                    !before(scb->end_seq, tp->snd_up))
                                        tp->urg_mode = 0;
                        }
                } else if (seq_rtt < 0)
                        seq_rtt = now - scb->when;

                if (tp->fackets_out)
                        tp->fackets_out--;
                tp->packets_out--;
                __skb_unlink(skb, skb->list);
                tcp_free_skb(sk, skb);
        }

        if (acked & FLAG_ACKED) {
                tcp_ack_update_rtt(tp, acked, seq_rtt);
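/* Illustrative sketch (not kernel code): the timer restart arithmetic
 * from tcp_ack_packets_out() above. Instead of rearming to now + rto
 * as RFC2988 recommends, the oldest unacked packet is charged for the
 * time it has already spent in flight, and the result is floored at
 * rttvar so the timer never fires too soon. The function name and
 * guard are ours:
 */
#ifdef TCP_INPUT_ILLUSTRATIVE_SKETCHES
static __u32 sketch_restart_rto(__u32 rto, __u32 rttvar,
                                __u32 now, __u32 sent_when)
{
        __u32 when = rto + rttvar - (now - sent_when);

        if ((__s32)when < (__s32)rttvar)
                when = rttvar;          /* floor at one rttvar */
        return when;
}
#endif /* TCP_INPUT_ILLUSTRATIVE_SKETCHES */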