/* tcp_input.c — TCP input processing: ACK handling, option parsing, PAWS. */
		tcp_ack_packets_out(sk, tp);
	}

#if FASTRETRANS_DEBUG > 0
	/* Debug sanity checks: the per-state segment counters must never go
	 * negative.  With SACK enabled and nothing left in flight, any
	 * residual lost/sacked/retrans counts are accounting leaks — report
	 * them and reset to zero.
	 */
	BUG_TRAP((int)tp->sacked_out >= 0);
	BUG_TRAP((int)tp->lost_out >= 0);
	BUG_TRAP((int)tp->retrans_out >= 0);
	if (tp->packets_out==0 && tp->sack_ok) {
		if (tp->lost_out) {
			printk(KERN_DEBUG "Leak l=%u %d\n", tp->lost_out, tp->ca_state);
			tp->lost_out = 0;
		}
		if (tp->sacked_out) {
			printk(KERN_DEBUG "Leak s=%u %d\n", tp->sacked_out, tp->ca_state);
			tp->sacked_out = 0;
		}
		if (tp->retrans_out) {
			printk(KERN_DEBUG "Leak r=%u %d\n", tp->retrans_out, tp->ca_state);
			tp->retrans_out = 0;
		}
	}
#endif
	return acked;
}

/* A zero-window probe was acknowledged.  If the window is still not
 * usable, re-arm the probe timer with exponential backoff; otherwise
 * clear backoff and stop probing.
 */
static void tcp_ack_probe(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	/* Was it a usable window open? */
	if (!after(TCP_SKB_CB(tp->send_head)->end_seq,
		   tp->snd_una + tp->snd_wnd)) {
		tp->backoff = 0;
		tcp_clear_xmit_timer(sk, TCP_TIME_PROBE0);
		/* Socket must be waked up by subsequent tcp_data_snd_check().
		 * This function is not for random using!
		 */
	} else {
		tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0,
				     min(tp->rto << tp->backoff, TCP_RTO_MAX));
	}
}

/* An ACK is "dubious" when it is a duplicate, carries a congestion
 * alert, or arrives while we are not in the Open congestion state —
 * in all those cases the fast-retransmit machinery must look at it.
 */
static __inline__ int tcp_ack_is_dubious(struct tcp_opt *tp, int flag)
{
	return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
		tp->ca_state != TCP_CA_Open);
}

/* cwnd may be raised only if no ECN echo was seen (or we are still in
 * slow start below ssthresh), and we are not in Recovery or CWR.
 */
static __inline__ int tcp_may_raise_cwnd(struct tcp_opt *tp, int flag)
{
	return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
		!((1<<tp->ca_state)&(TCPF_CA_Recovery|TCPF_CA_CWR));
}

/* Check that window update is acceptable.
 * The function assumes that snd_una<=ack<=snd_next.
 */
static __inline__ int
tcp_may_update_window(struct tcp_opt *tp, u32 ack, u32 ack_seq, u32 nwin)
{
	return (after(ack, tp->snd_una) ||
		after(ack_seq, tp->snd_wl1) ||
		(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd));
}

/* Update our send window.
 *
 * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
 * and in FreeBSD. NetBSD's one is even worse.) is wrong.
 */
static int tcp_ack_update_window(struct sock *sk, struct tcp_opt *tp,
				 struct sk_buff *skb, u32 ack, u32 ack_seq)
{
	int flag = 0;
	/* Advertised window, scaled by the negotiated shift. */
	u32 nwin = ntohs(skb->h.th->window) << tp->snd_wscale;

	if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
		flag |= FLAG_WIN_UPDATE;
		tcp_update_wl(tp, ack, ack_seq);

		if (tp->snd_wnd != nwin) {
			tp->snd_wnd = nwin;

			/* Note, it is the only place, where
			 * fast path is recovered for sending TCP.
			 */
			if (skb_queue_len(&tp->out_of_order_queue) == 0 &&
#ifdef TCP_FORMAL_WINDOW
			    tcp_receive_window(tp) &&
#endif
			    !tp->urg_data)
				tcp_fast_path_on(tp);

			if (nwin > tp->max_window) {
				tp->max_window = nwin;
				tcp_sync_mss(sk, tp->pmtu_cookie);
			}
		}
	}

	tp->snd_una = ack;

#ifdef TCP_DEBUG
	/* Complain (rate-limited) if the peer shrank the window so far
	 * that already-sent data now lies beyond the right edge.
	 */
	if (before(tp->snd_una + tp->snd_wnd, tp->snd_nxt)) {
		if (tp->snd_nxt-(tp->snd_una + tp->snd_wnd) >= (1<<tp->snd_wscale)
		    && net_ratelimit())
			printk(KERN_DEBUG "TCP: peer %u.%u.%u.%u:%u/%u shrinks window %u:%u:%u. Bad, what else can I say?\n",
			       NIPQUAD(sk->daddr), htons(sk->dport), sk->num,
			       tp->snd_una, tp->snd_wnd, tp->snd_nxt);
	}
#endif

	return flag;
}

/* This routine deals with incoming acks, but not outgoing ones. */
static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	u32 prior_snd_una = tp->snd_una;
	u32 ack_seq = TCP_SKB_CB(skb)->seq;
	u32 ack = TCP_SKB_CB(skb)->ack_seq;
	u32 prior_in_flight;
	int prior_packets;

	/* If the ack is newer than sent or older than previous acks
	 * then we can probably ignore it.
	 */
	if (after(ack, tp->snd_nxt))
		goto uninteresting_ack;
	if (before(ack, prior_snd_una))
		goto old_ack;

	if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
		/* Window is constant, pure forward advance.
		 * No more checks are required.
		 * Note, we use the fact that SND.UNA>=SND.WL2.
		 */
		tcp_update_wl(tp, ack, ack_seq);
		tp->snd_una = ack;
		flag |= FLAG_WIN_UPDATE;

		NET_INC_STATS_BH(TCPHPAcks);
	} else {
		if (ack_seq != TCP_SKB_CB(skb)->end_seq)
			flag |= FLAG_DATA;
		else
			NET_INC_STATS_BH(TCPPureAcks);

		flag |= tcp_ack_update_window(sk, tp, skb, ack, ack_seq);

		if (TCP_SKB_CB(skb)->sacked)
			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);

		if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th))
			flag |= FLAG_ECE;
	}

	/* We passed data and got it acked, remove any soft error
	 * log. Something worked...
	 */
	sk->err_soft = 0;
	tp->rcv_tstamp = tcp_time_stamp;
	if ((prior_packets = tp->packets_out) == 0)
		goto no_queue;

	prior_in_flight = tcp_packets_in_flight(tp);

	/* See if we can take anything off of the retransmit queue. */
	flag |= tcp_clean_rtx_queue(sk);

	if (tcp_ack_is_dubious(tp, flag)) {
		/* Advance CWND, if state allows this. */
		if ((flag&FLAG_DATA_ACKED) && prior_in_flight >= tp->snd_cwnd &&
		    tcp_may_raise_cwnd(tp, flag))
			tcp_cong_avoid(tp);
		tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
	} else {
		if ((flag&FLAG_DATA_ACKED) && prior_in_flight >= tp->snd_cwnd)
			tcp_cong_avoid(tp);
	}

	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
		dst_confirm(sk->dst_cache);

	return 1;

no_queue:
	tp->probes_out = 0;

	/* If this ack opens up a zero window, clear backoff.  It was
	 * being used to time the probes, and is probably far higher than
	 * it needs to be for normal retransmission.
	 */
	if (tp->send_head)
		tcp_ack_probe(sk);
	return 1;

old_ack:
	/* Even a stale ACK may carry fresh SACK information. */
	if (TCP_SKB_CB(skb)->sacked)
		tcp_sacktag_write_queue(sk, skb, prior_snd_una);

uninteresting_ack:
	SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
	return 0;
}

/* Look for tcp options. Normally only called on SYN and SYNACK packets.
 * But, this can also be called on packets in the established flow when
 * the fast version below fails.
*/void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp, int estab){ unsigned char *ptr; struct tcphdr *th = skb->h.th; int length=(th->doff*4)-sizeof(struct tcphdr); ptr = (unsigned char *)(th + 1); tp->saw_tstamp = 0; while(length>0) { int opcode=*ptr++; int opsize; switch (opcode) { case TCPOPT_EOL: return; case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ length--; continue; default: opsize=*ptr++; if (opsize < 2) /* "silly options" */ return; if (opsize > length) return; /* don't parse partial options */ switch(opcode) { case TCPOPT_MSS: if(opsize==TCPOLEN_MSS && th->syn && !estab) { u16 in_mss = ntohs(*(__u16 *)ptr); if (in_mss) { if (tp->user_mss && tp->user_mss < in_mss) in_mss = tp->user_mss; tp->mss_clamp = in_mss; } } break; case TCPOPT_WINDOW: if(opsize==TCPOLEN_WINDOW && th->syn && !estab) if (sysctl_tcp_window_scaling) { tp->wscale_ok = 1; tp->snd_wscale = *(__u8 *)ptr; if(tp->snd_wscale > 14) { if(net_ratelimit()) printk("tcp_parse_options: Illegal window " "scaling value %d >14 received.", tp->snd_wscale); tp->snd_wscale = 14; } } break; case TCPOPT_TIMESTAMP: if(opsize==TCPOLEN_TIMESTAMP) { if ((estab && tp->tstamp_ok) || (!estab && sysctl_tcp_timestamps)) { tp->saw_tstamp = 1; tp->rcv_tsval = ntohl(*(__u32 *)ptr); tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4)); } } break; case TCPOPT_SACK_PERM: if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) { if (sysctl_tcp_sack) { tp->sack_ok = 1; tcp_sack_reset(tp); } } break; case TCPOPT_SACK: if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) && !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) && tp->sack_ok) { TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th; } }; ptr+=opsize-2; length-=opsize; }; }}/* Fast parse options. This hopes to only see timestamps. * If it is wrong it falls back on tcp_parse_options(). 
 */
static __inline__ int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, struct tcp_opt *tp)
{
	/* Bare 20-byte header: no options at all. */
	if (th->doff == sizeof(struct tcphdr)>>2) {
		tp->saw_tstamp = 0;
		return 0;
	} else if (tp->tstamp_ok &&
		   th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
		__u32 *ptr = (__u32 *)(th + 1);
		/* Exactly the aligned NOP-NOP-TIMESTAMP layout we send. */
		if (*ptr == __constant_ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
					     | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
			tp->saw_tstamp = 1;
			++ptr;
			tp->rcv_tsval = ntohl(*ptr);
			++ptr;
			tp->rcv_tsecr = ntohl(*ptr);
			return 1;
		}
	}
	/* Anything else: fall back to the full parser (estab==1). */
	tcp_parse_options(skb, tp, 1);
	return 1;
}

/* Remember the peer's timestamp for subsequent PAWS checks, together
 * with the wall-clock second it was recorded at.
 */
extern __inline__ void
tcp_store_ts_recent(struct tcp_opt *tp)
{
	tp->ts_recent = tp->rcv_tsval;
	tp->ts_recent_stamp = xtime.tv_sec;
}

/* Update ts_recent from a segment that does not advance beyond rcv_wup. */
extern __inline__ void
tcp_replace_ts_recent(struct tcp_opt *tp, u32 seq)
{
	if (tp->saw_tstamp && !after(seq, tp->rcv_wup)) {
		/* PAWS bug workaround wrt. ACK frames, the PAWS discard
		 * extra check below makes sure this can only happen
		 * for pure ACK frames.  -DaveM
		 *
		 * Not only, also it occurs for expired timestamps.
		 */
		if((s32)(tp->rcv_tsval - tp->ts_recent) >= 0 ||
		   xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS)
			tcp_store_ts_recent(tp);
	}
}

/* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM
 *
 * It is not fatal. If this ACK does _not_ change critical state (seqs, window)
 * it can pass through stack. So, the following predicate verifies that
 * this segment is not used for anything but congestion avoidance or
 * fast retransmit. Moreover, we even are able to eliminate most of such
 * second order effects, if we apply some small "replay" window (~RTO)
 * to timestamp space.
 *
 * All these measures still do not guarantee that we reject wrapped ACKs
 * on networks with high bandwidth, when sequence space is recycled fastly,
 * but it guarantees that such events will be very rare and do not affect
 * connection seriously. This doesn't look nice, but alas, PAWS is really
 * buggy extension.
 *
 * [ Later note. Even worse! It is buggy for segments _with_ data.
   RFC
 * states that events when retransmit arrives after original data are rare.
 * It is a blatant lie. VJ forgot about fast retransmit! 8)8) It is
 * the biggest problem on large power networks even with minor reordering.
 * OK, let's give it small replay window. If peer clock is even 1hz, it is safe
 * up to bandwidth of 18Gigabit/sec. 8) ]
 */

/* Decide whether a PAWS-old segment is really just a reordered
 * duplicate ACK: pure ACK, duplicate, no window change, and its
 * timestamp lag is within roughly one RTO.
 */
static int tcp_disordered_ack(struct tcp_opt *tp, struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th;
	u32 seq = TCP_SKB_CB(skb)->seq;
	u32 ack = TCP_SKB_CB(skb)->ack_seq;

	return (/* 1. Pure ACK with correct sequence number. */
		(th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&

		/* 2. ... and duplicate ACK. */
		ack == tp->snd_una &&

		/* 3. ... and does not update window. */
		!tcp_may_update_window(tp, ack, seq, ntohs(th->window)<<tp->snd_wscale) &&

		/* 4. ... and sits in replay window. */
		(s32)(tp->ts_recent - tp->rcv_tsval) <= (tp->rto*1024)/HZ);
}

/* PAWS: discard the segment if its timestamp is older than ts_recent,
 * unless ts_recent itself is stale (over 24 days old) or the segment
 * is merely a disordered duplicate ACK (see above).
 */
extern __inline__ int tcp_paws_discard(struct tcp_opt *tp, struct sk_buff *skb)
{
	return ((s32)(tp->ts_recent - tp->rcv_tsval) > TCP_PAWS_WINDOW &&
		xtime.tv_sec < tp->ts_recent_stamp + TCP_PAWS_24DAYS &&
		!tcp_disordered_ack(tp, skb));
}

/* Slow-path sequence check: accept if any part of the segment falls
 * inside the receive window, or if it is an empty segment sitting
 * exactly at the right window edge.
 */
static int __tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
{
	u32 end_window = tp->rcv_wup + tp->rcv_wnd;
#ifdef TCP_FORMAL_WINDOW
	u32 rcv_wnd = tcp_receive_window(tp);
#else
	u32 rcv_wnd = tp->rcv_wnd;
#endif

	if (rcv_wnd &&
	    after(end_seq, tp->rcv_nxt) &&
	    before(seq, end_window))
		return 1;
	if (seq != end_window)
		return 0;
	return (seq == end_seq);
}

/* This functions checks to see if the tcp header is actually acceptable.
 *
 * Actually, our check is seriously broken, we must accept RST,ACK,URG
 * even on zero window effectively trimming data. It is RFC, guys.
 * But our check is so beautiful, that I do not want to repair it
 * now. However, taking into account those stupid plans to start to
 * send some texts with RST, we have to handle at least this case.
 * --ANK
 */
extern __inline__ int tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq, int rst)
{
#ifdef TCP_FORMAL_WINDOW
	u32 rcv_wnd = tcp_receive_window(tp);
#else
	u32 rcv_wnd = tp->rcv_wnd;
#endif

	/* A segment starting exactly at rcv_nxt is acceptable if we have
	 * window, if it is empty, or if it carries a RST (see the comment
	 * above about RST on zero window).
	 */
	if (seq == tp->rcv_nxt)
		return (rcv_wnd || (end_seq == seq) || rst);

	return __tcp_sequence(tp, seq, end_seq);
}

/* When we get a reset we do this. */
static void tcp_reset(struct sock *sk)
{
	/* We want the right error as BSD sees it (and indeed as we do). */
	switch (sk->state) {
		case TCP_SYN_SENT:
/* [Non-source web-viewer UI text removed here; tcp_reset() continues
 * beyond this excerpt.]
 */