tcp_input.c
	 *   - it does not update window.
	 * 3. or new SACK. It is difficult to check, so that we ignore it.
	 *
	 * Forward progress is also indicated by the arrival of new data,
	 * which was caused by window open from our side. This case is more
	 * difficult and it is made (alas, incorrectly) in tcp_data_queue().
	 *                                                --ANK (990513)
	 */
	if (ack != tp->snd_una || (flag == 0 && !th->fin))
		dst_confirm(sk->dst_cache);

	/* Remember the highest ack received. */
	tp->snd_una = ack;
	return 1;

uninteresting_ack:
	SOCK_DEBUG(sk, "Ack ignored %u %u\n", ack, tp->snd_nxt);
	return 0;
}

/* New-style handling of TIME_WAIT sockets. */

extern void tcp_tw_schedule(struct tcp_tw_bucket *tw);
extern void tcp_tw_reschedule(struct tcp_tw_bucket *tw);
extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);

void tcp_timewait_kill(struct tcp_tw_bucket *tw)
{
	struct tcp_bind_bucket *tb = tw->tb;

	/* Disassociate with bind bucket. */
	if(tw->bind_next)
		tw->bind_next->bind_pprev = tw->bind_pprev;
	*(tw->bind_pprev) = tw->bind_next;
	if (tb->owners == NULL) {
		if (tb->next)
			tb->next->pprev = tb->pprev;
		*(tb->pprev) = tb->next;
		kmem_cache_free(tcp_bucket_cachep, tb);
	}

	/* Unlink from established hashes. */
	if(tw->next)
		tw->next->pprev = tw->pprev;
	*tw->pprev = tw->next;

	/* We decremented the prot->inuse count when we entered TIME_WAIT
	 * and the sock from which this came was destroyed.
	 */
	tw->sklist_next->sklist_prev = tw->sklist_prev;
	tw->sklist_prev->sklist_next = tw->sklist_next;

	/* Ok, now free it up. */
	kmem_cache_free(tcp_timewait_cachep, tw);
}

/* We come here as a special case from the AF specific TCP input processing,
 * and the SKB has no owner.  Essentially handling this is very simple,
 * we just keep silently eating rx'd packets, acking them if necessary,
 * until none show up for the entire timeout period.
 *
 * Return 0, TCP_TW_ACK, TCP_TW_RST
 */
enum tcp_tw_status
tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
			   struct tcphdr *th, unsigned len)
{
	/* RFC 1122:
	 * "When a connection is [...] on TIME-WAIT state [...]
	 * [a TCP] MAY accept a new SYN from the remote TCP to
	 * reopen the connection directly, if it:
	 *
	 * (1)  assigns its initial sequence number for the new
	 *      connection to be larger than the largest sequence
	 *      number it used on the previous connection incarnation,
	 *      and
	 *
	 * (2)  returns to TIME-WAIT state if the SYN turns out
	 *      to be an old duplicate".
	 */
	if(th->syn && !th->rst && after(TCP_SKB_CB(skb)->seq, tw->rcv_nxt)) {
		struct sock *sk;
		struct tcp_func *af_specific = tw->af_specific;
		__u32 isn;

		isn = tw->snd_nxt + 128000;
		if(isn == 0)
			isn++;
		tcp_tw_deschedule(tw);
		tcp_timewait_kill(tw);
		sk = af_specific->get_sock(skb, th);
		if(sk == NULL ||
		   !ipsec_sk_policy(sk, skb) ||
		   atomic_read(&sk->sock_readers) != 0)
			return 0;
		skb_set_owner_r(skb, sk);
		af_specific = sk->tp_pinfo.af_tcp.af_specific;
		if(af_specific->conn_request(sk, skb, isn) < 0)
			return TCP_TW_RST;	/* Toss a reset back. */
		return 0;			/* Discard the frame. */
	}

	/* Check RST or SYN */
	if(th->rst || th->syn) {
		/* This is TIME_WAIT assassination, in two flavors.
		 * Oh well... nobody has a sufficient solution to this
		 * protocol bug yet.
		 */
		if(sysctl_tcp_rfc1337 == 0) {
			tcp_tw_deschedule(tw);
			tcp_timewait_kill(tw);
		}
		if(!th->rst)
			return TCP_TW_RST;	/* toss a reset back */
		return 0;
	} else {
		/* In this case we must reset the TIMEWAIT timer. */
		if(th->ack)
			tcp_tw_reschedule(tw);
	}

	/* Ack old packets if necessary */
	if (!after(TCP_SKB_CB(skb)->end_seq, tw->rcv_nxt) &&
	    (th->doff * 4) > len)
		return TCP_TW_ACK;
	return 0;
}
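/* Illustrative sketch only (not part of this file): tcp_timewait_state_process()
 * reports one of three outcomes -- 0 (drop), TCP_TW_ACK (re-ACK an old
 * segment) or TCP_TW_RST (answer with a reset) -- and the AF-specific receive
 * path is expected to act on that value.  The helpers tw_send_ack() and
 * tw_send_reset() below are placeholder names, not real kernel symbols;
 * only kfree_skb() is.
 */
#if 0
	switch (tcp_timewait_state_process(tw, skb, th, len)) {
	case TCP_TW_ACK:
		tw_send_ack(skb, tw);	/* duplicate of old data: re-ACK it */
		break;
	case TCP_TW_RST:
		tw_send_reset(skb);	/* refuse the segment with a reset */
		break;
	default:
		break;			/* silently discard the frame */
	}
	kfree_skb(skb);			/* the skb has no owner; free it here */
#endif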
/* Enter the time wait state.  This is always called from BH
 * context.  Essentially we whip up a timewait bucket, copy the
 * relevant info into it from the SK, and mess with hash chains
 * and list linkage.
 */
static __inline__ void tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw)
{
	struct sock **head, *sktw;

	/* Step 1: Remove SK from established hash. */
	if(sk->next)
		sk->next->pprev = sk->pprev;
	*sk->pprev = sk->next;
	sk->pprev = NULL;
	tcp_reg_zap(sk);

	/* Step 2: Put TW into bind hash where SK was. */
	tw->tb = (struct tcp_bind_bucket *)sk->prev;
	if((tw->bind_next = sk->bind_next) != NULL)
		sk->bind_next->bind_pprev = &tw->bind_next;
	tw->bind_pprev = sk->bind_pprev;
	*sk->bind_pprev = (struct sock *)tw;
	sk->prev = NULL;

	/* Step 3: Same for the protocol sklist. */
	(tw->sklist_next = sk->sklist_next)->sklist_prev = (struct sock *)tw;
	(tw->sklist_prev = sk->sklist_prev)->sklist_next = (struct sock *)tw;
	sk->sklist_next = NULL;
	sk->prot->inuse--;

	/* Step 4: Hash TW into TIMEWAIT half of established hash table. */
	head = &tcp_ehash[sk->hashent + (tcp_ehash_size/2)];
	sktw = (struct sock *)tw;
	if((sktw->next = *head) != NULL)
		(*head)->pprev = &sktw->next;
	*head = sktw;
	sktw->pprev = head;
}

void tcp_time_wait(struct sock *sk)
{
	struct tcp_tw_bucket *tw;

	tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC);
	if(tw != NULL) {
		/* Give us an identity. */
		tw->daddr		= sk->daddr;
		tw->rcv_saddr		= sk->rcv_saddr;
		tw->bound_dev_if	= sk->bound_dev_if;
		tw->num			= sk->num;
		tw->state		= TCP_TIME_WAIT;
		tw->sport		= sk->sport;
		tw->dport		= sk->dport;
		tw->family		= sk->family;
		tw->reuse		= sk->reuse;
		tw->rcv_nxt		= sk->tp_pinfo.af_tcp.rcv_nxt;
		tw->snd_nxt		= sk->tp_pinfo.af_tcp.snd_nxt;
		tw->window		= tcp_select_window(sk);
		tw->af_specific		= sk->tp_pinfo.af_tcp.af_specific;

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		if(tw->family == PF_INET6) {
			memcpy(&tw->v6_daddr,
			       &sk->net_pinfo.af_inet6.daddr,
			       sizeof(struct in6_addr));
			memcpy(&tw->v6_rcv_saddr,
			       &sk->net_pinfo.af_inet6.rcv_saddr,
			       sizeof(struct in6_addr));
		}
#endif
		/* Linkage updates. */
		tcp_tw_hashdance(sk, tw);

		/* Get the TIME_WAIT timeout firing. */
		tcp_tw_schedule(tw);

		/* CLOSE the SK. */
		if(sk->state == TCP_ESTABLISHED)
			tcp_statistics.TcpCurrEstab--;
		sk->state = TCP_CLOSE;
		net_reset_timer(sk, TIME_DONE,
				min(sk->tp_pinfo.af_tcp.srtt * 2, TCP_DONE_TIME));
	} else {
		/* Sorry, we're out of memory, just CLOSE this
		 * socket up.  We've got bigger problems than
		 * non-graceful socket closings.
		 */
		tcp_set_state(sk, TCP_CLOSE);
	}

	/* Prevent rcvmsg/sndmsg calls, and wake people up. */
	sk->shutdown = SHUTDOWN_MASK;
	if(!sk->dead)
		sk->state_change(sk);
}
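/* Illustrative arithmetic (numbers not from the original source):
 * tcp_tw_hashdance() above parks the timewait bucket in the upper half of
 * the established hash table.  With tcp_ehash_size == 512, a socket that
 * hashed to slot 37 while ESTABLISHED is re-linked, on entering TIME_WAIT,
 * into slot 37 + 512/2 == 293 -- the TIME_WAIT half of the same table.
 */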
/*
 * Process the FIN bit. This now behaves as it is supposed to work
 * and the FIN takes effect when it is validly part of sequence
 * space. Not before when we get holes.
 *
 * If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
 * (and thence onto LAST-ACK and finally, CLOSE, we never enter
 * TIME-WAIT)
 *
 * If we are in FINWAIT-1, a received FIN indicates simultaneous
 * close and we go into CLOSING (and later onto TIME-WAIT)
 *
 * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
 */
static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
{
	sk->tp_pinfo.af_tcp.fin_seq = TCP_SKB_CB(skb)->end_seq;

	tcp_send_ack(sk);

	if (!sk->dead) {
		sk->state_change(sk);
		sock_wake_async(sk->socket, 1);
	}

	switch(sk->state) {
	case TCP_SYN_RECV:
	case TCP_ESTABLISHED:
		/* Move to CLOSE_WAIT */
		tcp_set_state(sk, TCP_CLOSE_WAIT);
		if (th->rst)
			sk->shutdown = SHUTDOWN_MASK;
		break;

	case TCP_CLOSE_WAIT:
	case TCP_CLOSING:
		/* Received a retransmission of the FIN, do nothing. */
		break;

	case TCP_LAST_ACK:
		/* RFC793: Remain in the LAST-ACK state. */
		break;

	case TCP_FIN_WAIT1:
		/* This case occurs when a simultaneous close
		 * happens, we must ack the received FIN and
		 * enter the CLOSING state.
		 *
		 * This causes a WRITE timeout, which will either
		 * move on to TIME_WAIT when we timeout, or resend
		 * the FIN properly (maybe we get rid of that annoying
		 * FIN lost hang). The TIME_WRITE code is already
		 * correct for handling this timeout.
		 */
		tcp_set_state(sk, TCP_CLOSING);
		break;

	case TCP_FIN_WAIT2:
		/* Received a FIN -- send ACK and enter TIME_WAIT. */
		tcp_time_wait(sk);
		break;

	default:
		/* Only TCP_LISTEN and TCP_CLOSE are left, in these
		 * cases we should never reach this piece of code.
		 */
		printk("tcp_fin: Impossible, sk->state=%d\n", sk->state);
		break;
	};
}

/* These routines update the SACK block as out-of-order packets arrive or
 * in-order packets close up the sequence space.
 */
static void tcp_sack_maybe_coalesce(struct tcp_opt *tp, struct tcp_sack_block *sp)
{
	int this_sack, num_sacks = tp->num_sacks;
	struct tcp_sack_block *swalk = &tp->selective_acks[0];

	/* If more than one SACK block, see if the recent change to SP eats into
	 * or hits the sequence space of other SACK blocks, if so coalesce.
	 */
	if(num_sacks != 1) {
		for(this_sack = 0; this_sack < num_sacks; this_sack++, swalk++) {
			if(swalk == sp)
				continue;

			/* First case, bottom of SP moves into top of the
			 * sequence space of SWALK.
			 */
			if(between(sp->start_seq, swalk->start_seq, swalk->end_seq)) {
				sp->start_seq = swalk->start_seq;
				goto coalesce;
			}

			/* Second case, top of SP moves into bottom of the
			 * sequence space of SWALK.
			 */
			if(between(sp->end_seq, swalk->start_seq, swalk->end_seq)) {
				sp->end_seq = swalk->end_seq;
				goto coalesce;
			}
		}
	}

	/* SP is the only SACK, or no coalescing cases found. */
	return;

coalesce:
	/* Zap SWALK, by moving every further SACK up by one slot.
	 * Decrease num_sacks.
	 */
	for(; this_sack < num_sacks-1; this_sack++, swalk++) {
		struct tcp_sack_block *next = (swalk + 1);
		swalk->start_seq = next->start_seq;
		swalk->end_seq = next->end_seq;
	}
	tp->num_sacks--;
}

static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
{
	__u32 tmp;

	tmp = sack1->start_seq;
	sack1->start_seq = sack2->start_seq;
	sack2->start_seq = tmp;

	tmp = sack1->end_seq;
	sack1->end_seq = sack2->end_seq;
	sack2->end_seq = tmp;
}
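/* Worked example (illustrative, numbers not from the original source):
 * suppose the head SACK block selective_acks[0] currently covers
 * start_seq=1000, end_seq=2000 and a second block covers 2500..3000.
 * An out-of-order segment with seq=2000, end_seq=2500 hits the fast path
 * in tcp_sack_new_ofo_skb() below: the head block is extended to
 * 1000..2500, and tcp_sack_maybe_coalesce() then notices that its new top
 * edge touches the 2500..3000 block, merges the two into a single
 * 1000..3000 block and decrements num_sacks.
 */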
static void tcp_sack_new_ofo_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct tcp_sack_block *sp = &tp->selective_acks[0];
	int cur_sacks = tp->num_sacks;

	if (!cur_sacks)
		goto new_sack;

	/* Optimize for the common case, new ofo frames arrive
	 * "in order". ;-)  This also satisfies the requirements
	 * of RFC2018 about ordering of SACKs.
	 */
	if(sp->end_seq == TCP_SKB_CB(skb)->seq) {
		sp->end_seq = TCP_SKB_CB(skb)->end_seq;
		tcp_sack_maybe_coalesce(tp, sp);
	} else if(sp->start_seq == TCP_SKB_CB(skb)->end_seq) {
		/* Re-ordered arrival, in this case, can be optimized
		 * as well.
		 */
		sp->start_seq = TCP_SKB_CB(skb)->seq;
		tcp_sack_maybe_coalesce(tp, sp);
	} else {
		struct tcp_sack_block *swap = sp + 1;
		int this_sack, max_sacks = (tp->tstamp_ok ? 3 : 4);

		/* Oh well, we have to move things around.
		 * Try to find a SACK we can tack this onto.
		 */
		for(this_sack = 1; this_sack < cur_sacks; this_sack++, swap++) {
			if((swap->end_seq == TCP_SKB_CB(skb)->seq) ||
			   (swap->start_seq == TCP_SKB_CB(skb)->end_seq)) {
				if(swap->end_seq == TCP_SKB_CB(skb)->seq)
					swap->end_seq = TCP_SKB_CB(skb)->end_seq;
				else
					swap->start_seq = TCP_SKB_CB(skb)->seq;
				tcp_sack_swap(sp, swap);
				tcp_sack_maybe_coalesce(tp, sp);
				return;
			}
		}

		/* Could not find an adjacent existing SACK, build a new one,
		 * put it at the front, and shift everyone else down.  We
		 * always know there is at least one SACK present already here.
		 *
		 * If the sack array is full, forget about the last one.
		 */
		if (cur_sacks >= max_sacks) {
			cur_sacks--;
			tp->num_sacks--;
		}
		while(cur_sacks >= 1) {
			struct tcp_sack_block *this = &tp->selective_acks[cur_sacks];
			struct tcp_sack_block *prev = (this - 1);

			this->start_seq = prev->start_seq;
			this->end_seq = prev->end_seq;
			cur_sacks--;
		}

	new_sack:
		/* Build the new head SACK, and we're done. */
		sp->start_seq = TCP_SKB_CB(skb)->seq;
		sp->end_seq = TCP_SKB_CB(skb)->end_seq;
		tp->num_sacks++;
	}
}

static void tcp_sack_remove_skb(struct tcp_opt *tp, struct sk_buff *skb)
{
	struct tcp_sack_block *sp = &tp->selective_acks[0];
	int num_sacks = tp->num_sacks;
	int this_sack;

	/* This is an in order data segment _or_ an out-of-order SKB being
	 * moved to the receive queue, so we know this removed SKB will eat
	 * from the front of a SACK.
	 */
	for(this_sack = 0; this_sack < num_sacks; this_sack++, sp++) {
		/* Check if the start of the sack is covered by skb. */
		if(!before(sp->start_seq, TCP_SKB_CB(skb)->seq) &&
		   before(sp->start_seq, TCP_SKB_CB(skb)->end_seq))
			break;
	}

	/* This should only happen if so many SACKs get built that some get
	 * pushed out before we get here, or we eat some in sequence packets
	 * which are before the first SACK block.
	 */
	if(this_sack >= num_sacks)