tcp_input.c
 *	disabled when:
 *	- A zero window was announced from us - zero window probing
 *	  is only handled properly in the slow path.
 *	- Out of order segments arrived.
 *	- Urgent data is expected.
 *	- There is no buffer space left
 *	- Unexpected TCP flags/window values/header lengths are received
 *	  (detected by checking the TCP header against pred_flags)
 *	- Data is sent in both directions. Fast path only supports pure senders
 *	  or pure receivers (this means either the sequence number or the ack
 *	  value must stay constant)
 *
 *	When these conditions are not satisfied it drops into a standard
 *	receive procedure patterned after RFC793 to handle all cases.
 *	The first three cases are guaranteed by proper pred_flags setting,
 *	the rest is checked inline. Fast processing is turned on in
 *	tcp_data_queue when everything is OK.
 */
int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
			struct tcphdr *th, unsigned len)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	int queued;
	u32 flg;

	/*
	 *	Header prediction.
	 *	The code follows the one in the famous
	 *	"30 instruction TCP receive" Van Jacobson mail.
	 *
	 *	Van's trick is to deposit buffers into socket queue
	 *	on a device interrupt, to call tcp_recv function
	 *	on the receive process context and checksum and copy
	 *	the buffer to user space. smart...
	 *
	 *	Our current scheme is not silly either but we take the
	 *	extra cost of the net_bh soft interrupt processing...
	 *	We do checksum and copy also but from device to kernel.
	 */

	/*
	 *	RFC1323: H1. Apply PAWS check first.
	 */
	if (tcp_fast_parse_options(sk, th, tp)) {
		if (tp->saw_tstamp) {
			if (tcp_paws_discard(tp, th, len)) {
				tcp_statistics.TcpInErrs++;
				if (!th->rst) {
					tcp_send_ack(sk);
					goto discard;
				}
			}
			tcp_replace_ts_recent(sk, tp,
					      TCP_SKB_CB(skb)->seq,
					      TCP_SKB_CB(skb)->end_seq);
		}
	}

	flg = *(((u32 *)th) + 3) & ~htonl(0xFC8 << 16);

	/*	pred_flags is 0xS?10 << 16 + snd_wnd
	 *	if header prediction is to be made
	 *	'S' will always be tp->tcp_header_len >> 2
	 *	'?' will be 0 else it will be !0
	 *	(when there are holes in the receive
	 *	 space for instance)
	 *	PSH flag is ignored.
	 */

	if (flg == tp->pred_flags && TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
		if (len <= th->doff*4) {
			/* Bulk data transfer: sender */
			if (len == th->doff*4) {
				tcp_ack(sk, th, TCP_SKB_CB(skb)->seq,
					TCP_SKB_CB(skb)->ack_seq, len);
				kfree_skb(skb);
				tcp_data_snd_check(sk);
				return 0;
			} else { /* Header too small */
				tcp_statistics.TcpInErrs++;
				goto discard;
			}
		} else if (TCP_SKB_CB(skb)->ack_seq == tp->snd_una &&
			   atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) {
			/* Bulk data transfer: receiver */
			__skb_pull(skb, th->doff*4);

			tcp_measure_rcv_mss(sk, skb);

			/* DO NOT notify forward progress here.
			 * It saves dozens of CPU instructions in fast path. --ANK
			 */
			__skb_queue_tail(&sk->receive_queue, skb);
			tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;

			/* FIN bit check is not done since if FIN is set in
			 * this frame, the pred_flags won't match up. -DaveM
			 */
			sk->data_ready(sk, 0);
			tcp_delack_estimator(tp);

			tcp_remember_ack(tp, th, skb);

			__tcp_ack_snd_check(sk);
			return 0;
		}
	}

	/*
	 *	Standard slow path.
	 */

	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
		/* RFC793, page 37: "In all states except SYN-SENT, all reset
		 * (RST) segments are validated by checking their SEQ-fields."
		 * And page 69: "If an incoming segment is not acceptable,
		 * an acknowledgment should be sent in reply (unless the RST bit
		 * is set, if so drop the segment and return)".
		 */
		if (th->rst)
			goto discard;
		if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
			SOCK_DEBUG(sk, "seq:%d end:%d wup:%d wnd:%d\n",
				   TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
				   tp->rcv_wup, tp->rcv_wnd);
		}
		tcp_send_ack(sk);
		goto discard;
	}

	if (th->syn && TCP_SKB_CB(skb)->seq != tp->syn_seq) {
		SOCK_DEBUG(sk, "syn in established state\n");
		tcp_statistics.TcpInErrs++;
		tcp_reset(sk);
		return 1;
	}

	if (th->rst) {
		tcp_reset(sk);
		goto discard;
	}

	if (th->ack)
		tcp_ack(sk, th, TCP_SKB_CB(skb)->seq,
			TCP_SKB_CB(skb)->ack_seq, len);

	/* Process urgent data. */
	tcp_urg(sk, th, len);

	/* step 7: process the segment text */
	queued = tcp_data(skb, sk, len);

	/* This must be after tcp_data() does the skb_pull() to
	 * remove the header size from skb->len.
	 *
	 * Dave!!! Phrase above (and all about rcv_mss) has
	 * nothing to do with reality. rcv_mss must measure TOTAL
	 * size, including sacks, IP options etc. Hence, measure_rcv_mss
	 * must occur before pulling etc, otherwise it will flap
	 * like hell. Even putting it before tcp_data is wrong,
	 * it should use skb->tail - skb->nh.raw instead.
	 *					--ANK (980805)
	 *
	 * BTW I broke it. Now all TCP options are handled equally
	 * in mss_clamp calculations (i.e. ignored, rfc1122),
	 * and mss_cache does include all of them (i.e. tstamps)
	 * except for sacks, to calculate effective mss faster.
	 *					--ANK (980805)
	 */
	tcp_measure_rcv_mss(sk, skb);

	/* Be careful, tcp_data() may have put this into TIME_WAIT. */
	if (sk->state != TCP_CLOSE) {
		tcp_data_snd_check(sk);
		tcp_ack_snd_check(sk);
	}

	if (!queued) {
discard:
		kfree_skb(skb);
	}

	return 0;
}
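/*
 * Illustrative sketch (hypothetical helper, not taken from this file): one
 * way to compose the "0xS?10 << 16 + snd_wnd" prediction word that the fast
 * path above compares against the masked header word.  It only illustrates
 * the bit layout described in the pred_flags comment.
 */
static __inline__ u32 tcp_build_pred_flags_sketch(struct tcp_opt *tp)
{
	/* Top nibble 'S':  data offset in 32-bit words (tcp_header_len >> 2).
	 * Next nibble '?': zero while header prediction is allowed.
	 * Flags byte:      ACK only (0x10); PSH is masked off on receive.
	 * Low 16 bits:     the window we expect the peer to keep advertising.
	 */
	return htonl(((tp->tcp_header_len >> 2) << 28) |
		     (0x10 << 16) |
		     (tp->snd_wnd & 0xFFFF));
}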
/*
 *	Process an incoming SYN or SYN-ACK for SYN_RECV sockets represented
 *	as an open_request.
 */
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
			   struct open_request *req)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	u32 flg;

	/*	assumption: the socket is not in use.
	 *	as we checked the user count on tcp_rcv and we're
	 *	running from a soft interrupt.
	 */

	/* Check for syn retransmission */
	flg = *(((u32 *)skb->h.th) + 3);
	flg &= __constant_htonl(0x00170000);

	/* Only SYN set? */
	if (flg == __constant_htonl(0x00020000)) {
		if (TCP_SKB_CB(skb)->seq == req->rcv_isn) {
			/* retransmitted syn */
			req->class->rtx_syn_ack(sk, req);
			return NULL;
		} else {
			return sk; /* Pass new SYN to the listen socket. */
		}
	}

	/* We know it's an ACK here */
	if (req->sk) {
		/*	socket already created but not
		 *	yet accepted()...
		 */
		sk = req->sk;
	} else {
		/* In theory the packet could be for a cookie, but
		 * TIME_WAIT should guard us against this.
		 * XXX: Nevertheless check for cookies?
		 * This sequence number check is done again later,
		 * but we do it here to prevent syn flood attackers
		 * from creating big SYN_RECV sockets.
		 */
		if (!between(TCP_SKB_CB(skb)->ack_seq,
			     req->snt_isn, req->snt_isn+1) ||
		    !between(TCP_SKB_CB(skb)->seq, req->rcv_isn,
			     req->rcv_isn+1+req->rcv_wnd)) {
			req->class->send_reset(skb);
			return NULL;
		}

		sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
		tcp_dec_slow_timer(TCP_SLT_SYNACK);
		if (sk == NULL)
			return NULL;

		req->expires = 0UL;
		req->sk = sk;
	}
	skb_orphan(skb);
	skb_set_owner_r(skb, sk);
	return sk;
}
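/*
 * Illustrative sketch (macro names hypothetical, not from this file): the
 * constants tested in tcp_check_req() above pick flag bits out of the 4th
 * 32-bit word of the TCP header, the word holding doff/reserved, the flag
 * byte and the window.  With FIN=0x01, SYN=0x02, RST=0x04, ACK=0x10 in that
 * byte, masking with htonl(0x00170000) keeps ACK|RST|SYN|FIN, and a result
 * equal to htonl(0x00020000) means "SYN set, ACK/RST/FIN all clear".
 */
#define TCP_FLAGWORD_MASK_SKETCH	__constant_htonl(0x00170000)	/* ACK|RST|SYN|FIN */
#define TCP_FLAGWORD_SYN_SKETCH		__constant_htonl(0x00020000)	/* SYN only        */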
/*
 *	This function implements the receiving procedure of RFC 793 for
 *	all states except ESTABLISHED and TIME_WAIT.
 *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
 *	address independent.
 */
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
			  struct tcphdr *th, unsigned len)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	int queued = 0;

	switch (sk->state) {
	case TCP_CLOSE:
		/* When state == CLOSED, hash lookup always fails.
		 *
		 * But, there is a back door, the backlog queue.
		 * If we have a sequence of packets in the backlog
		 * during __release_sock() which have a sequence such
		 * that:
		 *	packet X	causes entry to TCP_CLOSE state
		 *	...
		 *	packet X + N	has FIN bit set
		 *
		 * We report a (luckily) harmless error in this case.
		 * The issue is that backlog queue processing bypasses
		 * any hash lookups (we know which socket packets are for).
		 * The correct behavior here is what 2.0.x did, since
		 * a TCP_CLOSE socket does not exist.  Drop the frame
		 * and send a RST back to the other end.
		 */
		return 1;

	case TCP_LISTEN:
		/* These use the socket TOS..
		 * might want to be the received TOS
		 */
		if (th->ack)
			return 1;

		if (th->syn) {
			if (tp->af_specific->conn_request(sk, skb, 0) < 0)
				return 1;

			/* Now we have several options: In theory there is
			 * nothing else in the frame. KA9Q has an option to
			 * send data with the syn, BSD accepts data with the
			 * syn up to the [to be] advertised window and
			 * Solaris 2.1 gives you a protocol error. For now
			 * we just ignore it, that fits the spec precisely
			 * and avoids incompatibilities. It would be nice in
			 * future to drop through and process the data.
			 *
			 * Now that TTCP is starting to be used we ought to
			 * queue this data.
			 * But, this leaves one open to an easy denial of
			 * service attack, and SYN cookies can't defend
			 * against this problem. So, we drop the data
			 * in the interest of security over speed.
			 */
			goto discard;
		}
		goto discard;
		break;

	case TCP_SYN_SENT:
		/* SYN sent means we have to look for a suitable ack and
		 * either reset for bad matches or go to connected.
		 * The SYN_SENT case is unusual and should
		 * not be in line code. [AC]
		 */
		if (th->ack) {
			/* rfc793:
			 * "If the state is SYN-SENT then
			 *    first check the ACK bit
			 *      If the ACK bit is set
			 *        If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
			 *        a reset (unless the RST bit is set, if so drop
			 *        the segment and return)"
			 *
			 * I cite this place to emphasize one essential
			 * detail, this check is different from the one
			 * in established state: SND.UNA <= SEG.ACK <= SND.NXT.
			 * SEG_ACK == SND.UNA == ISS is invalid in SYN-SENT,
			 * because we have no previous data sent before SYN.
			 *				--ANK(990513)
			 *
			 * We do not send data with SYN, so that RFC-correct
			 * test reduces to:
			 */
			if (sk->zapped ||
			    TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
				return 1;

			/* Now ACK is acceptable.
			 *
			 * "If the RST bit is set
			 *    If the ACK was acceptable then signal the user "error:
			 *    connection reset", drop the segment, enter CLOSED state,
			 *    delete TCB, and return."
			 */
			if (th->rst) {
				tcp_reset(sk);
				goto discard;
			}

			/* rfc793:
			 *   "fifth, if neither of the SYN or RST bits is set then
			 *    drop the segment and return."
			 *
			 *    See note below!
			 *				--ANK(990513)
			 */
			if (!th->syn)
				goto discard;

			/* rfc793:
			 *   "If the SYN bit is on ...
			 *    are acceptable then ...
			 *    (our SYN has been ACKed), change the connection
			 *    state to ESTABLISHED..."
			 *
			 * Do you see? SYN-less ACKs in SYN-SENT state are
			 * completely ignored.
			 *
			 * The bug causing stalled SYN-SENT sockets
			 * was here: tcp_ack advanced snd_una and canceled
			 * retransmit timer, so that bare ACK received
			 * in SYN-SENT state (even with invalid ack==ISS,
			 * because tcp_ack check is too weak for SYN-SENT)
			 * causes moving socket to invalid semi-SYN-SENT,
			 * semi-ESTABLISHED state and connection hangs.
			 *
			 * There exist buggy stacks, which really send
			 * such ACKs: f.e. 202.226.91.94 (okigate.oki.co.jp)
			 * Actually, if this host did not try to get something
			 * from ftp.inr.ac.ru I'd never find this bug 8)
			 *
			 *				--ANK (990514)
			 */
			tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
			tcp_ack(sk, th, TCP_SKB_CB(skb)->seq,
				TCP_SKB_CB(skb)->ack_seq, len);

			/* Ok.. it's good. Set up sequence numbers and
			 * move to established.
			 */
			tp->rcv_nxt = TCP_SKB_CB(skb)->seq+1;
			tp->rcv_wup = TCP_SKB_CB(skb)->seq+1;

			/* RFC1323: The window in SYN & SYN/ACK segments is
			 * never scaled.
			 */
			tp->snd_wnd = htons(th->window);
			tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
			tp->snd_wl2 = TCP_SKB_CB(skb)->ack_seq;
			tp->fin_seq = TCP_SKB_CB(skb)->seq;

			tcp_set_state(sk, TCP_ESTABLISHED);
			tcp_parse_options(sk, th, tp, 0);

			if (tp->wscale_ok == 0) {
				tp->snd_wscale = tp->rcv_wscale = 0;
				tp->window_clamp = min(tp->window_clamp, 65535);
			}

			if (tp->tstamp_ok) {
				tp->tcp_header_len = sizeof(struct tcphdr) +
					TCPOLEN_TSTAMP_ALIGNED;
			} else
				tp->tcp_header_len = sizeof(struct tcphdr);

			if (tp->saw_tstamp) {
				tp->ts_recent = tp->rcv_tsval;
				tp->ts_recent_stamp = tcp_time_stamp;
			}

			/* Can't be earlier, doff would be wrong. */
			tcp_send_ack(sk);

			sk->dport = th->source;
			tp->copied_seq = tp->rcv_nxt;

			if (!sk->dead) {
				sk->state_change(sk);
				sock_wake_async(sk->socket, 0);
			}
		} else {
			if (th->syn
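/*
 * Illustrative sketch (hypothetical helper, not from this file): the full
 * RFC 793 acceptability test for an ACK arriving in SYN-SENT, written with
 * the kernel's sequence-space helpers.  As the comments above note, in
 * SYN-SENT SND.UNA == ISS and no data is sent together with the SYN, so
 * SND.NXT == ISS+1 and the test collapses to the single comparison
 * ack_seq == tp->snd_nxt used in the code above.
 */
static __inline__ int syn_sent_ack_acceptable_sketch(struct tcp_opt *tp, u32 ack_seq)
{
	/* "If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send a reset" */
	return after(ack_seq, tp->snd_una) && !after(ack_seq, tp->snd_nxt);
}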