tcp_ipv4.c
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct open_request *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = tcp_v4_search_req(tp, &prev, th->dest,
					iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		BUG_TRAP(!req->sk);

		if (seq != req->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		tcp_synq_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 *						--ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
		       struct sk_buff *skb)
{
	struct inet_opt *inet = inet_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
					 csum_partial((char *)th,
						      th->doff << 2,
						      skb->csum));
	}
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *	So that we build reply only basing on parameters
 *	arrived with segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */
static void tcp_v4_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th;
	struct tcphdr rth;
	struct ip_reply_arg arg;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rth, 0, sizeof(struct tcphdr));
	rth.dest   = th->source;
	rth.source = th->dest;
	rth.doff   = sizeof(struct tcphdr) / 4;
	rth.rst    = 1;

	if (th->ack) {
		rth.seq = th->ack_seq;
	} else {
		rth.ack = 1;
		rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				    skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof arg);
	arg.iov[0].iov_base = (unsigned char *)&rth;
	arg.iov[0].iov_len  = sizeof rth;
	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
				      skb->nh.iph->saddr, /*XXX*/
				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;

	ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);

	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th;
	struct {
		struct tcphdr th;
		u32 tsopt[3];
	} rep;
	struct ip_reply_arg arg;

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof arg);

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				     (TCPOPT_TIMESTAMP << 8) |
				     TCPOLEN_TIMESTAMP);
		rep.tsopt[1] = htonl(tcp_time_stamp);
		rep.tsopt[2] = htonl(ts);
		arg.iov[0].iov_len = sizeof(rep);
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
				      skb->nh.iph->saddr, /*XXX*/
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;

	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

	tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
			tw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tw->tw_ts_recent);

	tcp_tw_put(tw);
}

static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req)
{
	tcp_v4_send_ack(skb, req->snt_isn + 1, req->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent);
}

static struct dst_entry* tcp_v4_route_req(struct sock *sk,
					  struct open_request *req)
{
	struct rtable *rt;
	struct ip_options *opt = req->af.v4_req.opt;
	struct flowi fl = { .oif = sk->sk_bound_dev_if,
			    .nl_u = { .ip4_u =
				      { .daddr = ((opt && opt->srr) ?
						  opt->faddr :
						  req->af.v4_req.rmt_addr),
					.saddr = req->af.v4_req.loc_addr,
					.tos = RT_CONN_FLAGS(sk) } },
			    .proto = IPPROTO_TCP,
			    .uli_u = { .ports =
				       { .sport = inet_sk(sk)->sport,
					 .dport = req->rmt_port } } };

	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
		return NULL;
	}
	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
		ip_rt_put(rt);
		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
		return NULL;
	}
	return &rt->u.dst;
}

/*
 *	Send a SYN-ACK after having received an ACK.
 *	This still operates on a open_request only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
			      struct dst_entry *dst)
{
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
		goto out;

	skb = tcp_make_synack(sk, dst, req);

	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v4_check(th, skb->len,
					 req->af.v4_req.loc_addr,
					 req->af.v4_req.rmt_addr,
					 csum_partial((char *)th, skb->len,
						      skb->csum));

		err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr,
					    req->af.v4_req.rmt_addr,
					    req->af.v4_req.opt);
		if (err == NET_XMIT_CN)
			err = 0;
	}

out:
	dst_release(dst);
	return err;
}

/*
 *	IPv4 open_request destructor.
 */
static void tcp_v4_or_free(struct open_request *req)
{
	if (req->af.v4_req.opt)
		kfree(req->af.v4_req.opt);
}

static inline void syn_flood_warning(struct sk_buff *skb)
{
	static unsigned long warntime;

	if (time_after(jiffies, (warntime + HZ * 60))) {
		warntime = jiffies;
		printk(KERN_INFO
		       "possible SYN flooding on port %d. Sending cookies.\n",
		       ntohs(skb->h.th->dest));
	}
}

/*
 * Save and compile IPv4 options into the open_request if needed.
 */
static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
						     struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = optlength(opt);
		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(dopt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

/*
 * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
 * One SYN_RECV socket costs about 80bytes on a 32bit machine.
 * It would be better to replace it with a global counter for all sockets
 * but then some measure against one socket starving all other sockets
 * would be needed.
 *
 * It was 128 by default. Experiments with real servers show, that
 * it is absolutely not enough even at 100conn/sec. 256 cures most
 * of problems. This value is adjusted to 128 for very small machines
 * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
 * Further increasing requires to change hash table size.
 */
int sysctl_max_syn_backlog = 256;

struct or_calltable or_ipv4 = {
	.family		=	PF_INET,
	.rtx_syn_ack	=	tcp_v4_send_synack,
	.send_ack	=	tcp_v4_or_send_ack,
	.destructor	=	tcp_v4_or_free,
	.send_reset	=	tcp_v4_send_reset,
};

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_opt tp;
	struct open_request *req;
	__u32 saddr = skb->nh.iph->saddr;
	__u32 daddr = skb->nh.iph->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

	/* Never answer to SYNs send to broadcast or multicast */
	if (((struct rtable *)skb->dst)->rt_flags &
	    (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (tcp_synq_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies) {
			want_cookie = 1;
		} else
#endif
		goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
		goto drop;

	req = tcp_openreq_alloc();
	if (!req)
		goto drop;

	tcp_clear_options(&tp);
	tp.mss_clamp = 536;
	tp.user_mss  = tcp_sk(sk)->user_mss;

	tcp_parse_options(skb, &tp, 0);

	if (want_cookie) {
		tcp_clear_options(&tp);
		tp.saw_tstamp = 0;
	}

	if (tp.saw_tstamp && !tp.rcv_tsval) {
		/* Some OSes (unknown ones, but I see them on web server, which
		 * contains information interesting only for windows'
		 * users) do not send their stamp in SYN. It is easy case.
		 * We simply do not advertise TS support.
		 */
		tp.saw_tstamp = 0;
		tp.tstamp_ok  = 0;
	}
	tp.tstamp_ok = tp.saw_tstamp;

	tcp_openreq_init(req, &tp, skb);

	req->af.v4_req.loc_addr = daddr;
	req->af.v4_req.rmt_addr = saddr;
	req->af.v4_req.opt = tcp_v4_save_options(sk, skb);
	req->class = &or_ipv4;
	if (!want_cookie)
		TCP_ECN_create_request(req, skb->h.th);

	if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
		syn_flood_warning(skb);
#endif
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
	} else if (!isn) {
		struct inet_peer *peer = NULL;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tp.saw_tstamp &&
		    sysctl_tcp_tw_recycle &&
		    (dst = tcp_v4_route_req(sk, req)) != NULL &&
		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
		    peer->v4daddr == saddr) {
			if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
				dst_release(dst);
				goto drop_and_free;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - tcp_synq_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			NETDEBUG(if (net_ratelimit()) \
					printk(KERN_DEBUG "TCP: drop open "
							  "request from %u.%u."
							  "%u.%u/%u\n", \
					       NIPQUAD(saddr),
					       ntohs(skb->h.th->source)));
			dst_release(dst);
			goto drop_and_free;
		}

		isn = tcp_v4_init_sequence(sk, skb);
	}
	req->snt_isn = isn;

	if (tcp_v4_send_synack(sk, req, dst))
		goto drop_and_free;

	if (want_cookie) {
		tcp_openreq_free(req);
	} else {
		tcp_v4_synq_add(sk, req);
	}
	return 0;

drop_and_free:
	tcp_openreq_free(req);
drop:
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0;
}

/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct open_request *req,
				  struct dst_entry *dst)
{
	struct inet_opt *newinet;
	struct tcp_opt *newtp;
	struct sock *newsk;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit;

	newsk->sk_dst_cache = dst;
	tcp_v4_setup_caps(newsk, dst);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newinet->daddr = req->af.v4_req.rmt_addr;
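
The ack_seq arithmetic in tcp_v4_send_reset() above is easy to lose among the field swaps: when the offending segment carried no ACK, the RST must acknowledge everything that segment consumed, and SYN and FIN each count for one sequence number on top of the payload length. The following stand-alone user-space sketch just restates that calculation; the helper name rst_ack_seq is hypothetical and not part of tcp_ipv4.c, and all values are in host byte order rather than the network order the kernel uses.

#include <stdio.h>
#include <stdint.h>

/* Hypothetical helper, mirroring the expression
 * htonl(ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2))
 * used by tcp_v4_send_reset() when the incoming segment had no ACK bit. */
static uint32_t rst_ack_seq(uint32_t seq, int syn, int fin,
			    uint32_t seg_len, uint32_t doff_words)
{
	/* Payload length is the total segment length minus the TCP header
	 * (doff counts 32-bit words); SYN and FIN each occupy one sequence
	 * number of their own. */
	return seq + syn + fin + (seg_len - (doff_words << 2));
}

int main(void)
{
	/* A SYN with a 20-byte header and no payload: the RST acknowledges
	 * seq + 1. */
	printf("%u\n", (unsigned)rst_ack_seq(1000, 1, 0, 20, 5)); /* 1001 */
	return 0;
}
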