📄 tcp_ipv4.c
        rep.th.doff = arg.iov[0].iov_len / 4;
        rep.th.seq = htonl(seq);
        rep.th.ack_seq = htonl(ack);
        rep.th.ack = 1;
        rep.th.window = htons(win);

        arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
                                      skb->nh.iph->saddr, /*XXX*/
                                      arg.iov[0].iov_len,
                                      IPPROTO_TCP, 0);
        arg.csumoffset = offsetof(struct tcphdr, check) / 2;

        ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);

        TCP_INC_STATS_BH(TcpOutSegs);
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

        tcp_v4_send_ack(skb, tw->snd_nxt, tw->rcv_nxt,
                        tw->rcv_wnd >> tw->rcv_wscale, tw->ts_recent);

        tcp_tw_put(tw);
}

static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req)
{
        tcp_v4_send_ack(skb, req->snt_isn + 1, req->rcv_isn + 1, req->rcv_wnd,
                        req->ts_recent);
}

static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct open_request *req)
{
        struct rtable *rt;
        struct ip_options *opt;

        opt = req->af.v4_req.opt;
        if (ip_route_output(&rt, ((opt && opt->srr) ?
                                  opt->faddr : req->af.v4_req.rmt_addr),
                            req->af.v4_req.loc_addr,
                            RT_CONN_FLAGS(sk), sk->bound_dev_if)) {
                IP_INC_STATS_BH(IpOutNoRoutes);
                return NULL;
        }
        if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
                ip_rt_put(rt);
                IP_INC_STATS_BH(IpOutNoRoutes);
                return NULL;
        }
        return &rt->u.dst;
}

/*
 *      Send a SYN-ACK after having received an ACK.
 *      This still operates on an open_request only, not on a full
 *      socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
                              struct dst_entry *dst)
{
        int err = -1;
        struct sk_buff *skb;

        /* First, grab a route. */
        if (dst == NULL &&
            (dst = tcp_v4_route_req(sk, req)) == NULL)
                goto out;

        skb = tcp_make_synack(sk, dst, req);

        if (skb) {
                struct tcphdr *th = skb->h.th;

                th->check = tcp_v4_check(th, skb->len,
                                         req->af.v4_req.loc_addr,
                                         req->af.v4_req.rmt_addr,
                                         csum_partial((char *)th, skb->len,
                                                      skb->csum));

                err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr,
                                            req->af.v4_req.rmt_addr,
                                            req->af.v4_req.opt);
                if (err == NET_XMIT_CN)
                        err = 0;
        }

out:
        dst_release(dst);
        return err;
}

/*
 *      IPv4 open_request destructor.
 */
static void tcp_v4_or_free(struct open_request *req)
{
        if (req->af.v4_req.opt)
                kfree(req->af.v4_req.opt);
}

static inline void syn_flood_warning(struct sk_buff *skb)
{
        static unsigned long warntime;

        if (jiffies - warntime > HZ * 60) {
                warntime = jiffies;
                printk(KERN_INFO
                       "possible SYN flooding on port %d. Sending cookies.\n",
                       ntohs(skb->h.th->dest));
        }
}

/*
 * Save and compile IPv4 options into the open_request if needed.
 */
static inline struct ip_options *
tcp_v4_save_options(struct sock *sk, struct sk_buff *skb)
{
        struct ip_options *opt = &(IPCB(skb)->opt);
        struct ip_options *dopt = NULL;

        if (opt && opt->optlen) {
                int opt_size = optlength(opt);
                dopt = kmalloc(opt_size, GFP_ATOMIC);
                if (dopt) {
                        if (ip_options_echo(dopt, skb)) {
                                kfree(dopt);
                                dopt = NULL;
                        }
                }
        }
        return dopt;
}

/*
 * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
 * One SYN_RECV socket costs about 80 bytes on a 32-bit machine.
 * It would be better to replace it with a global counter for all sockets,
 * but then some measure against one socket starving all other sockets
 * would be needed.
 *
 * It was 128 by default. Experiments with real servers show that
 * it is absolutely not enough even at 100 conn/sec. 256 cures most
 * of the problems. This value is adjusted to 128 for very small machines
 * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
 * Increasing it further requires changing the hash table size.
 */
int sysctl_max_syn_backlog = 256;
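/*
 * Illustrative sketch, not part of tcp_ipv4.c: how the two queues involved
 * here look from user space. sysctl_max_syn_backlog bounds the per-listener
 * queue of half-open (SYN_RECV) connections and is exposed as
 * /proc/sys/net/ipv4/tcp_max_syn_backlog, while the backlog argument to
 * listen() sizes the accept queue that tcp_acceptq_is_full() checks below.
 * The function name is hypothetical and error handling is trimmed.
 */
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

static int example_listener(unsigned short port)
{
        struct sockaddr_in addr;
        int fd = socket(AF_INET, SOCK_STREAM, 0);

        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_addr.s_addr = htonl(INADDR_ANY);
        addr.sin_port = htons(port);
        bind(fd, (struct sockaddr *)&addr, sizeof(addr));

        /* This 128 sizes the accept (completed-connection) queue only;
         * the half-open queue limit comes from sysctl_max_syn_backlog. */
        listen(fd, 128);
        return fd;
}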
struct or_calltable or_ipv4 = {
        PF_INET,
        tcp_v4_send_synack,
        tcp_v4_or_send_ack,
        tcp_v4_or_free,
        tcp_v4_send_reset
};

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_opt tp;
        struct open_request *req;
        __u32 saddr = skb->nh.iph->saddr;
        __u32 daddr = skb->nh.iph->daddr;
        __u32 isn = TCP_SKB_CB(skb)->when;
        struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
        int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

        /* Never answer SYNs sent to broadcast or multicast */
        if (((struct rtable *)skb->dst)->rt_flags &
            (RTCF_BROADCAST | RTCF_MULTICAST))
                goto drop;

        /* TW buckets are converted to open requests without
         * limitations: they conserve resources and the peer is
         * evidently a real one.
         */
        if (tcp_synq_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
                if (sysctl_tcp_syncookies) {
                        want_cookie = 1;
                } else
#endif
                goto drop;
        }

        /* The accept backlog is full. If we have already queued enough
         * warm entries in the syn queue, drop the request. That is better
         * than clogging the syn queue with openreqs whose timeouts grow
         * exponentially.
         */
        if (tcp_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
                goto drop;

        req = tcp_openreq_alloc();
        if (req == NULL)
                goto drop;

        tcp_clear_options(&tp);
        tp.mss_clamp = 536;
        tp.user_mss = sk->tp_pinfo.af_tcp.user_mss;

        tcp_parse_options(skb, &tp, 0);

        if (want_cookie) {
                tcp_clear_options(&tp);
                tp.saw_tstamp = 0;
        }

        if (tp.saw_tstamp && tp.rcv_tsval == 0) {
                /* Some OSes (unknown ones, but I see them on web servers
                 * which contain information interesting only for Windows
                 * users) do not send their stamp in the SYN. It is the easy
                 * case: we simply do not advertise TS support.
                 */
                tp.saw_tstamp = 0;
                tp.tstamp_ok = 0;
        }
        tp.tstamp_ok = tp.saw_tstamp;

        tcp_openreq_init(req, &tp, skb);

        req->af.v4_req.loc_addr = daddr;
        req->af.v4_req.rmt_addr = saddr;
        req->af.v4_req.opt = tcp_v4_save_options(sk, skb);
        req->class = &or_ipv4;

        if (!want_cookie)
                TCP_ECN_create_request(req, skb->h.th);

        if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
                syn_flood_warning(skb);
#endif
                isn = cookie_v4_init_sequence(sk, skb, &req->mss);
        } else if (isn == 0) {
                struct inet_peer *peer = NULL;

                /* VJ's idea. We save the last timestamp seen
                 * from the destination in the peer table when entering
                 * state TIME-WAIT, and check against it before
                 * accepting a new connection request.
                 *
                 * If "isn" is not zero, this request hit an alive
                 * timewait bucket, so all the necessary checks
                 * are made in the function processing timewait state.
                 */
                if (tp.saw_tstamp &&
                    sysctl_tcp_tw_recycle &&
                    (dst = tcp_v4_route_req(sk, req)) != NULL &&
                    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
                    peer->v4daddr == saddr) {
                        if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
                            (s32)(peer->tcp_ts - req->ts_recent) > TCP_PAWS_WINDOW) {
                                NET_INC_STATS_BH(PAWSPassiveRejected);
                                dst_release(dst);
                                goto drop_and_free;
                        }
                }
                /* Kill the following clause, if you dislike this way. */
                else if (!sysctl_tcp_syncookies &&
                         (sysctl_max_syn_backlog - tcp_synq_len(sk)
                          < (sysctl_max_syn_backlog >> 2)) &&
                         (!peer || !peer->tcp_ts_stamp) &&
                         (!dst || !dst->rtt)) {
                        /* Without syncookies the last quarter of the
                         * backlog is reserved for destinations proven to be
                         * alive. It means that we keep communicating with
                         * destinations already remembered at the moment the
                         * synflood started.
                         */
                        NETDEBUG(if (net_ratelimit()) \
                                 printk(KERN_DEBUG "TCP: drop open request from %u.%u.%u.%u/%u\n", \
                                        NIPQUAD(saddr), ntohs(skb->h.th->source)));
                        TCP_INC_STATS_BH(TcpAttemptFails);
                        dst_release(dst);
                        goto drop_and_free;
                }

                isn = tcp_v4_init_sequence(sk, skb);
        }
        req->snt_isn = isn;

        if (tcp_v4_send_synack(sk, req, dst))
                goto drop_and_free;

        if (want_cookie) {
                tcp_openreq_free(req);
        } else {
                tcp_v4_synq_add(sk, req);
        }
        return 0;

drop_and_free:
        tcp_openreq_free(req);
drop:
        TCP_INC_STATS_BH(TcpAttemptFails);
        return 0;
}
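/*
 * Illustrative sketch, not part of tcp_ipv4.c: the wrap-safe timestamp
 * comparison used in the PAWS check inside tcp_v4_conn_request() above.
 * Casting the 32-bit difference to a signed value keeps the test correct
 * across timestamp wraparound, as long as the two stamps are less than
 * 2^31 apart. The helper name is hypothetical; __u32/__s32 are the kernel
 * types already used in this file.
 */
static inline int example_ts_is_older(__u32 cached_ts, __u32 offered_ts,
                                      __s32 window)
{
        /* True when offered_ts lags cached_ts by more than 'window',
         * mirroring (s32)(peer->tcp_ts - req->ts_recent) > TCP_PAWS_WINDOW. */
        return (__s32)(cached_ts - offered_ts) > window;
}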
/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
                                  struct open_request *req,
                                  struct dst_entry *dst)
{
        struct tcp_opt *newtp;
        struct sock *newsk;

        if (tcp_acceptq_is_full(sk))
                goto exit_overflow;

        if (dst == NULL &&
            (dst = tcp_v4_route_req(sk, req)) == NULL)
                goto exit;

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (!newsk)
                goto exit;

        newsk->dst_cache = dst;
        newsk->route_caps = dst->dev->features;

        newtp = &(newsk->tp_pinfo.af_tcp);
        newsk->daddr = req->af.v4_req.rmt_addr;
        newsk->saddr = req->af.v4_req.loc_addr;
        newsk->rcv_saddr = req->af.v4_req.loc_addr;
        newsk->protinfo.af_inet.opt = req->af.v4_req.opt;
        req->af.v4_req.opt = NULL;
        newsk->protinfo.af_inet.mc_index = tcp_v4_iif(skb);
        newsk->protinfo.af_inet.mc_ttl = skb->nh.iph->ttl;
        newtp->ext_header_len = 0;
        if (newsk->protinfo.af_inet.opt)
                newtp->ext_header_len = newsk->protinfo.af_inet.opt->optlen;
        newsk->protinfo.af_inet.id = newtp->write_seq ^ jiffies;

        tcp_sync_mss(newsk, dst->pmtu);
        newtp->advmss = dst->advmss;
        tcp_initialize_rcv_mss(newsk);

        __tcp_v4_hash(newsk);
        __tcp_inherit_port(sk, newsk);

        return newsk;

exit_overflow:
        NET_INC_STATS_BH(ListenOverflows);
exit:
        NET_INC_STATS_BH(ListenDrops);
        dst_release(dst);
        return NULL;
}

static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
        struct open_request *req, **prev;
        struct tcphdr *th = skb->h.th;
        struct iphdr *iph = skb->nh.iph;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sock *nsk;

        /* Find possible connection requests. */
        req = tcp_v4_search_req(tp, &prev, th->source,
                                iph->saddr, iph->daddr);
        if (req)
                return tcp_check_req(sk, skb, req, prev);

        nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr,
                                          th->source,
                                          skb->nh.iph->daddr,
                                          ntohs(th->dest),
                                          tcp_v4_iif(skb));

        if (nsk) {
                if (nsk->state != TCP_TIME_WAIT) {
                        bh_lock_sock(nsk);
                        return nsk;
                }
                tcp_tw_put((struct tcp_tw_bucket *)nsk);
                return NULL;
        }

#ifdef CONFIG_SYN_COOKIES
        if (!th->rst && !th->syn && th->ack)
                sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
        return sk;
}

static int tcp_v4_checksum_init(struct sk_buff *skb)
{
        if (skb->ip_summed == CHECKSUM_HW) {
                skb->ip_summed = CHECKSUM_UNNECESSARY;
                if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
                                  skb->nh.iph->daddr, skb->csum))
                        return 0;

                NETDEBUG(if (net_ratelimit())
                                printk(KERN_DEBUG "hw tcp v4 csum failed\n"));
                skb->ip_summed = CHECKSUM_NONE;
        }
        if (skb->len <= 76) {
                if (tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
                                 skb->nh.iph->daddr,
                                 skb_checksum(skb, 0, skb->len, 0)))
                        return -1;
                skb->ip_summed = CHECKSUM_UNNECESSARY;
        } else {
                skb->csum = ~tcp_v4_check(skb->h.th, skb->len,
                                          skb->nh.iph->saddr,
                                          skb->nh.iph->daddr, 0);
        }
        return 0;
}
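/*
 * Illustrative sketch, not part of tcp_ipv4.c: a plain, unoptimized version
 * of the checksum that tcp_v4_check()/csum_tcpudp_nofold() compute above
 * with architecture-specific helpers -- the one's-complement sum (RFC 1071)
 * over the IPv4 pseudo-header followed by the TCP header and payload.
 * Addresses are passed as network-order byte arrays; the function name is
 * hypothetical and the TCP checksum field is assumed to be zeroed.
 */
static unsigned short example_tcp_csum(const unsigned char saddr[4],
                                       const unsigned char daddr[4],
                                       const unsigned char *tcp,
                                       unsigned int len)
{
        unsigned long sum = 0;
        unsigned int i;

        /* Pseudo-header: src addr, dst addr, zero+protocol, TCP length. */
        for (i = 0; i < 4; i += 2) {
                sum += (saddr[i] << 8) | saddr[i + 1];
                sum += (daddr[i] << 8) | daddr[i + 1];
        }
        sum += IPPROTO_TCP;     /* zero byte + protocol number (6) */
        sum += len;             /* TCP header + data length */

        /* TCP segment, 16 bits at a time, odd trailing byte padded with 0. */
        for (i = 0; i + 1 < len; i += 2)
                sum += (tcp[i] << 8) | tcp[i + 1];
        if (len & 1)
                sum += tcp[len - 1] << 8;

        /* Fold the carries back in and take the one's complement.
         * The 16-bit result is stored big-endian in the TCP header. */
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);
        return (unsigned short)~sum;
}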
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_FILTER
        struct sk_filter *filter = sk->filter;
        if (filter && sk_filter(skb, filter))
                goto discard;
#endif /* CONFIG_FILTER */

        IP_INC_STATS_BH(IpInDelivers);

        if (sk->state == TCP_ESTABLISHED) { /* Fast path */
                TCP_CHECK_TIMER(sk);
                if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
                        goto reset;
                TCP_CHECK_TIMER(sk);
                return 0;
        }

        if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
                goto csum_err;

        if (sk->state == TCP_LISTEN) {
                struct sock *nsk = tcp_v4_hnd_req(sk, skb);
                if (!nsk)
                        goto discard;

                if (nsk != sk) {
                        if (tcp_child_process(sk, nsk, skb))
                                goto reset;
                        return 0;
                }
        }

        TCP_CHECK_TIMER(sk);
        if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
                goto reset;
        TCP_CHECK_TIMER(sk);
        return 0;

reset:
        tcp_v4_send_reset(skb);
discard:
        kfree_skb(skb);
        /* Be careful here. If this function gets more complicated and
         * gcc suffers from register pressure on the x86, sk (in %ebx)
         * might be destroyed here. This current version compiles correctly,
         * but you have been warned.
         */
        return 0;

csum_err:
        TCP_INC_STATS_BH(TcpInErrs);
        goto discard;
}

/*
 *      From tcp_input.c
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
        struct tcphdr *th;
        struct sock *sk;
        int ret;

        if (skb->pkt_type != PACKET_HOST)
                goto discard_it;

        /* Count it even if it's bad */
        TCP_INC_STATS_BH(TcpInSegs);

        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
                goto discard_it;

        th = skb->h.th;

        if (th->doff < sizeof(struct tcphdr) / 4)
                goto bad_packet;
        if (!pskb_may_pull(skb, th->doff * 4))
                goto discard_it;

        /* An explanation is required here, I think.
         * Packet length and doff are validated by header prediction,
         * provided the case of th->doff == 0 is eliminated.
         * So, we defer the checks.
         */
        if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
             tcp_v4_checksum_init(skb) < 0))
                goto bad_packet;

        th = skb->h.th;
        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
                                    skb->len - th->doff * 4);
        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
        TCP_SKB_CB(skb)->when = 0;
        TCP_SKB_CB(skb)->flags = skb->nh.iph->tos;
        TCP_SKB_CB(skb)->sacked = 0;

        sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source,
                             skb->nh.iph->daddr, ntohs(th->dest),
                             tcp_v4_iif(skb));

        if (!sk)
                goto no_tcp_socket;

process:
        if (!ipsec_sk_policy(sk, skb))
                goto discard_and_relse;

        if (sk->state == TCP_TIME_WAIT)
                goto do_time_wait;

        skb->dev = NULL;

        bh_lock_sock(sk);
        ret = 0;
        if (!sk->lock.users) {
                if (!tcp_prequeue(sk, skb))
                        ret = tcp_v4_do_rcv(sk, skb);
        } else
                sk_add_backlog(sk, skb);
        bh_unlock_sock(sk);

        sock_put(sk);

        return ret;

no_tcp_socket: