📄 tcp_ipv6.c
字号:
/* * TCP over IPv6 * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $ * * Based on: * linux/net/ipv4/tcp.c * linux/net/ipv4/tcp_input.c * linux/net/ipv4/tcp_output.c * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind * a single port at the same time. * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */#include <linux/module.h>#include <linux/config.h>#include <linux/errno.h>#include <linux/types.h>#include <linux/socket.h>#include <linux/sockios.h>#include <linux/net.h>#include <linux/jiffies.h>#include <linux/in.h>#include <linux/in6.h>#include <linux/netdevice.h>#include <linux/init.h>#include <linux/jhash.h>#include <linux/ipsec.h>#include <linux/times.h>#include <linux/ipv6.h>#include <linux/icmpv6.h>#include <linux/random.h>#include <net/tcp.h>#include <net/ndisc.h>#include <net/ipv6.h>#include <net/transp_v6.h>#include <net/addrconf.h>#include <net/ip6_route.h>#include <net/ip6_checksum.h>#include <net/inet_ecn.h>#include <net/protocol.h>#include <net/xfrm.h>#include <net/addrconf.h>#include <net/snmp.h>#include <net/dsfield.h>#include <asm/uaccess.h>#include <linux/proc_fs.h>#include <linux/seq_file.h>static void tcp_v6_send_reset(struct sk_buff *skb);static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb);static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);static struct tcp_func ipv6_mapped;static 
struct tcp_func ipv6_specific;

/* I have no idea if this is a good hash for v6 or not. -DaveM */
/*
 * Hash the 4-tuple of an established IPv6 connection into an ehash
 * bucket index.  Only the low 32 bits of each address are mixed in,
 * folded with both ports; the result is masked to the table size
 * (assumes tcp_ehash_size is a power of two -- the mask is the
 * modulus).
 */
static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
				    struct in6_addr *faddr, u16 fport)
{
	int hashent = (lport ^ fport);

	hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
	hashent ^= hashent >> 16;
	hashent ^= hashent >> 8;
	return (hashent & (tcp_ehash_size - 1));
}

/*
 * Convenience wrapper: hash @sk's own endpoints (bound address/port
 * vs. peer address/port) with tcp_v6_hashfn().
 */
static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
{
	struct inet_opt *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *laddr = &np->rcv_saddr;
	struct in6_addr *faddr = &np->daddr;
	__u16 lport = inet->num;
	__u16 fport = inet->dport;

	return tcp_v6_hashfn(laddr, lport, faddr, fport);
}

/*
 * Check whether binding @sk into bind bucket @tb clashes with one of
 * the bucket's current owners.  A conflict requires all three of:
 *  - the two sockets could see the same traffic (either one is not
 *    bound to a device, or both are bound to the same device),
 *  - reuse is not permitted (either side lacks SO_REUSEADDR, or the
 *    existing owner is listening),
 *  - the receive addresses actually overlap.
 * Returns non-zero on conflict.
 */
static inline int tcp_v6_bind_conflict(struct sock *sk,
				       struct tcp_bind_bucket *tb)
{
	struct sock *sk2;
	struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		    ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	/* node is left non-NULL iff the walk broke on a conflict. */
	return node != NULL;
}

/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
/*
 * Bind @sk to local port @snum; when @snum is 0, search the sysctl
 * local port range for a free ephemeral port instead.  Returns 0 on
 * success and 1 on failure (port range exhausted, bucket allocation
 * failure, or bind conflict).  Runs with bottom halves disabled for
 * the duration.
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	struct tcp_bind_hashbucket *head;
	struct tcp_bind_bucket *tb;
	struct hlist_node *node;
	int ret;

	local_bh_disable();
	if (snum == 0) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;
		int rover;

		spin_lock(&tcp_portalloc_lock);
		rover = tcp_port_rover;
		do {
			rover++;
			if ((rover < low) || (rover > high))
				rover = low;
			head = &tcp_bhash[tcp_bhashfn(rover)];
			spin_lock(&head->lock);
			tb_for_each(tb, node, &head->chain)
				if (tb->port == rover)
					goto next;
			/* Found an unused port: leave head->lock held
			 * and fall out to create its bucket below.
			 */
			break;
		next:
			spin_unlock(&head->lock);
		} while (--remaining > 0);
		tcp_port_rover = rover;
		spin_unlock(&tcp_portalloc_lock);

		/* Exhausted local port range during search? */
		ret = 1;
		if (remaining <= 0)
			goto fail;

		/* OK, here is the one we will use. */
		snum = rover;
	} else {
		head = &tcp_bhash[tcp_bhashfn(snum)];
		spin_lock(&head->lock);
		tb_for_each(tb, node, &head->chain)
			if (tb->port == snum)
				goto tb_found;
	}
	tb = NULL;
	goto tb_not_found;
tb_found:
	if (tb && !hlist_empty(&tb->owners)) {
		/* Fast path: every current owner set SO_REUSEADDR and
		 * none is listening, so sharing is always allowed.
		 */
		if (tb->fastreuse > 0 && sk->sk_reuse &&
		    sk->sk_state != TCP_LISTEN) {
			goto success;
		} else {
			ret = 1;
			if (tcp_v6_bind_conflict(sk, tb))
				goto fail_unlock;
		}
	}
tb_not_found:
	ret = 1;
	if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
		goto fail_unlock;
	if (hlist_empty(&tb->owners)) {
		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
			tb->fastreuse = 1;
		else
			tb->fastreuse = 0;
	} else if (tb->fastreuse &&
		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
		tb->fastreuse = 0;
success:
	if (!tcp_sk(sk)->bind_hash)
		tcp_bind_hash(sk, tb, snum);
	BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
	ret = 0;
fail_unlock:
	spin_unlock(&head->lock);
fail:
	local_bh_enable();
	return ret;
}

/*
 * Insert an as-yet unhashed @sk into the listening hash (listeners)
 * or the established hash (everything else).  Takes the relevant
 * chain's write lock; callers run with BHs disabled (see
 * tcp_v6_hash()).
 */
static __inline__ void __tcp_v6_hash(struct sock *sk)
{
	struct hlist_head *list;
	rwlock_t *lock;

	BUG_TRAP(sk_unhashed(sk));

	if (sk->sk_state == TCP_LISTEN) {
		list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
		lock = &tcp_lhash_lock;
		tcp_listen_wlock();
	} else {
		/* Non-listeners hash on the connection 4-tuple. */
		sk->sk_hashent = tcp_v6_sk_hashfn(sk);
		list = &tcp_ehash[sk->sk_hashent].chain;
		lock = &tcp_ehash[sk->sk_hashent].lock;
		write_lock(lock);
	}

	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(lock);
}

/*
 * Hash @sk unless it is closed.  v4-mapped sockets live in the IPv4
 * hash tables, so those are delegated to tcp_prot.hash().
 */
static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		struct tcp_opt *tp = tcp_sk(sk);

		if (tp->af_specific == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__tcp_v6_hash(sk);
		local_bh_enable();
	}
}

/*
 * Find the best listener for local port @hnum / address @daddr /
 * inbound device @dif.  A candidate scores one point for matching the
 * port and family, one more for an exact bound address, and one more
 * for a matching bound device.  A perfect score of 3 ends the search
 * early; otherwise the highest scorer wins.  The returned socket has
 * a reference held (caller must release it).
 */
static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr,
					   unsigned short hnum, int dif)
{
	struct sock *sk;
	struct hlist_node *node;
	struct sock *result = NULL;
	int score, hiscore;

	hiscore = 0;
	read_lock(&tcp_lhash_lock);
	sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			score = 1;
			if (!ipv6_addr_any(&np->rcv_saddr)) {
				if (ipv6_addr_cmp(&np->rcv_saddr, daddr))
					continue;
				score++;
			}
			if (sk->sk_bound_dev_if) {
				if (sk->sk_bound_dev_if != dif)
					continue;
				score++;
			}
			if (score == 3) {
				result = sk;
				break;
			}
			if (score > hiscore) {
				hiscore = score;
				result = sk;
			}
		}
	}
	if (result)
		sock_hold(result);
	read_unlock(&tcp_lhash_lock);
	return result;
}

/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */
/*
 * Exact-match lookup in the established hash.  Also scans the
 * TIME-WAIT half of the table (the chains at offset tcp_ehash_size)
 * before giving up.  Returns the socket with a reference held, or
 * NULL if no connection matches.
 */
static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr,
						       u16 sport,
						       struct in6_addr *daddr,
						       u16 hnum, int dif)
{
	struct tcp_ehash_bucket *head;
	struct sock *sk;
	struct hlist_node *node;
	__u32 ports = TCP_COMBINED_PORTS(sport, hnum);
	int hash;

	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
	hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
	head = &tcp_ehash[hash];
	read_lock(&head->lock);
	sk_for_each(sk, node, &head->chain) {
		/* For IPV6 do the cheaper port and family tests first.
		 */
		if (TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
			goto hit; /* You sunk my battleship! */
	}
	/* Must check for a TIME_WAIT'er before going to listener hash. */
	sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
		/* FIXME: acme: check this... */
		struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

		if (*((__u32 *)&(tw->tw_dport)) == ports &&
		    sk->sk_family == PF_INET6) {
			if (!ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
			    !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
			    (!sk->sk_bound_dev_if ||
			     sk->sk_bound_dev_if == dif))
				goto hit;
		}
	}
	read_unlock(&head->lock);
	return NULL;

hit:
	sock_hold(sk);
	read_unlock(&head->lock);
	return sk;
}

/*
 * Full demux: try established/TIME-WAIT first, then fall back to the
 * listener table.  @hnum is the local port in host byte order.
 */
static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
					   struct in6_addr *daddr, u16 hnum,
					   int dif)
{
	struct sock *sk;

	sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
	if (sk)
		return sk;

	return tcp_v6_lookup_listener(daddr, hnum, dif);
}

/*
 * BH-safe wrapper around __tcp_v6_lookup().  Note that @dport here is
 * in network byte order, unlike @hnum above.
 */
inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
				  struct in6_addr *daddr, u16 dport, int dif)
{
	struct sock *sk;

	local_bh_disable();
	sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
	local_bh_enable();

	return sk;
}

/*
 * Open request hash tables.
 */
/*
 * Hash a pending connection's remote address and port into a
 * SYN-queue bucket, mixed with the per-listener random secret @rnd
 * via the Jenkins hash primitives from <linux/jhash.h>.
 */
static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
{
	u32 a, b, c;

	a = raddr->s6_addr32[0];
	b = raddr->s6_addr32[1];
	c = raddr->s6_addr32[2];

	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += rnd;
	__jhash_mix(a, b, c);

	a += raddr->s6_addr32[3];
	b += (u32) rport;
	__jhash_mix(a, b, c);

	return c & (TCP_SYNQ_HSIZE - 1);
}

/*
 * Find the pending open_request matching the given 4-tuple (and the
 * inbound interface, when the request recorded one) in @tp's SYN
 * queue.  On success, *@prevp is set to the predecessor link so the
 * caller can unlink the request from the chain.
 */
static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
					      struct open_request ***prevp,
					      __u16 rport,
					      struct in6_addr *raddr,
					      struct in6_addr *laddr,
					      int iif)
{
	struct tcp_listen_opt *lopt = tp->listen_opt;
	struct open_request *req, **prev;

	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		if (req->rmt_port == rport &&
		    req->class->family == AF_INET6 &&
		    !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, raddr) &&
		    !ipv6_addr_cmp(&req->af.v6_req.loc_addr, laddr) &&
		    (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
			/* A request still in the SYN queue has no
			 * child socket attached yet.
			 */
			BUG_TRAP(req->sk == NULL);
			*prevp = prev;
			return req;
		}
	}

	return NULL;
}

/* Compute the TCP-over-IPv6 pseudo-header checksum for @len bytes. */
static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

/*
 * Choose a secure initial sequence number for the connection @skb
 * belongs to: the IPv6 generator for native IPv6 frames, the IPv4
 * generator otherwise (v4-mapped traffic).
 */
static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest,
						    skb->h.th->source);
	} else {
		return secure_tcp_sequence_number(skb->nh.iph->daddr,
						  skb->nh.iph->saddr,
						  skb->h.th->dest,
						  skb->h.th->source);
	}
}

/*
 * Verify that @sk's 4-tuple is unique, then insert it into the
 * established hash.  A matching TIME-WAIT socket with usable
 * timestamp state is recycled: its sequence/timestamp state is
 * inherited and the TIME-WAIT bucket is descheduled.  Returns 0 on
 * success or -EADDRNOTAVAIL if the tuple is already taken.
 */
static int tcp_v6_check_established(struct sock *sk)
{
	struct inet_opt *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *daddr = &np->rcv_saddr;
	struct in6_addr *saddr = &np->daddr;
	int dif = sk->sk_bound_dev_if;
	u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num);
	int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
	struct tcp_ehash_bucket *head = &tcp_ehash[hash];
	struct sock *sk2;
	struct hlist_node *node;
	struct tcp_tw_bucket *tw;

	write_lock_bh(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
		tw = (struct tcp_tw_bucket *)sk2;

		if (*((__u32 *)&(tw->tw_dport)) == ports &&
		    sk2->sk_family == PF_INET6 &&
		    !ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
		    !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
		    sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			struct tcp_opt *tp = tcp_sk(sk);

			if (tw->tw_ts_recent_stamp) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->ts_recent = tw->tw_ts_recent;
				tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hashent = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock_bh(&head->lock);

	if (tw) {
		/* Silly. Should hash-dance instead... */
		local_bh_disable();
		tcp_tw_deschedule(tw);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
		local_bh_enable();

		tcp_tw_put(tw);
	}
	return 0;

not_unique:
	write_unlock_bh(&head->lock);
	return -EADDRNOTAVAIL;
}

/*
 * Bind (if necessary) and hash a connecting socket.  If @sk is the
 * sole owner of its bind bucket, the 4-tuple is trivially unique and
 * it can be hashed directly; otherwise fall through to the full
 * uniqueness check.
 */
static int tcp_v6_hash_connect(struct sock *sk)
{
	struct tcp_bind_hashbucket *head;
	struct tcp_bind_bucket *tb;

	/* XXX */
	if (inet_sk(sk)->num == 0) {
		/* Autobind: num is 0 here, so this picks an ephemeral
		 * port.
		 */
		int err = tcp_v6_get_port(sk, inet_sk(sk)->num);
		if (err)
			return err;
		inet_sk(sk)->sport = htons(inet_sk(sk)->num);
	}

	head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
	tb = tb_head(head);

	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__tcp_v6_hash(sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock_bh(&head->lock);
		return tcp_v6_check_established(sk);
	}
}

/* Inbound interface index of @skb, from the IPv6 control block. */
static __inline__ int tcp_v6_iif(struct sk_buff *skb)
{
	return IP6CB(skb)->iif;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -