ip_conntrack_core.c
/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   extension. */

/* (c) 1999 Paul `Rusty' Russell.  Licenced under the GNU General
 * Public Licence.
 *
 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
 *	- new API and handling of conntrack/nat helpers
 *	- now capable of multiple expectations for one master
 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
 *	- add usage/reference counts to ip_conntrack_expect
 *	- export ip_conntrack[_expect]_{find_get,put} functions
 */

#ifdef MODULE
#define __NO_VERSION__
#endif
#include <linux/version.h>
#include <linux/config.h>
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/brlock.h>
#include <net/checksum.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
/* For ERR_PTR().  Yeah, I know... --RR */
#include <linux/fs.h>

/* This rwlock protects the main hash table, protocol/helper/expected
   registrations, conntrack timers */
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)

#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>

#define IP_CONNTRACK_VERSION	"2.1"

#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif

DECLARE_RWLOCK(ip_conntrack_lock);
DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);

void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(ip_conntrack_expect_list);
LIST_HEAD(protocol_list);
static LIST_HEAD(helpers);
unsigned int ip_conntrack_htable_size = 0;
static int ip_conntrack_max = 0;
static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
struct list_head *ip_conntrack_hash;
static kmem_cache_t *ip_conntrack_cachep;

extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;

static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr,
			      u_int8_t protocol)
{
	return protocol == curr->proto;
}

struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol)
{
	struct ip_conntrack_protocol *p;

	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
	p = LIST_FIND(&protocol_list, proto_cmpfn,
		      struct ip_conntrack_protocol *, protocol);
	if (!p)
		p = &ip_conntrack_generic_protocol;

	return p;
}

struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
{
	struct ip_conntrack_protocol *p;

	READ_LOCK(&ip_conntrack_lock);
	p = __ip_ct_find_proto(protocol);
	READ_UNLOCK(&ip_conntrack_lock);
	return p;
}

inline void
ip_conntrack_put(struct ip_conntrack *ct)
{
	IP_NF_ASSERT(ct);
	IP_NF_ASSERT(ct->infos[0].master);
	/* nf_conntrack_put wants to go via an info struct, so feed it
	   one at random. */
	nf_conntrack_put(&ct->infos[0]);
}

static inline u_int32_t
hash_conntrack(const struct ip_conntrack_tuple *tuple)
{
#if 0
	dump_tuple(tuple);
#endif
	/* ntohl because more differences in low bits. */
	/* To ensure that halves of the same connection don't hash
	   clash, we add the source per-proto again. */
	return (ntohl(tuple->src.ip + tuple->dst.ip
		      + tuple->src.u.all + tuple->dst.u.all
		      + tuple->dst.protonum)
		+ ntohs(tuple->src.u.all))
		% ip_conntrack_htable_size;
}
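/* Illustrative sketch, not part of the original file: what hash_conntrack()
 * above computes for one concrete tuple.  Field names follow struct
 * ip_conntrack_tuple; the sample addresses/ports and the helper name
 * example_hash_demo are hypothetical.  Guarded by #if 0 so it is never
 * built. */
#if 0
static void example_hash_demo(void)
{
	struct ip_conntrack_tuple t;

	memset(&t, 0, sizeof(t));
	t.src.ip = htonl(0xc0a80001);	/* 192.168.0.1 */
	t.dst.ip = htonl(0x08080808);	/* 8.8.8.8 */
	t.src.u.all = htons(32768);	/* per-proto source part (e.g. port) */
	t.dst.u.all = htons(53);	/* per-proto dest part (e.g. port) */
	t.dst.protonum = IPPROTO_UDP;

	/* Adding ntohs(src.u.all) a second time keeps the original and
	   reply halves of one connection out of the same bucket. */
	printk("bucket=%u of %u\n", hash_conntrack(&t),
	       ip_conntrack_htable_size);
}
#endif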
inline int
get_tuple(const struct iphdr *iph, size_t len,
	  struct ip_conntrack_tuple *tuple,
	  struct ip_conntrack_protocol *protocol)
{
	int ret;

	/* Never happens: fragments are reassembled before tracking. */
	if (iph->frag_off & htons(IP_OFFSET)) {
		printk("ip_conntrack_core: Frag of proto %u.\n",
		       iph->protocol);
		return 0;
	}
	/* Guarantee 8 protocol bytes: if more wanted, use len param */
	else if (iph->ihl * 4 + 8 > len)
		return 0;

	tuple->src.ip = iph->saddr;
	tuple->dst.ip = iph->daddr;
	tuple->dst.protonum = iph->protocol;

	ret = protocol->pkt_to_tuple((u_int32_t *)iph + iph->ihl,
				     len - 4*iph->ihl, tuple);
	return ret;
}

static int
invert_tuple(struct ip_conntrack_tuple *inverse,
	     const struct ip_conntrack_tuple *orig,
	     const struct ip_conntrack_protocol *protocol)
{
	inverse->src.ip = orig->dst.ip;
	inverse->dst.ip = orig->src.ip;
	inverse->dst.protonum = orig->dst.protonum;

	return protocol->invert_tuple(inverse, orig);
}

/* ip_conntrack_expect helper functions */

/* Compare tuple parts depending on mask. */
static inline int expect_cmp(const struct ip_conntrack_expect *i,
			     const struct ip_conntrack_tuple *tuple)
{
	MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
	return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask);
}

static void
destroy_expect(struct ip_conntrack_expect *exp)
{
	DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use));
	IP_NF_ASSERT(atomic_read(&exp->use));
	IP_NF_ASSERT(!timer_pending(&exp->timeout));

	kfree(exp);
}

inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
{
	IP_NF_ASSERT(exp);

	if (atomic_dec_and_test(&exp->use)) {
		/* usage count dropped to zero */
		destroy_expect(exp);
	}
}

static inline struct ip_conntrack_expect *
__ip_ct_expect_find(const struct ip_conntrack_tuple *tuple)
{
	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
	MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
	return LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
			 struct ip_conntrack_expect *, tuple);
}

/* Find an expectation corresponding to a tuple. */
struct ip_conntrack_expect *
ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *exp;

	READ_LOCK(&ip_conntrack_lock);
	READ_LOCK(&ip_conntrack_expect_tuple_lock);
	exp = __ip_ct_expect_find(tuple);
	if (exp)
		atomic_inc(&exp->use);
	READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
	READ_UNLOCK(&ip_conntrack_lock);

	return exp;
}
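/* Illustrative sketch, not from the original file: the intended
 * find_get/put pairing for the expectation refcount exported above.
 * example_expect_peek and its caller-supplied tuple are hypothetical;
 * guarded by #if 0 so it is never built. */
#if 0
static void example_expect_peek(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *exp;

	/* takes a reference on success */
	exp = ip_conntrack_expect_find_get(tuple);
	if (!exp)
		return;

	DEBUGP("expectation %p, use=%d\n", exp, atomic_read(&exp->use));

	/* drops the reference; destroy_expect() runs if it was the last */
	ip_conntrack_expect_put(exp);
}
#endif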
/* remove one specific expectation from all lists and drop refcount,
 * does _NOT_ delete the timer. */
static void __unexpect_related(struct ip_conntrack_expect *expect)
{
	DEBUGP("unexpect_related(%p)\n", expect);
	MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);

	/* we're not allowed to unexpect a confirmed expectation! */
	IP_NF_ASSERT(!expect->sibling);

	/* delete from global and local lists */
	list_del(&expect->list);
	list_del(&expect->expected_list);

	/* decrement expect-count of master conntrack */
	if (expect->expectant)
		expect->expectant->expecting--;

	ip_conntrack_expect_put(expect);
}

/* remove one specific expectation from all lists, drop refcount
 * and expire timer.
 * This function can _NOT_ be called for confirmed expects! */
static void unexpect_related(struct ip_conntrack_expect *expect)
{
	IP_NF_ASSERT(expect->expectant);
	IP_NF_ASSERT(expect->expectant->helper);
	/* if we are supposed to have a timer, but we can't delete it:
	 * race condition.  __unexpect_related will be called by the
	 * timeout function. */
	if (expect->expectant->helper->timeout
	    && !del_timer(&expect->timeout))
		return;

	__unexpect_related(expect);
}

/* delete all unconfirmed expectations for this conntrack */
static void remove_expectations(struct ip_conntrack *ct)
{
	struct list_head *exp_entry, *next;
	struct ip_conntrack_expect *exp;

	DEBUGP("remove_expectations(%p)\n", ct);

	for (exp_entry = ct->sibling_list.next;
	     exp_entry != &ct->sibling_list; exp_entry = next) {
		next = exp_entry->next;
		exp = list_entry(exp_entry, struct ip_conntrack_expect,
				 expected_list);

		/* we skip established expectations, as we want to delete
		 * the un-established ones only */
		if (exp->sibling) {
			DEBUGP("remove_expectations: skipping established %p of %p\n",
			       exp->sibling, ct);
			continue;
		}

		IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp));
		IP_NF_ASSERT(exp->expectant == ct);

		/* delete expectation from global and private lists */
		unexpect_related(exp);
	}
}

static void
clean_from_lists(struct ip_conntrack *ct)
{
	DEBUGP("clean_from_lists(%p)\n", ct);
	MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
	/* Remove from both hash lists: must not NULL out next ptrs,
	   otherwise we'll look unconfirmed.  Fortunately, LIST_DELETE
	   doesn't do this. --RR */
	LIST_DELETE(&ip_conntrack_hash
		    [hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)],
		    &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
	LIST_DELETE(&ip_conntrack_hash
		    [hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple)],
		    &ct->tuplehash[IP_CT_DIR_REPLY]);

	/* Destroy all un-established, pending expectations */
	remove_expectations(ct);
}
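/* Illustrative sketch, not from the original file: the del_timer() race
 * pattern that unexpect_related() above relies on.  When del_timer()
 * returns 0 the timeout handler is already running (or has run), so the
 * handler, not the caller, performs the cleanup.  example_cancel_or_defer
 * is a hypothetical name; guarded by #if 0 so it is never built. */
#if 0
static void example_cancel_or_defer(struct timer_list *timer)
{
	if (!del_timer(timer)) {
		/* lost the race: the timer callback owns cleanup now */
		return;
	}
	/* timer was still pending and is now cancelled: clean up here */
}
#endif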
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
	struct ip_conntrack_protocol *proto;

	DEBUGP("destroy_conntrack(%p)\n", ct);
	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
	IP_NF_ASSERT(!timer_pending(&ct->timeout));

	if (ct->master && master_ct(ct))
		ip_conntrack_put(master_ct(ct));

	/* To make sure we don't get any weird locking issues here:
	 * destroy_conntrack() MUST NOT be called with a write lock
	 * to ip_conntrack_lock!!! -HW */
	proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
	if (proto && proto->destroy)
		proto->destroy(ct);

	if (ip_conntrack_destroyed)
		ip_conntrack_destroyed(ct);

	WRITE_LOCK(&ip_conntrack_lock);
	/* Delete our master expectation */
	if (ct->master) {
		/* can't call __unexpect_related here,
		 * since it would screw up expect_list */
		list_del(&ct->master->expected_list);
		kfree(ct->master);
	}
	WRITE_UNLOCK(&ip_conntrack_lock);

	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
	kmem_cache_free(ip_conntrack_cachep, ct);
	atomic_dec(&ip_conntrack_count);
}

static void death_by_timeout(unsigned long ul_conntrack)
{
	struct ip_conntrack *ct = (void *)ul_conntrack;

	WRITE_LOCK(&ip_conntrack_lock);
	clean_from_lists(ct);
	WRITE_UNLOCK(&ip_conntrack_lock);
	ip_conntrack_put(ct);
}

static inline int
conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
		    const struct ip_conntrack_tuple *tuple,
		    const struct ip_conntrack *ignored_conntrack)
{
	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
	return i->ctrack != ignored_conntrack
		&& ip_ct_tuple_equal(tuple, &i->tuple);
}

static struct ip_conntrack_tuple_hash *
__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
		    const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;

	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
	h = LIST_FIND(&ip_conntrack_hash[hash_conntrack(tuple)],
		      conntrack_tuple_cmp,
		      struct ip_conntrack_tuple_hash *,
		      tuple, ignored_conntrack);
	return h;
}

/* Find a connection corresponding to a tuple. */
struct ip_conntrack_tuple_hash *
ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
		      const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;

	READ_LOCK(&ip_conntrack_lock);
	h = __ip_conntrack_find(tuple, ignored_conntrack);
	if (h)
		atomic_inc(&h->ctrack->ct_general.use);
	READ_UNLOCK(&ip_conntrack_lock);

	return h;
}

static inline struct ip_conntrack *
__ip_conntrack_get(struct nf_ct_info *nfct, enum ip_conntrack_info *ctinfo)
{
	struct ip_conntrack *ct = (struct ip_conntrack *)nfct->master;

	/* ctinfo is the index of the nfct inside the conntrack */
	*ctinfo = nfct - ct->infos;
	IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER);
	return ct;
}

/* Return conntrack and conntrack_info given skb->nfct->master */
struct ip_conntrack *
ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
{
	if (skb->nfct)
		return __ip_conntrack_get(skb->nfct, ctinfo);
	return NULL;
}
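/* Illustrative sketch, not from the original file: how a netfilter hook
 * would typically recover the conntrack and packet direction via
 * ip_conntrack_get() above.  example_hook_body is a hypothetical name;
 * guarded by #if 0 so it is never built. */
#if 0
static unsigned int example_hook_body(struct sk_buff *skb)
{
	enum ip_conntrack_info ctinfo;
	struct ip_conntrack *ct = ip_conntrack_get(skb, &ctinfo);

	if (ct && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
		DEBUGP("reply-direction packet for %p\n", ct);

	return NF_ACCEPT;
}
#endif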
/* Confirm a connection given skb->nfct; places it in hash table */
int
__ip_conntrack_confirm(struct nf_ct_info *nfct)
{
	unsigned int hash, repl_hash;
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;

	ct = __ip_conntrack_get(nfct, &ctinfo);

	/* ipt_REJECT uses ip_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction.  Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	/* We're not in hash table, and we refuse to set up related
	   connections for unconfirmed conns.  But packet copies and
	   REJECT will give spurious warnings here. */
	/* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

	/* No external references means no one else could have
	   confirmed us. */
	IP_NF_ASSERT(!is_confirmed(ct));
	DEBUGP("Confirming conntrack %p\n", ct);

	WRITE_LOCK(&ip_conntrack_lock);
	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	if (!LIST_FIND(&ip_conntrack_hash[hash],
		       conntrack_tuple_cmp,
		       struct ip_conntrack_tuple_hash *,
		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
	    && !LIST_FIND(&ip_conntrack_hash[repl_hash],
			  conntrack_tuple_cmp,
			  struct ip_conntrack_tuple_hash *,
			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
		list_prepend(&ip_conntrack_hash[hash],
			     &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
		list_prepend(&ip_conntrack_hash[repl_hash],
			     &ct->tuplehash[IP_CT_DIR_REPLY]);
		/* Timer relative to confirmation time, not original
		   setting time, otherwise we'd get timer wrap in
		   weird delay cases. */
		ct->timeout.expires += jiffies;
		add_timer(&ct->timeout);
		atomic_inc(&ct->ct_general.use);
		WRITE_UNLOCK(&ip_conntrack_lock);
		return NF_ACCEPT;
	}

	WRITE_UNLOCK(&ip_conntrack_lock);
	return NF_DROP;
}

/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
			 const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;

	READ_LOCK(&ip_conntrack_lock);
	h = __ip_conntrack_find(tuple, ignored_conntrack);
	READ_UNLOCK(&ip_conntrack_lock);

	return h != NULL;
}

/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
struct ip_conntrack *
icmp_error_track(struct sk_buff *skb,