ip_conntrack_core.c
/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   extension. */

/* (c) 1999 Paul `Rusty' Russell.  Licenced under the GNU General
   Public Licence. */

#ifdef MODULE
#define __NO_VERSION__
#endif
#include <linux/version.h>
#include <linux/config.h>
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/brlock.h>
#include <net/checksum.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/slab.h>

/* This rwlock protects the main hash table, protocol/helper/expected
   registrations, conntrack timers */
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)

#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>

#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif

DECLARE_RWLOCK(ip_conntrack_lock);

void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(expect_list);
LIST_HEAD(protocol_list);
static LIST_HEAD(helpers);
unsigned int ip_conntrack_htable_size = 0;
static int ip_conntrack_max = 0;
static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
struct list_head *ip_conntrack_hash;
static kmem_cache_t *ip_conntrack_cachep;

extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;

static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr,
                              u_int8_t protocol)
{
        return protocol == curr->proto;
}

struct ip_conntrack_protocol *__find_proto(u_int8_t protocol)
{
        struct ip_conntrack_protocol *p;

        MUST_BE_READ_LOCKED(&ip_conntrack_lock);
        p = LIST_FIND(&protocol_list, proto_cmpfn,
                      struct ip_conntrack_protocol *, protocol);
        if (!p)
                p = &ip_conntrack_generic_protocol;

        return p;
}

struct ip_conntrack_protocol *find_proto(u_int8_t protocol)
{
        struct ip_conntrack_protocol *p;

        READ_LOCK(&ip_conntrack_lock);
        p = __find_proto(protocol);
        READ_UNLOCK(&ip_conntrack_lock);
        return p;
}

static inline void ip_conntrack_put(struct ip_conntrack *ct)
{
        IP_NF_ASSERT(ct);
        IP_NF_ASSERT(ct->infos[0].master);
        /* nf_conntrack_put wants to go via an info struct, so feed it
           one at random. */
        nf_conntrack_put(&ct->infos[0]);
}

static inline u_int32_t
hash_conntrack(const struct ip_conntrack_tuple *tuple)
{
#if 0
        dump_tuple(tuple);
#endif
        /* ntohl because more differences in low bits. */
        /* To ensure that halves of the same connection don't hash
           clash, we add the source per-proto again. */
        return (ntohl(tuple->src.ip + tuple->dst.ip
                      + tuple->src.u.all + tuple->dst.u.all
                      + tuple->dst.protonum)
                + ntohs(tuple->src.u.all))
                % ip_conntrack_htable_size;
}
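/* Illustrative sketch, not part of the original file: why
 * hash_conntrack() above folds in ntohs(tuple->src.u.all) a second
 * time.  The sum src.ip + dst.ip + src.u.all + dst.u.all + protonum
 * is identical for the ORIGINAL and REPLY tuples of one connection
 * (the fields are merely swapped), so the symmetric part of the hash
 * alone would always put both directions in the same bucket.  The
 * extra source-port term is asymmetric and separates them.  The
 * standalone userspace demo below uses hypothetical names and is
 * compiled out with the same "#if 0" idiom this file already uses. */
#if 0
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

static uint32_t demo_hash(uint32_t saddr, uint32_t daddr,
                          uint16_t sport, uint16_t dport,
                          uint8_t proto, uint32_t htable_size)
{
        /* Same formula as hash_conntrack(), with the tuple fields
           spelled out as plain parameters. */
        return (ntohl(saddr + daddr + sport + dport + proto)
                + ntohs(sport)) % htable_size;
}

int main(void)
{
        uint32_t a = inet_addr("10.0.0.1"), b = inet_addr("10.0.0.2");
        uint16_t client = htons(32768), server = htons(80);

        /* The symmetric sums match, but the extra ntohs(sport) term
           differs, so the two directions land in different buckets. */
        printf("orig  -> bucket %u\n",
               demo_hash(a, b, client, server, 6, 8192));
        printf("reply -> bucket %u\n",
               demo_hash(b, a, server, client, 6, 8192));
        return 0;
}
#endif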
inline int
get_tuple(const struct iphdr *iph, size_t len,
          struct ip_conntrack_tuple *tuple,
          struct ip_conntrack_protocol *protocol)
{
        int ret;

        /* Never happen */
        if (iph->frag_off & htons(IP_OFFSET)) {
                printk("ip_conntrack_core: Frag of proto %u.\n",
                       iph->protocol);
                return 0;
        }
        /* Guarantee 8 protocol bytes: if more wanted, use len param */
        else if (iph->ihl * 4 + 8 > len)
                return 0;

        tuple->src.ip = iph->saddr;
        tuple->dst.ip = iph->daddr;
        tuple->dst.protonum = iph->protocol;

        ret = protocol->pkt_to_tuple((u_int32_t *)iph + iph->ihl,
                                     len - 4*iph->ihl,
                                     tuple);
        return ret;
}

static int
invert_tuple(struct ip_conntrack_tuple *inverse,
             const struct ip_conntrack_tuple *orig,
             const struct ip_conntrack_protocol *protocol)
{
        inverse->src.ip = orig->dst.ip;
        inverse->dst.ip = orig->src.ip;
        inverse->dst.protonum = orig->dst.protonum;

        return protocol->invert_tuple(inverse, orig);
}

static void
clean_from_lists(struct ip_conntrack *ct)
{
        MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
        /* Remove from both hash lists */
        LIST_DELETE(&ip_conntrack_hash
                    [hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)],
                    &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
        LIST_DELETE(&ip_conntrack_hash
                    [hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple)],
                    &ct->tuplehash[IP_CT_DIR_REPLY]);

        /* If our expected is in the list, take it out. */
        if (ct->expected.expectant) {
                IP_NF_ASSERT(list_inlist(&expect_list, &ct->expected));
                IP_NF_ASSERT(ct->expected.expectant == ct);
                LIST_DELETE(&expect_list, &ct->expected);
        }
}

static void
destroy_conntrack(struct nf_conntrack *nfct)
{
        struct ip_conntrack *ct = (struct ip_conntrack *)nfct;

        /* Unconfirmed connections haven't been cleaned up by the
           timer: hence they cannot be simply deleted here. */
        if (!(ct->status & IPS_CONFIRMED)) {
                WRITE_LOCK(&ip_conntrack_lock);
                /* Race check: they can't get a reference if no one
                   has one and we have the write lock. */
                if (atomic_read(&ct->ct_general.use) == 0) {
                        clean_from_lists(ct);
                        WRITE_UNLOCK(&ip_conntrack_lock);
                } else {
                        /* Either a last-minute confirmation (ie. ct
                           now has timer attached), or a last-minute
                           new skb has reference (still unconfirmed). */
                        WRITE_UNLOCK(&ip_conntrack_lock);
                        return;
                }
        }

        IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
        IP_NF_ASSERT(!timer_pending(&ct->timeout));

        if (ct->master.master)
                nf_conntrack_put(&ct->master);

        if (ip_conntrack_destroyed)
                ip_conntrack_destroyed(ct);
        kmem_cache_free(ip_conntrack_cachep, ct);
        atomic_dec(&ip_conntrack_count);
}

static void death_by_timeout(unsigned long ul_conntrack)
{
        struct ip_conntrack *ct = (void *)ul_conntrack;

        WRITE_LOCK(&ip_conntrack_lock);
        IP_NF_ASSERT(ct->status & IPS_CONFIRMED);
        clean_from_lists(ct);
        WRITE_UNLOCK(&ip_conntrack_lock);
        ip_conntrack_put(ct);
}

static inline int
conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
                    const struct ip_conntrack_tuple *tuple,
                    const struct ip_conntrack *ignored_conntrack)
{
        MUST_BE_READ_LOCKED(&ip_conntrack_lock);
        return i->ctrack != ignored_conntrack
                && ip_ct_tuple_equal(tuple, &i->tuple);
}

static struct ip_conntrack_tuple_hash *
__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
                    const struct ip_conntrack *ignored_conntrack)
{
        struct ip_conntrack_tuple_hash *h;

        MUST_BE_READ_LOCKED(&ip_conntrack_lock);
        h = LIST_FIND(&ip_conntrack_hash[hash_conntrack(tuple)],
                      conntrack_tuple_cmp,
                      struct ip_conntrack_tuple_hash *,
                      tuple, ignored_conntrack);
        return h;
}
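/* Illustrative sketch, not part of the original file: the data model
 * behind clean_from_lists() and __ip_conntrack_find() above.  Each
 * connection owns two tuplehash entries, one per direction, inserted
 * into (usually different) hash buckets; looking up either
 * direction's tuple reaches the same conntrack through a
 * back-pointer, and the entry's position recovers the packet
 * direction.  Hypothetical, simplified types; compiled out like the
 * other "#if 0" blocks in this file. */
#if 0
#include <stdint.h>

struct demo_tuple { uint32_t src, dst; uint16_t sport, dport; };

struct demo_conntrack;

struct demo_tuplehash {
        struct demo_tuple tuple;
        struct demo_conntrack *ctrack;  /* back-pointer, as in
                                           ip_conntrack_tuple_hash */
};

struct demo_conntrack {
        /* [0] = ORIGINAL, [1] = REPLY; both entries live in the hash
           table, which is why clean_from_lists() unlinks both. */
        struct demo_tuplehash tuplehash[2];
};

static int demo_direction(const struct demo_conntrack *ct,
                          const struct demo_tuplehash *h)
{
        return h == &ct->tuplehash[1];  /* like DIRECTION(h) */
}
#endif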
/* Find a connection corresponding to a tuple. */
struct ip_conntrack_tuple_hash *
ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
                      const struct ip_conntrack *ignored_conntrack)
{
        struct ip_conntrack_tuple_hash *h;

        READ_LOCK(&ip_conntrack_lock);
        h = __ip_conntrack_find(tuple, ignored_conntrack);
        if (h)
                atomic_inc(&h->ctrack->ct_general.use);
        READ_UNLOCK(&ip_conntrack_lock);

        return h;
}

/* Confirm a connection */
void
ip_conntrack_confirm(struct ip_conntrack *ct)
{
        DEBUGP("Confirming conntrack %p\n", ct);
        WRITE_LOCK(&ip_conntrack_lock);
        /* Race check */
        if (!(ct->status & IPS_CONFIRMED)) {
                IP_NF_ASSERT(!timer_pending(&ct->timeout));
                set_bit(IPS_CONFIRMED_BIT, &ct->status);
                /* Timer relative to confirmation time, not original
                   setting time, otherwise we'd get timer wrap in
                   weird delay cases. */
                ct->timeout.expires += jiffies;
                add_timer(&ct->timeout);
                atomic_inc(&ct->ct_general.use);
        }
        WRITE_UNLOCK(&ip_conntrack_lock);
}

/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
                         const struct ip_conntrack *ignored_conntrack)
{
        struct ip_conntrack_tuple_hash *h;

        READ_LOCK(&ip_conntrack_lock);
        h = __ip_conntrack_find(tuple, ignored_conntrack);
        READ_UNLOCK(&ip_conntrack_lock);

        return h != NULL;
}
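/* Illustrative sketch, not part of the original file: the two-phase
 * timer idiom in ip_conntrack_confirm() above.  While a connection is
 * unconfirmed, ct->timeout.expires holds a *relative* interval; only
 * at confirmation is it turned into an absolute jiffies deadline and
 * the timer armed, so the countdown starts at confirmation time and a
 * long allocation-to-confirmation delay cannot skew or wrap the
 * deadline.  Hypothetical, simplified model; compiled out as above. */
#if 0
struct demo_timeout {
        unsigned long expires;  /* relative interval until confirmed */
        int armed;
};

static unsigned long demo_jiffies;      /* stand-in for the kernel's
                                           jiffies counter */

static void demo_prepare(struct demo_timeout *t, unsigned long interval)
{
        t->expires = interval;  /* e.g. a protocol-specific timeout */
        t->armed = 0;
}

static void demo_confirm(struct demo_timeout *t)
{
        t->expires += demo_jiffies;     /* relative -> absolute, as in
                                           ip_conntrack_confirm() */
        t->armed = 1;                   /* add_timer() in the real code */
}
#endif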
/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
struct ip_conntrack *
icmp_error_track(struct sk_buff *skb,
                 enum ip_conntrack_info *ctinfo,
                 unsigned int hooknum)
{
        const struct iphdr *iph;
        struct icmphdr *hdr;
        struct ip_conntrack_tuple innertuple, origtuple;
        struct iphdr *inner;
        size_t datalen;
        struct ip_conntrack_protocol *innerproto;
        struct ip_conntrack_tuple_hash *h;

        iph = skb->nh.iph;
        IP_NF_ASSERT(iph->protocol == IPPROTO_ICMP);
        IP_NF_ASSERT(skb->nfct == NULL);

        hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl);
        inner = (struct iphdr *)(hdr + 1);
        datalen = skb->len - iph->ihl*4 - sizeof(*hdr);

        if (skb->len < iph->ihl * 4 + sizeof(struct icmphdr)) {
                DEBUGP("icmp_error_track: too short\n");
                return NULL;
        }

        if (hdr->type != ICMP_DEST_UNREACH
            && hdr->type != ICMP_SOURCE_QUENCH
            && hdr->type != ICMP_TIME_EXCEEDED
            && hdr->type != ICMP_PARAMETERPROB
            && hdr->type != ICMP_REDIRECT)
                return NULL;

        /* Ignore ICMP's containing fragments (shouldn't happen) */
        if (inner->frag_off & htons(IP_OFFSET)) {
                DEBUGP("icmp_error_track: fragment of proto %u\n",
                       inner->protocol);
                return NULL;
        }

        /* Ignore it if the checksum's bogus. */
        if (ip_compute_csum((unsigned char *)hdr, sizeof(*hdr) + datalen)) {
                DEBUGP("icmp_error_track: bad csum\n");
                return NULL;
        }

        innerproto = find_proto(inner->protocol);
        /* Are they talking about one of our connections? */
        if (inner->ihl * 4 + 8 > datalen
            || !get_tuple(inner, datalen, &origtuple, innerproto)) {
                DEBUGP("icmp_error: ! get_tuple p=%u (%u*4+%u dlen=%u)\n",
                       inner->protocol, inner->ihl, 8, datalen);
                return NULL;
        }

        /* Ordinarily, we'd expect the inverted tupleproto, but it's
           been preserved inside the ICMP. */
        if (!invert_tuple(&innertuple, &origtuple, innerproto)) {
                DEBUGP("icmp_error_track: Can't invert tuple\n");
                return NULL;
        }

        *ctinfo = IP_CT_RELATED;

        h = ip_conntrack_find_get(&innertuple, NULL);
        if (!h) {
                /* Locally generated ICMPs will match inverted if they
                   haven't been SNAT'ed yet */
                /* FIXME: NAT code has to handle half-done double NAT --RR */
                if (hooknum == NF_IP_LOCAL_OUT)
                        h = ip_conntrack_find_get(&origtuple, NULL);

                if (!h) {
                        DEBUGP("icmp_error_track: no match\n");
                        return NULL;
                }
                /* Reverse direction from that found */
                if (DIRECTION(h) != IP_CT_DIR_REPLY)
                        *ctinfo += IP_CT_IS_REPLY;
        } else {
                if (DIRECTION(h) == IP_CT_DIR_REPLY)
                        *ctinfo += IP_CT_IS_REPLY;
        }

        /* REJECT target does this commonly, so allow locally