ip_conntrack_core.c
}

static int early_drop(struct list_head *chain)
{
	/* Traverse backwards: gives us oldest, which is roughly LRU */
	struct ip_conntrack_tuple_hash *h;
	int dropped = 0;

	READ_LOCK(&ip_conntrack_lock);
	h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
	if (h)
		atomic_inc(&h->ctrack->ct_general.use);
	READ_UNLOCK(&ip_conntrack_lock);

	if (!h)
		return dropped;

	if (del_timer(&h->ctrack->timeout)) {
		death_by_timeout((unsigned long)h->ctrack);
		dropped = 1;
		CONNTRACK_STAT_INC(early_drop);
	}
	ip_conntrack_put(h->ctrack);
	return dropped;
}

static inline int helper_cmp(const struct ip_conntrack_helper *i,
			     const struct ip_conntrack_tuple *rtuple)
{
	return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
}

struct ip_conntrack_helper *
ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
{
	return LIST_FIND(&helpers, helper_cmp,
			 struct ip_conntrack_helper *,
			 tuple);
}

/* Allocate a new conntrack: we return -ENOMEM if classification
   failed due to stress.  Otherwise it really is unclassifiable. */
static struct ip_conntrack_tuple_hash *
init_conntrack(const struct ip_conntrack_tuple *tuple,
	       struct ip_conntrack_protocol *protocol,
	       struct sk_buff *skb)
{
	struct ip_conntrack *conntrack;
	struct ip_conntrack_tuple repl_tuple;
	size_t hash;
	struct ip_conntrack_expect *expected;

	if (!ip_conntrack_hash_rnd_initted) {
		get_random_bytes(&ip_conntrack_hash_rnd, 4);
		ip_conntrack_hash_rnd_initted = 1;
	}

	hash = hash_conntrack(tuple);

	if (ip_conntrack_max
	    && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
		/* Try dropping from this hash chain. */
		if (!early_drop(&ip_conntrack_hash[hash])) {
			if (net_ratelimit())
				printk(KERN_WARNING
				       "ip_conntrack: table full, dropping"
				       " packet.\n");
			return ERR_PTR(-ENOMEM);
		}
	}

	if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
		DEBUGP("Can't invert tuple.\n");
		return NULL;
	}

	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
	if (!conntrack) {
		DEBUGP("Can't allocate conntrack.\n");
		return ERR_PTR(-ENOMEM);
	}

	memset(conntrack, 0, sizeof(*conntrack));
	atomic_set(&conntrack->ct_general.use, 1);
	conntrack->ct_general.destroy = destroy_conntrack;
	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack;
	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
	conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack;

	if (!protocol->new(conntrack, skb)) {
		kmem_cache_free(ip_conntrack_cachep, conntrack);
		return NULL;
	}

	/* Don't set the timer yet: wait for confirmation */
	init_timer(&conntrack->timeout);
	conntrack->timeout.data = (unsigned long)conntrack;
	conntrack->timeout.function = death_by_timeout;

	INIT_LIST_HEAD(&conntrack->sibling_list);

	WRITE_LOCK(&ip_conntrack_lock);
	/* We need to find and delete the expectation ONLY if we win the race */
	READ_LOCK(&ip_conntrack_expect_tuple_lock);
	expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
			     struct ip_conntrack_expect *, tuple);
	READ_UNLOCK(&ip_conntrack_expect_tuple_lock);

	if (expected) {
		/* If the master is not in the hash table yet (i.e. the packet
		   hasn't left this machine yet), how could the other end know
		   about the expectation?  Hence these are not the droids you
		   are looking for (if the master ct never got confirmed, we'd
		   hold a reference to it and weird things would happen to
		   future packets). */
		if (!is_confirmed(expected->expectant)) {
			conntrack->helper = ip_ct_find_helper(&repl_tuple);
			goto end;
		}

		/* Expectation is dying... */
		if (expected->expectant->helper->timeout
		    && !del_timer(&expected->timeout))
			goto end;

		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
		       conntrack, expected);
		/* Welcome, Mr. Bond.  We've been expecting you... */
		IP_NF_ASSERT(expected->expectant);
		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
		conntrack->master = expected;
		expected->sibling = conntrack;
		LIST_DELETE(&ip_conntrack_expect_list, expected);
		expected->expectant->expecting--;
		nf_conntrack_get(&master_ct(conntrack)->ct_general);

		/* this is a braindead... --pablo */
		atomic_inc(&ip_conntrack_count);
		WRITE_UNLOCK(&ip_conntrack_lock);

		if (expected->expectfn)
			expected->expectfn(conntrack);

		CONNTRACK_STAT_INC(expect_new);

		goto ret;
	} else {
		conntrack->helper = ip_ct_find_helper(&repl_tuple);

		CONNTRACK_STAT_INC(new);
	}

end:
	atomic_inc(&ip_conntrack_count);
	WRITE_UNLOCK(&ip_conntrack_lock);

ret:
	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}
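/*
 * Sketch (not part of the original file): how a caller separates
 * init_conntrack()'s three outcomes.  NULL means the packet really is
 * unclassifiable, ERR_PTR(-ENOMEM) means we dropped under stress, and
 * anything else is a usable hash entry -- resolve_normal_ct() below
 * does exactly this.
 */
#if 0	/* illustration only */
	h = init_conntrack(&tuple, proto, skb);
	if (!h)			/* unclassifiable: not an error */
		return NULL;
	if (IS_ERR(h))		/* PTR_ERR(h) == -ENOMEM: table full */
		return (void *)h;
#endif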
/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct ip_conntrack *
resolve_normal_ct(struct sk_buff *skb,
		  struct ip_conntrack_protocol *proto,
		  int *set_reply,
		  unsigned int hooknum,
		  enum ip_conntrack_info *ctinfo)
{
	struct ip_conntrack_tuple tuple;
	struct ip_conntrack_tuple_hash *h;

	IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);

	if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
			     &tuple, proto))
		return NULL;

	/* look for tuple match */
	h = ip_conntrack_find_get(&tuple, NULL);
	if (!h) {
		h = init_conntrack(&tuple, proto, skb);
		if (!h)
			return NULL;
		if (IS_ERR(h))
			return (void *)h;
	}

	/* It exists; we have (non-exclusive) reference. */
	if (DIRECTION(h) == IP_CT_DIR_REPLY) {
		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
		/* Please set reply bit if this packet OK */
		*set_reply = 1;
	} else {
		/* Once we've had two way comms, always ESTABLISHED. */
		if (test_bit(IPS_SEEN_REPLY_BIT, &h->ctrack->status)) {
			DEBUGP("ip_conntrack_in: normal packet for %p\n",
			       h->ctrack);
			*ctinfo = IP_CT_ESTABLISHED;
		} else if (test_bit(IPS_EXPECTED_BIT, &h->ctrack->status)) {
			DEBUGP("ip_conntrack_in: related packet for %p\n",
			       h->ctrack);
			*ctinfo = IP_CT_RELATED;
		} else {
			DEBUGP("ip_conntrack_in: new packet for %p\n",
			       h->ctrack);
			*ctinfo = IP_CT_NEW;
		}
		*set_reply = 0;
	}
	skb->nfct = &h->ctrack->ct_general;
	skb->nfctinfo = *ctinfo;
	return h->ctrack;
}

/* Netfilter hook itself. */
unsigned int ip_conntrack_in(unsigned int hooknum,
			     struct sk_buff **pskb,
			     const struct net_device *in,
			     const struct net_device *out,
			     int (*okfn)(struct sk_buff *))
{
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;
	struct ip_conntrack_protocol *proto;
	int set_reply;
	int ret;

	/* Previously seen (loopback or untracked)?  Ignore. */
	if ((*pskb)->nfct) {
		CONNTRACK_STAT_INC(ignore);
		return NF_ACCEPT;
	}

	/* Never happens */
	if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
		if (net_ratelimit()) {
			printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
			       (*pskb)->nh.iph->protocol, hooknum);
		}
		return NF_DROP;
	}

	/* FIXME: Do this right please. --RR */
	(*pskb)->nfcache |= NFC_UNKNOWN;

/* Doesn't cover locally-generated broadcast, so not worth it. */
#if 0
	/* Ignore broadcast: no `connection'. */
	if ((*pskb)->pkt_type == PACKET_BROADCAST) {
		printk("Broadcast packet!\n");
		return NF_ACCEPT;
	} else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
		   == htonl(0x000000FF)) {
		printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
		       NIPQUAD((*pskb)->nh.iph->saddr),
		       NIPQUAD((*pskb)->nh.iph->daddr),
		       (*pskb)->sk, (*pskb)->pkt_type);
	}
#endif

	proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);

	/* It may be a special packet: error, unclean...  The inverse of
	 * the return code tells the netfilter core what to do with the
	 * packet. */
	if (proto->error != NULL
	    && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
		CONNTRACK_STAT_INC(error);
		CONNTRACK_STAT_INC(invalid);
		return -ret;
	}

	if (!(ct = resolve_normal_ct(*pskb, proto, &set_reply, hooknum,
				     &ctinfo))) {
		/* Not a valid part of a connection */
		CONNTRACK_STAT_INC(invalid);
		return NF_ACCEPT;
	}

	if (IS_ERR(ct)) {
		/* Too stressed to deal. */
		CONNTRACK_STAT_INC(drop);
		return NF_DROP;
	}

	IP_NF_ASSERT((*pskb)->nfct);

	ret = proto->packet(ct, *pskb, ctinfo);
	if (ret < 0) {
		/* Invalid: the inverse of the return code tells
		 * the netfilter core what to do */
		nf_conntrack_put((*pskb)->nfct);
		(*pskb)->nfct = NULL;
		CONNTRACK_STAT_INC(invalid);
		return -ret;
	}

	if (ret != NF_DROP && ct->helper) {
		ret = ct->helper->help(*pskb, ct, ctinfo);
		if (ret == -1) {
			/* Invalid */
			CONNTRACK_STAT_INC(invalid);
			nf_conntrack_put((*pskb)->nfct);
			(*pskb)->nfct = NULL;
			return NF_ACCEPT;
		}
	}
	if (set_reply)
		set_bit(IPS_SEEN_REPLY_BIT, &ct->status);

	return ret;
}
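/*
 * For context: ip_conntrack_in() is not called directly; it is attached
 * to netfilter hook points.  A sketch of the registration follows.  In
 * the real tree this lives in ip_conntrack_standalone.c; the exact field
 * values below are assumptions for illustration, not a copy of that file.
 */
#if 0	/* illustration only */
static struct nf_hook_ops ip_conntrack_in_ops = {
	.hook		= ip_conntrack_in,
	.pf		= PF_INET,
	.hooknum	= NF_IP_PRE_ROUTING,
	.priority	= NF_IP_PRI_CONNTRACK,
};

static int __init example_init(void)
{
	/* attach ip_conntrack_in() early in PRE_ROUTING; returns 0 on
	   success */
	return nf_register_hook(&ip_conntrack_in_ops);
}
#endif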
int invert_tuplepr(struct ip_conntrack_tuple *inverse,
		   const struct ip_conntrack_tuple *orig)
{
	return ip_ct_invert_tuple(inverse, orig,
				  ip_ct_find_proto(orig->dst.protonum));
}

static inline int resent_expect(const struct ip_conntrack_expect *i,
				const struct ip_conntrack_tuple *tuple,
				const struct ip_conntrack_tuple *mask)
{
	DEBUGP("resent_expect\n");
	DEBUGP("   tuple:   "); DUMP_TUPLE(&i->tuple);
	DEBUGP("ct_tuple:   "); DUMP_TUPLE(&i->ct_tuple);
	DEBUGP("test tuple: "); DUMP_TUPLE(tuple);
	return (((i->ct_tuple.dst.protonum == 0
		  && ip_ct_tuple_equal(&i->tuple, tuple))
		 || (i->ct_tuple.dst.protonum
		     && ip_ct_tuple_equal(&i->ct_tuple, tuple)))
		&& ip_ct_tuple_equal(&i->mask, mask));
}

/* Would two expected things clash? */
static inline int expect_clash(const struct ip_conntrack_expect *i,
			       const struct ip_conntrack_tuple *tuple,
			       const struct ip_conntrack_tuple *mask)
{
	/* Part covered by intersection of masks must be unequal,
	   otherwise they clash */
	struct ip_conntrack_tuple intersect_mask
		= { { i->mask.src.ip & mask->src.ip,
		      { i->mask.src.u.all & mask->src.u.all } },
		    { i->mask.dst.ip & mask->dst.ip,
		      { i->mask.dst.u.all & mask->dst.u.all },
		      i->mask.dst.protonum & mask->dst.protonum } };

	return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask);
}
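/*
 * Worked example for expect_clash() (values invented for illustration):
 * suppose expectation A wildcards the source port (mask.src.u.all == 0)
 * and a new expectation B does the same.  The intersection of the two
 * masks then also wildcards the source port, so the masked comparison
 * looks only at the remaining fields; if A and B name the same dst
 * ip/port and protocol they clash, even though their (unset) source
 * ports may differ.
 */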
inline void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect)
{
	WRITE_LOCK(&ip_conntrack_lock);
	unexpect_related(expect);
	WRITE_UNLOCK(&ip_conntrack_lock);
}

static void expectation_timed_out(unsigned long ul_expect)
{
	struct ip_conntrack_expect *expect = (void *)ul_expect;

	DEBUGP("expectation %p timed out\n", expect);
	WRITE_LOCK(&ip_conntrack_lock);
	__unexpect_related(expect);
	WRITE_UNLOCK(&ip_conntrack_lock);
}

struct ip_conntrack_expect *
ip_conntrack_expect_alloc(void)
{
	struct ip_conntrack_expect *new;

	new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
	if (!new) {
		DEBUGP("expect_related: OOM allocating expect\n");
		return NULL;
	}

	/* tuple_cmp compares the whole union, so it has to be
	   initialized cleanly */
	memset(new, 0, sizeof(struct ip_conntrack_expect));
	atomic_set(&new->use, 1);
	return new;
}

static void
ip_conntrack_expect_insert(struct ip_conntrack_expect *new,
			   struct ip_conntrack *related_to)
{
	DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
	new->expectant = related_to;
	new->sibling = NULL;

	/* add to expected list for this connection */
	list_add_tail(&new->expected_list, &related_to->sibling_list);
	/* add to global list of expectations */
	list_prepend(&ip_conntrack_expect_list, &new->list);
	/* add and start timer if required */
	if (related_to->helper->timeout) {
		init_timer(&new->timeout);
		new->timeout.data = (unsigned long)new;
		new->timeout.function = expectation_timed_out;
		new->timeout.expires = jiffies +
			related_to->helper->timeout * HZ;
		add_timer(&new->timeout);
	}
	related_to->expecting++;
}
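/*
 * How a protocol helper typically drives the expectation API above:
 * allocate, fill in the tuple and mask, then hand it to
 * ip_conntrack_expect_related() (defined just below).  A condensed
 * sketch modelled on helpers such as ip_conntrack_ftp; the surrounding
 * function and parsed_ip/parsed_port are invented for illustration.
 */
#if 0	/* illustration only */
static int example_expect_data_channel(struct ip_conntrack *ct,
				       u_int32_t parsed_ip,
				       u_int16_t parsed_port)
{
	struct ip_conntrack_expect *exp;

	exp = ip_conntrack_expect_alloc();
	if (exp == NULL)
		return NF_DROP;

	/* Expect a TCP connection to the address/port announced on the
	   control channel; every mask field is an exact match except the
	   source port, which stays wildcarded (zero). */
	exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
	exp->tuple.dst.ip = parsed_ip;
	exp->tuple.dst.u.tcp.port = parsed_port;
	exp->tuple.dst.protonum = IPPROTO_TCP;

	exp->mask.src.ip = 0xFFFFFFFF;
	exp->mask.dst.ip = 0xFFFFFFFF;
	exp->mask.dst.u.tcp.port = 0xFFFF;
	exp->mask.dst.protonum = 0xFF;

	/* expectfn, if set, runs when the expected connection arrives
	   (see init_conntrack() above) */
	exp->expectfn = NULL;

	/* Checks for resent/clashing expectations under ip_conntrack_lock
	   and then calls ip_conntrack_expect_insert() as shown above; on
	   the failure paths visible below it frees the expectation itself. */
	if (ip_conntrack_expect_related(exp, ct) != 0)
		return NF_DROP;

	return NF_ACCEPT;
}
#endif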
/* Add a related connection. */
int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
				struct ip_conntrack *related_to)
{
	struct ip_conntrack_expect *old;
	int ret = 0;

	WRITE_LOCK(&ip_conntrack_lock);
	/* Because of the write lock, no reader can walk the lists,
	 * so there is no need to take the tuple lock too */

	DEBUGP("ip_conntrack_expect_related %p\n", related_to);
	DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
	DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);

	old = LIST_FIND(&ip_conntrack_expect_list, resent_expect,
			struct ip_conntrack_expect *, &expect->tuple,
			&expect->mask);
	if (old) {
		/* Helper private data may contain offsets but no pointers
		   pointing into the payload - otherwise we would have to
		   copy the data filled out by the helper over the old one */
		DEBUGP("expect_related: resent packet\n");
		if (related_to->helper->timeout) {
			if (!del_timer(&old->timeout)) {
				/* expectation is dying. Fall through */
				goto out;
			} else {
				old->timeout.expires = jiffies +
					related_to->helper->timeout * HZ;
				add_timer(&old->timeout);
			}
		}

		WRITE_UNLOCK(&ip_conntrack_lock);
		/* This expectation is not inserted so no need to lock */
		kmem_cache_free(ip_conntrack_expect_cachep, expect);
		return -EEXIST;

	} else if (related_to->helper->max_expected
		   && related_to->expecting >= related_to->helper->max_expected) {
		/* old == NULL */
		if (!(related_to->helper->flags &
		      IP_CT_HELPER_F_REUSE_EXPECT)) {
			WRITE_UNLOCK(&ip_conntrack_lock);
			if (net_ratelimit())
				printk(KERN_WARNING
				       "ip_conntrack: max number of expected "
				       "connections %i of %s reached for "
				       "%u.%u.%u.%u->%u.%u.%u.%u\n",
				       related_to->helper->max_expected,
				       related_to->helper->name,
				       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
				       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
			kmem_cache_free(ip_conntrack_expect_cachep, expect);
			return -EPERM;
		}
		DEBUGP("ip_conntrack: max number of expected "
		       "connections %i of %s reached for "
		       "%u.%u.%u.%u->%u.%u.%u.%u, reusing\n",
		       related_to->helper->max_expected,
		       related_to->helper->name,
		       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
		       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));