ip_conntrack_core.c
		/* Choose the oldest expectation to evict */
		list_for_each_entry(old, &related_to->sibling_list,
				    expected_list)
			if (old->sibling == NULL)
				break;

		/* We cannot fail since related_to->expecting is the number
		 * of unconfirmed expectations */
		IP_NF_ASSERT(old && old->sibling == NULL);

		/* newnat14 does not reuse the real allocated memory
		 * structures but rather unexpects the old and
		 * allocates a new.  unexpect_related will decrement
		 * related_to->expecting. */
		unexpect_related(old);
		ret = -EPERM;
	} else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash,
			     struct ip_conntrack_expect *, &expect->tuple,
			     &expect->mask)) {
		WRITE_UNLOCK(&ip_conntrack_lock);
		DEBUGP("expect_related: busy!\n");

		kmem_cache_free(ip_conntrack_expect_cachep, expect);
		return -EBUSY;
	}

out:	ip_conntrack_expect_insert(expect, related_to);

	WRITE_UNLOCK(&ip_conntrack_lock);

	CONNTRACK_STAT_INC(expect_create);

	return ret;
}

/* Change tuple in an existing expectation */
int ip_conntrack_change_expect(struct ip_conntrack_expect *expect,
			       struct ip_conntrack_tuple *newtuple)
{
	int ret;

	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
	WRITE_LOCK(&ip_conntrack_expect_tuple_lock);

	DEBUGP("change_expect:\n");
	DEBUGP("exp tuple: "); DUMP_TUPLE(&expect->tuple);
	DEBUGP("exp mask:  "); DUMP_TUPLE(&expect->mask);
	DEBUGP("newtuple:  "); DUMP_TUPLE(newtuple);

	if (expect->ct_tuple.dst.protonum == 0) {
		/* Never seen before */
		DEBUGP("change expect: never seen before\n");
		if (!ip_ct_tuple_equal(&expect->tuple, newtuple)
		    && LIST_FIND(&ip_conntrack_expect_list, expect_clash,
				 struct ip_conntrack_expect *, newtuple,
				 &expect->mask)) {
			/* Force NAT to find an unused tuple */
			ret = -1;
		} else {
			memcpy(&expect->ct_tuple, &expect->tuple,
			       sizeof(expect->tuple));
			memcpy(&expect->tuple, newtuple,
			       sizeof(expect->tuple));
			ret = 0;
		}
	} else {
		/* Resent packet */
		DEBUGP("change expect: resent packet\n");
		if (ip_ct_tuple_equal(&expect->tuple, newtuple)) {
			ret = 0;
		} else {
			/* Force NAT to choose again the same port */
			ret = -1;
		}
	}
	WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock);

	return ret;
}

/* Alter reply tuple (maybe alter helper).  If it's already taken,
   return 0 and don't do alteration. */
int ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
			     const struct ip_conntrack_tuple *newreply)
{
	WRITE_LOCK(&ip_conntrack_lock);
	if (__ip_conntrack_find(newreply, conntrack)) {
		WRITE_UNLOCK(&ip_conntrack_lock);
		return 0;
	}
	/* Should be unconfirmed, so not in hash table yet */
	IP_NF_ASSERT(!is_confirmed(conntrack));

	DEBUGP("Altering reply tuple of %p to ", conntrack);
	DUMP_TUPLE(newreply);

	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
	if (!conntrack->master && list_empty(&conntrack->sibling_list))
		conntrack->helper = ip_ct_find_helper(newreply);
	WRITE_UNLOCK(&ip_conntrack_lock);

	return 1;
}
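/* Usage sketch (illustration, not part of this file): roughly how a
 * protocol helper of this era (e.g. ip_conntrack_ftp) registers an
 * expectation for a related data connection.  It assumes the
 * ip_conntrack_expect_alloc() helper, since the clash path above frees
 * the expect back to ip_conntrack_expect_cachep; the function name and
 * field values here are illustrative assumptions.  Guarded by #if 0. */
#if 0
static int example_expect_data_conn(struct ip_conntrack *ct,
				    u_int32_t daddr, u_int16_t dport)
{
	struct ip_conntrack_expect *exp = ip_conntrack_expect_alloc();

	if (!exp)
		return -ENOMEM;

	/* Tuple: any source (left zeroed), fixed destination, TCP. */
	exp->tuple.dst.ip = daddr;
	exp->tuple.dst.u.tcp.port = htons(dport);
	exp->tuple.dst.protonum = IPPROTO_TCP;

	/* Mask: wildcard the source, match the destination exactly. */
	exp->mask.dst.ip = 0xFFFFFFFF;
	exp->mask.dst.u.tcp.port = 0xFFFF;
	exp->mask.dst.protonum = 0xFF;

	exp->expectfn = NULL;	/* no callback when the expectation fires */

	/* Returns -EBUSY on a clash (exp is freed); may return -EPERM
	 * after evicting the oldest unconfirmed expectation, in which
	 * case the new one is still inserted. */
	return ip_conntrack_expect_related(exp, ct);
}
#endif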
int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
	WRITE_LOCK(&ip_conntrack_lock);
	list_prepend(&helpers, me);
	WRITE_UNLOCK(&ip_conntrack_lock);

	return 0;
}

static inline int unhelp(struct ip_conntrack_tuple_hash *i,
			 const struct ip_conntrack_helper *me)
{
	if (i->ctrack->helper == me) {
		/* Get rid of any expected. */
		remove_expectations(i->ctrack, 0);
		/* And *then* set helper to NULL */
		i->ctrack->helper = NULL;
	}
	return 0;
}

void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
	unsigned int i;

	/* Need write lock here, to delete helper. */
	WRITE_LOCK(&ip_conntrack_lock);
	LIST_DELETE(&helpers, me);

	/* Get rid of expecteds, set helpers to NULL. */
	for (i = 0; i < ip_conntrack_htable_size; i++)
		LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
			    struct ip_conntrack_tuple_hash *, me);
	WRITE_UNLOCK(&ip_conntrack_lock);

	/* Someone could still be looking at the helper in a bh. */
	synchronize_net();
}

static inline void ct_add_counters(struct ip_conntrack *ct,
				   enum ip_conntrack_info ctinfo,
				   const struct sk_buff *skb)
{
#ifdef CONFIG_IP_NF_CT_ACCT
	if (skb) {
		ct->counters[CTINFO2DIR(ctinfo)].packets++;
		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
					ntohs(skb->nh.iph->tot_len);
	}
#endif
}

/* Refresh conntrack for this many jiffies and do accounting
   (if skb != NULL) */
void ip_ct_refresh_acct(struct ip_conntrack *ct,
			enum ip_conntrack_info ctinfo,
			const struct sk_buff *skb,
			unsigned long extra_jiffies)
{
	IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);

	/* If not in hash table, timer will not be active yet */
	if (!is_confirmed(ct)) {
		ct->timeout.expires = extra_jiffies;
		ct_add_counters(ct, ctinfo, skb);
	} else {
		WRITE_LOCK(&ip_conntrack_lock);
		/* Need del_timer for race avoidance (may already be dying). */
		if (del_timer(&ct->timeout)) {
			ct->timeout.expires = jiffies + extra_jiffies;
			add_timer(&ct->timeout);
		}
		ct_add_counters(ct, ctinfo, skb);
		WRITE_UNLOCK(&ip_conntrack_lock);
	}
}

int ip_ct_no_defrag;

/* Returns new sk_buff, or NULL */
struct sk_buff *
ip_ct_gather_frags(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
#ifdef CONFIG_NETFILTER_DEBUG
	unsigned int olddebug = skb->nf_debug;
#endif

	if (unlikely(ip_ct_no_defrag)) {
		kfree_skb(skb);
		return NULL;
	}

	if (sk) {
		sock_hold(sk);
		skb_orphan(skb);
	}

	local_bh_disable();
	skb = ip_defrag(skb);
	local_bh_enable();

	if (!skb) {
		if (sk)
			sock_put(sk);
		return skb;
	}

	if (sk) {
		skb_set_owner_w(skb, sk);
		sock_put(sk);
	}

	ip_send_check(skb->nh.iph);
	skb->nfcache |= NFC_ALTERED;
#ifdef CONFIG_NETFILTER_DEBUG
	/* Packet path as if nothing had happened. */
	skb->nf_debug = olddebug;
#endif

	return skb;
}

/* Used by ipt_REJECT. */
static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;

	/* This ICMP is in reverse direction to the packet which caused it */
	ct = ip_conntrack_get(skb, &ctinfo);
	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
	else
		ctinfo = IP_CT_RELATED;

	/* Attach to new skbuff, and increment count */
	nskb->nfct = &ct->ct_general;
	nskb->nfctinfo = ctinfo;
	nf_conntrack_get(nskb->nfct);
}

static inline int
do_kill(const struct ip_conntrack_tuple_hash *i,
	int (*kill)(const struct ip_conntrack *i, void *data),
	void *data)
{
	return kill(i->ctrack, data);
}

/* Bring out ya dead! */
static struct ip_conntrack_tuple_hash *
get_next_corpse(int (*kill)(const struct ip_conntrack *i, void *data),
		void *data, unsigned int *bucket)
{
	struct ip_conntrack_tuple_hash *h = NULL;

	READ_LOCK(&ip_conntrack_lock);
	for (; !h && *bucket < ip_conntrack_htable_size; (*bucket)++) {
		h = LIST_FIND(&ip_conntrack_hash[*bucket], do_kill,
			      struct ip_conntrack_tuple_hash *, kill, data);
	}
	if (h)
		atomic_inc(&h->ctrack->ct_general.use);
	READ_UNLOCK(&ip_conntrack_lock);

	return h;
}

void
ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data),
			void *data)
{
	struct ip_conntrack_tuple_hash *h;
	unsigned int bucket = 0;

	while ((h = get_next_corpse(kill, data, &bucket)) != NULL) {
		/* Time to push up daisies... */
		if (del_timer(&h->ctrack->timeout))
			death_by_timeout((unsigned long)h->ctrack);
		/* ... else the timer will get him soon. */

		ip_conntrack_put(h->ctrack);
	}
}
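/* Usage sketch (illustration, not part of this file): flushing every
 * conntrack entry whose original destination is a given address, in
 * the style of ip_ct_selective_cleanup() callers such as the
 * masquerading code.  The predicate and wrapper below are hypothetical
 * examples.  Guarded by #if 0. */
#if 0
static int example_kill_by_daddr(const struct ip_conntrack *ct, void *data)
{
	u_int32_t daddr = (u_int32_t)(unsigned long)data;

	/* Non-zero return means: tear this entry down. */
	return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip == daddr;
}

static void example_flush_daddr(u_int32_t daddr)
{
	/* Walks every hash bucket under the read lock; each match has
	 * its timeout fired early via death_by_timeout(). */
	ip_ct_selective_cleanup(example_kill_by_daddr,
				(void *)(unsigned long)daddr);
}
#endif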
/* Fast function for those who don't want to parse /proc (and I don't
   blame them). */
/* Reversing the socket's dst/src point of view gives us the reply
   mapping. */
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
	struct inet_opt *inet = inet_sk(sk);
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack_tuple tuple;

	IP_CT_TUPLE_U_BLANK(&tuple);
	tuple.src.ip = inet->rcv_saddr;
	tuple.src.u.tcp.port = inet->sport;
	tuple.dst.ip = inet->daddr;
	tuple.dst.u.tcp.port = inet->dport;
	tuple.dst.protonum = IPPROTO_TCP;

	/* We only do TCP at the moment: is there a better way? */
	if (strcmp(sk->sk_prot->name, "TCP")) {
		DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
		return -ENOPROTOOPT;
	}

	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
		DEBUGP("SO_ORIGINAL_DST: len %u not %Zu\n",
		       *len, sizeof(struct sockaddr_in));
		return -EINVAL;
	}

	h = ip_conntrack_find_get(&tuple, NULL);
	if (h) {
		struct sockaddr_in sin;

		sin.sin_family = AF_INET;
		sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.u.tcp.port;
		sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.ip;

		DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
		       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
		ip_conntrack_put(h->ctrack);
		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
			return -EFAULT;
		else
			return 0;
	}
	DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
	       NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
	       NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
	return -ENOENT;
}

static struct nf_sockopt_ops so_getorigdst = {
	.pf		= PF_INET,
	.get_optmin	= SO_ORIGINAL_DST,
	.get_optmax	= SO_ORIGINAL_DST+1,
	.get		= &getorigdst,
};
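/* Usage sketch (illustration, not part of this file): the userspace
 * side of getorigdst().  A transparent proxy recovers the pre-NAT
 * destination of an accepted TCP connection via the SO_ORIGINAL_DST
 * getsockopt served above.  Error handling trimmed; the wrapper name
 * is hypothetical.  Guarded by #if 0. */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/netfilter_ipv4.h>	/* SO_ORIGINAL_DST */

static int example_original_dst(int fd, struct sockaddr_in *dst)
{
	socklen_t len = sizeof(*dst);

	/* Fails with ENOENT when conntrack has no entry for the
	 * socket's reply tuple, ENOPROTOOPT for non-TCP sockets. */
	return getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, dst, &len);
}
#endif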
static int kill_all(const struct ip_conntrack *i, void *data)
{
	return 1;
}

/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void ip_conntrack_cleanup(void)
{
	ip_ct_attach = NULL;
	/* This makes sure all current packets have passed through
	   netfilter framework.  Roll on, two-stage module
	   delete... */
	synchronize_net();

 i_see_dead_people:
	ip_ct_selective_cleanup(kill_all, NULL);
	if (atomic_read(&ip_conntrack_count) != 0) {
		schedule();
		goto i_see_dead_people;
	}

	kmem_cache_destroy(ip_conntrack_cachep);
	kmem_cache_destroy(ip_conntrack_expect_cachep);
	vfree(ip_conntrack_hash);
	nf_unregister_sockopt(&so_getorigdst);
}

static int hashsize;
module_param(hashsize, int, 0400);

int __init ip_conntrack_init(void)
{
	unsigned int i;
	int ret;

	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
	 * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
	if (hashsize) {
		ip_conntrack_htable_size = hashsize;
	} else {
		ip_conntrack_htable_size
			= (((num_physpages << PAGE_SHIFT) / 16384)
			   / sizeof(struct list_head));
		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
			ip_conntrack_htable_size = 8192;
		if (ip_conntrack_htable_size < 16)
			ip_conntrack_htable_size = 16;
	}
	ip_conntrack_max = 8 * ip_conntrack_htable_size;

	printk("ip_conntrack version %s (%u buckets, %d max)"
	       " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
	       ip_conntrack_htable_size, ip_conntrack_max,
	       sizeof(struct ip_conntrack));

	ret = nf_register_sockopt(&so_getorigdst);
	if (ret != 0) {
		printk(KERN_ERR "Unable to register netfilter socket option\n");
		return ret;
	}

	ip_conntrack_hash = vmalloc(sizeof(struct list_head)
				    * ip_conntrack_htable_size);
	if (!ip_conntrack_hash) {
		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
		goto err_unreg_sockopt;
	}

	ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
						sizeof(struct ip_conntrack), 0,
						SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!ip_conntrack_cachep) {
		printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
		goto err_free_hash;
	}

	ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
					sizeof(struct ip_conntrack_expect),
					0, SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!ip_conntrack_expect_cachep) {
		printk(KERN_ERR "Unable to create ip_expect slab cache\n");
		goto err_free_conntrack_slab;
	}

	/* Don't NEED lock here, but good form anyway. */
	WRITE_LOCK(&ip_conntrack_lock);
	for (i = 0; i < MAX_IP_CT_PROTO; i++)
		ip_ct_protos[i] = &ip_conntrack_generic_protocol;
	/* Sew in builtin protocols. */
	ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
	ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
	ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
	WRITE_UNLOCK(&ip_conntrack_lock);

	for (i = 0; i < ip_conntrack_htable_size; i++)
		INIT_LIST_HEAD(&ip_conntrack_hash[i]);

	/* For use by ipt_REJECT */
	ip_ct_attach = ip_conntrack_attach;

	/* Set up fake conntrack:
	    - to never be deleted, not in any hashes */
	atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
	/*  - and make it look like a confirmed connection */
	set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);

	return ret;

err_free_conntrack_slab:
	kmem_cache_destroy(ip_conntrack_cachep);
err_free_hash:
	vfree(ip_conntrack_hash);
err_unreg_sockopt:
	nf_unregister_sockopt(&so_getorigdst);

	return -ENOMEM;
}
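/* Worked example of the sizing above (not from the original source):
 * on i386 with 4096-byte pages and an 8-byte struct list_head, a
 * 32 MB machine has num_physpages = 8192, so
 *
 *	(8192 << 12) / 16384 / 8 = 256 buckets
 *
 * and ip_conntrack_max = 8 * 256 = 2048 tracked connections, matching
 * the comment in ip_conntrack_init().  Both defaults can be overridden
 * at load time via the module parameter, e.g.:
 *
 *	modprobe ip_conntrack hashsize=4096
 */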