📄 ip_conntrack_core.c
字号:
struct ip_conntrack_expect, expected_list);
			if (cur->sibling == NULL) {
				old = cur;
				break;
			}
		}

		/* (!old) cannot happen, since related_to->expecting is the
		 * number of unconfirmed expects */
		IP_NF_ASSERT(old);

		/* newnat14 does not reuse the real allocated memory
		 * structures but rather unexpects the old and
		 * allocates a new.  unexpect_related will decrement
		 * related_to->expecting. */
		unexpect_related(old);
		ret = -EPERM;
	} else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash,
			     struct ip_conntrack_expect *, &expect->tuple,
			     &expect->mask)) {
		/* A clashing expectation is already registered globally. */
		WRITE_UNLOCK(&ip_conntrack_lock);
		DEBUGP("expect_related: busy!\n");
		return -EBUSY;
	}

	/* NOTE(review): the debug string below misspells "expect_related"
	 * as "expect_relaed"; left byte-identical in this comment-only
	 * pass. */
	new = (struct ip_conntrack_expect *)
		kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC);
	if (!new) {
		WRITE_UNLOCK(&ip_conntrack_lock);
		DEBUGP("expect_relaed: OOM allocating expect\n");
		return -ENOMEM;
	}

	/* Zero out the new structure, then fill it with the data. */
	DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
	memset(new, 0, sizeof(*expect));
	/* NOTE(review): these two INIT_LIST_HEAD calls are dead code —
	 * the memcpy immediately below overwrites both list heads. */
	INIT_LIST_HEAD(&new->list);
	INIT_LIST_HEAD(&new->expected_list);
	memcpy(new, expect, sizeof(*expect));
	new->expectant = related_to;
	new->sibling = NULL;

	/* Increase usage count.  This sucks: the memset above overwrote any
	 * old usage count [if still present] and we increase to one.  Only
	 * works because everything is done under ip_conntrack_lock(). */
	atomic_inc(&new->use);

	/* add to expected list for this connection */
	list_add(&new->expected_list, &related_to->sibling_list);
	/* add to global list of expectations */
	list_prepend(&ip_conntrack_expect_list, &new->list);
	/* add and start timer if required */
	if (related_to->helper->timeout) {
		init_timer(&new->timeout);
		new->timeout.data = (unsigned long)new;
		new->timeout.function = expectation_timed_out;
		new->timeout.expires
			= jiffies + related_to->helper->timeout * HZ;
		add_timer(&new->timeout);
	}
	related_to->expecting++;

	WRITE_UNLOCK(&ip_conntrack_lock);

	return ret;
}

/* Change the tuple in an existing expectation.
 *
 * Caller must hold ip_conntrack_lock for reading; the expect tuple lock
 * is taken for writing here.  Returns 0 on success, -1 to tell NAT it
 * must pick a different tuple/port. */
int ip_conntrack_change_expect(struct ip_conntrack_expect *expect,
			       struct ip_conntrack_tuple *newtuple)
{
	int ret;

	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
	WRITE_LOCK(&ip_conntrack_expect_tuple_lock);

	DEBUGP("change_expect:\n");
	DEBUGP("exp tuple: ");
	DUMP_TUPLE(&expect->tuple);
	DEBUGP("exp mask: ");
	DUMP_TUPLE(&expect->mask);
	DEBUGP("newtuple: ");
	DUMP_TUPLE(newtuple);

	/* ct_tuple.dst.protonum == 0 means this expectation has never been
	 * rewritten before. */
	if (expect->ct_tuple.dst.protonum == 0) {
		/* Never seen before */
		DEBUGP("change expect: never seen before\n");
		if (!ip_ct_tuple_equal(&expect->tuple, newtuple)
		    && LIST_FIND(&ip_conntrack_expect_list, expect_clash,
				 struct ip_conntrack_expect *, newtuple,
				 &expect->mask)) {
			/* Force NAT to find an unused tuple */
			ret = -1;
		} else {
			/* Stash the original tuple in ct_tuple, then install
			 * the new one. */
			memcpy(&expect->ct_tuple, &expect->tuple,
			       sizeof(expect->tuple));
			memcpy(&expect->tuple, newtuple,
			       sizeof(expect->tuple));
			ret = 0;
		}
	} else {
		/* Resent packet */
		DEBUGP("change expect: resent packet\n");
		if (ip_ct_tuple_equal(&expect->tuple, newtuple)) {
			ret = 0;
		} else {
			/* Force NAT to choose again the same port */
			ret = -1;
		}
	}
	WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock);

	return ret;
}

/* Alter reply tuple (maybe alter helper).  If it's already taken,
   return 0 and don't do alteration.
*/
int ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
			     const struct ip_conntrack_tuple *newreply)
{
	WRITE_LOCK(&ip_conntrack_lock);
	/* Refuse if another conntrack already owns this reply tuple. */
	if (__ip_conntrack_find(newreply, conntrack)) {
		WRITE_UNLOCK(&ip_conntrack_lock);
		return 0;
	}
	/* Should be unconfirmed, so not in hash table yet */
	IP_NF_ASSERT(!is_confirmed(conntrack));

	DEBUGP("Altering reply tuple of %p to ", conntrack);
	DUMP_TUPLE(newreply);

	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
	/* Child connections keep their master's helper assignment;
	 * otherwise re-match a helper against the new reply tuple. */
	if (!conntrack->master)
		conntrack->helper = LIST_FIND(&helpers, helper_cmp,
					      struct ip_conntrack_helper *,
					      newreply);
	WRITE_UNLOCK(&ip_conntrack_lock);

	return 1;
}

/* Register a protocol helper (FTP, IRC, ...); pins the module while
 * registered. */
int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
	MOD_INC_USE_COUNT;

	WRITE_LOCK(&ip_conntrack_lock);
	list_prepend(&helpers, me);
	WRITE_UNLOCK(&ip_conntrack_lock);

	return 0;
}

/* Per-entry callback for helper unregistration: detach `me` from a
 * conntrack that still references it.  Always returns 0 so the walk
 * visits every entry. */
static inline int unhelp(struct ip_conntrack_tuple_hash *i,
			 const struct ip_conntrack_helper *me)
{
	if (i->ctrack->helper == me) {
		/* Get rid of any expected. */
		remove_expectations(i->ctrack);
		/* And *then* set helper to NULL */
		i->ctrack->helper = NULL;
	}
	return 0;
}

void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
	unsigned int i;

	/* Need write lock here, to delete helper. */
	WRITE_LOCK(&ip_conntrack_lock);
	LIST_DELETE(&helpers, me);
	/* Get rid of expecteds, set helpers to NULL. */
	for (i = 0; i < ip_conntrack_htable_size; i++)
		LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
			    struct ip_conntrack_tuple_hash *, me);
	WRITE_UNLOCK(&ip_conntrack_lock);

	/* Someone could be still looking at the helper in a bh.
	 * Taking and dropping the netproto lock waits out all softirq
	 * users before the module count drops. */
	br_write_lock_bh(BR_NETPROTO_LOCK);
	br_write_unlock_bh(BR_NETPROTO_LOCK);

	MOD_DEC_USE_COUNT;
}

/* Refresh conntrack for this many jiffies.
*/
void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies)
{
	IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);

	WRITE_LOCK(&ip_conntrack_lock);
	/* If not in hash table, timer will not be active yet */
	if (!is_confirmed(ct))
		ct->timeout.expires = extra_jiffies;
	else {
		/* Need del_timer for race avoidance (may already be dying). */
		if (del_timer(&ct->timeout)) {
			ct->timeout.expires = jiffies + extra_jiffies;
			add_timer(&ct->timeout);
		}
	}
	WRITE_UNLOCK(&ip_conntrack_lock);
}

/* Returns new sk_buff, or NULL */
struct sk_buff *ip_ct_gather_frags(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
#ifdef CONFIG_NETFILTER_DEBUG
	unsigned int olddebug = skb->nf_debug;
#endif

	/* Defragmentation can consume the skb: hold the owning socket
	 * across it and reattach ownership afterwards. */
	if (sk) {
		sock_hold(sk);
		skb_orphan(skb);
	}

	local_bh_disable();
	skb = ip_defrag(skb);
	local_bh_enable();

	if (!skb) {
		/* Fragment was queued (or dropped); nothing to hand back. */
		if (sk)
			sock_put(sk);
		return skb;
	} else if (skb_is_nonlinear(skb)
		   && skb_linearize(skb, GFP_ATOMIC) != 0) {
		kfree_skb(skb);
		if (sk)
			sock_put(sk);
		return NULL;
	}

	if (sk) {
		skb_set_owner_w(skb, sk);
		sock_put(sk);
	}

	/* Reassembly rewrote the IP header: recompute its checksum. */
	ip_send_check(skb->nh.iph);
	skb->nfcache |= NFC_ALTERED;
#ifdef CONFIG_NETFILTER_DEBUG
	/* Packet path as if nothing had happened. */
	skb->nf_debug = olddebug;
#endif

	return skb;
}

/* Used by ipt_REJECT. */
static void ip_conntrack_attach(struct sk_buff *nskb,
				struct nf_ct_info *nfct)
{
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;

	ct = __ip_conntrack_get(nfct, &ctinfo);

	/* This ICMP is in reverse direction to the packet which caused it */
	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
	else
		ctinfo = IP_CT_RELATED;

	/* Attach new skbuff, and increment count */
	nskb->nfct = &ct->infos[ctinfo];
	atomic_inc(&ct->ct_general.use);
}

/* Adapter so a `kill` predicate over conntracks can be driven by
 * LIST_FIND over tuple-hash entries. */
static inline int
do_kill(const struct ip_conntrack_tuple_hash *i,
	int (*kill)(const struct ip_conntrack *i, void *data),
	void *data)
{
	return kill(i->ctrack, data);
}

/* Bring out ya dead!
*/
static struct ip_conntrack_tuple_hash *
get_next_corpse(int (*kill)(const struct ip_conntrack *i, void *data),
		void *data)
{
	struct ip_conntrack_tuple_hash *h = NULL;
	unsigned int i;

	READ_LOCK(&ip_conntrack_lock);
	/* Scan buckets until the predicate matches an entry (or none). */
	for (i = 0; !h && i < ip_conntrack_htable_size; i++) {
		h = LIST_FIND(&ip_conntrack_hash[i], do_kill,
			      struct ip_conntrack_tuple_hash *, kill, data);
	}
	if (h)
		/* Take a reference so the entry outlives the unlock. */
		atomic_inc(&h->ctrack->ct_general.use);
	READ_UNLOCK(&ip_conntrack_lock);

	return h;
}

void
ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data),
			void *data)
{
	struct ip_conntrack_tuple_hash *h;

	/* This is order n^2, by the way. */
	while ((h = get_next_corpse(kill, data)) != NULL) {
		/* Time to push up daises... */
		if (del_timer(&h->ctrack->timeout))
			death_by_timeout((unsigned long)h->ctrack);
		/* ... else the timer will get him soon. */
		ip_conntrack_put(h->ctrack);
	}
}

/* Fast function for those who don't want to parse /proc (and I don't
   blame them). */
/* Reversing the socket's dst/src point of view gives us the reply
   mapping. */
static int
getorigdst(struct sock *sk, int optval, void *user, int *len)
{
	struct ip_conntrack_tuple_hash *h;
	/* Build the reply-direction tuple from the socket's own endpoints;
	 * remaining tuple fields are zero-initialized by the brace init. */
	struct ip_conntrack_tuple tuple = { { sk->rcv_saddr, { sk->sport } },
					    { sk->daddr, { sk->dport },
					      IPPROTO_TCP } };

	/* We only do TCP at the moment: is there a better way?
*/
	if (strcmp(sk->prot->name, "TCP") != 0) {
		DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
		return -ENOPROTOOPT;
	}

	/* User buffer must fit a sockaddr_in. */
	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
		DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
		       *len, sizeof(struct sockaddr_in));
		return -EINVAL;
	}

	h = ip_conntrack_find_get(&tuple, NULL);
	if (h) {
		struct sockaddr_in sin;

		/* Report the ORIGINAL direction's destination — i.e. the
		 * pre-NAT address/port the client actually connected to. */
		sin.sin_family = AF_INET;
		sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.u.tcp.port;
		sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.ip;

		DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
		       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
		/* Drop the reference taken by ip_conntrack_find_get. */
		ip_conntrack_put(h->ctrack);
		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
			return -EFAULT;
		else
			return 0;
	}
	DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
	       NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
	       NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
	return -ENOENT;
}

/* getsockopt() hook exposing SO_ORIGINAL_DST; no setsockopt range. */
static struct nf_sockopt_ops so_getorigdst
= { { NULL, NULL }, PF_INET,
    0, 0, NULL,		/* Setsockopts */
    SO_ORIGINAL_DST, SO_ORIGINAL_DST+1, &getorigdst, 0, NULL };

#define NET_IP_CONNTRACK_MAX 2089
#define NET_IP_CONNTRACK_MAX_NAME "ip_conntrack_max"

#ifdef CONFIG_SYSCTL
static struct ctl_table_header *ip_conntrack_sysctl_header;

/* net.ipv4.ip_conntrack_max: writable cap on tracked connections. */
static ctl_table ip_conntrack_table[] = {
	{ NET_IP_CONNTRACK_MAX, NET_IP_CONNTRACK_MAX_NAME,
	  &ip_conntrack_max, sizeof(ip_conntrack_max),
	  0644, NULL, proc_dointvec },
	{ 0 }
};

static ctl_table ip_conntrack_dir_table[] = {
	{NET_IPV4, "ipv4", NULL, 0, 0555, ip_conntrack_table, 0, 0, 0, 0, 0},
	{ 0 }
};

static ctl_table ip_conntrack_root_table[] = {
	{CTL_NET, "net", NULL, 0, 0555, ip_conntrack_dir_table, 0, 0, 0, 0, 0},
	{ 0 }
};
#endif /*CONFIG_SYSCTL*/

/* Predicate for ip_ct_selective_cleanup(): match every conntrack. */
static int kill_all(const struct ip_conntrack *i, void *data)
{
	return 1;
}

/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall.
*/
void ip_conntrack_cleanup(void)
{
#ifdef CONFIG_SYSCTL
	unregister_sysctl_table(ip_conntrack_sysctl_header);
#endif
	ip_ct_attach = NULL;
	/* This makes sure all current packets have passed through
	   netfilter framework.  Roll on, two-stage module delete... */
	br_write_lock_bh(BR_NETPROTO_LOCK);
	br_write_unlock_bh(BR_NETPROTO_LOCK);

 i_see_dead_people:
	/* Keep flushing until the last conntrack reference is gone. */
	ip_ct_selective_cleanup(kill_all, NULL);
	if (atomic_read(&ip_conntrack_count) != 0) {
		schedule();
		goto i_see_dead_people;
	}

	kmem_cache_destroy(ip_conntrack_cachep);
	vfree(ip_conntrack_hash);
	nf_unregister_sockopt(&so_getorigdst);
}

/* Module parameter: override the memory-derived hash table size. */
static int hashsize = 0;
MODULE_PARM(hashsize, "i");

int __init ip_conntrack_init(void)
{
	unsigned int i;
	int ret;

	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
	 * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
	if (hashsize) {
		ip_conntrack_htable_size = hashsize;
	} else {
		ip_conntrack_htable_size
			= (((num_physpages << PAGE_SHIFT) / 16384)
			   / sizeof(struct list_head));
		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
			ip_conntrack_htable_size = 8192;
		if (ip_conntrack_htable_size < 16)
			ip_conntrack_htable_size = 16;
	}
	/* Default connection cap: 8 entries per bucket. */
	ip_conntrack_max = 8 * ip_conntrack_htable_size;

	printk("ip_conntrack version %s (%u buckets, %d max)"
	       " - %d bytes per conntrack\n", IP_CONNTRACK_VERSION,
	       ip_conntrack_htable_size, ip_conntrack_max,
	       sizeof(struct ip_conntrack));

	ret = nf_register_sockopt(&so_getorigdst);
	if (ret != 0) {
		printk(KERN_ERR "Unable to register netfilter socket option\n");
		return ret;
	}

	ip_conntrack_hash = vmalloc(sizeof(struct list_head)
				    * ip_conntrack_htable_size);
	if (!ip_conntrack_hash) {
		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
		goto err_unreg_sockopt;
	}

	ip_conntrack_cachep
		= kmem_cache_create("ip_conntrack",
				    sizeof(struct ip_conntrack), 0,
				    SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!ip_conntrack_cachep) {
		printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
		goto err_free_hash;
	}

	/* Don't NEED lock here, but good form anyway. */
	WRITE_LOCK(&ip_conntrack_lock);
	/* Sew in builtin protocols. */
	list_append(&protocol_list, &ip_conntrack_protocol_tcp);
	list_append(&protocol_list, &ip_conntrack_protocol_udp);
	list_append(&protocol_list, &ip_conntrack_protocol_icmp);
	WRITE_UNLOCK(&ip_conntrack_lock);

	for (i = 0; i < ip_conntrack_htable_size; i++)
		INIT_LIST_HEAD(&ip_conntrack_hash[i]);

/* This is fucking braindead.  There is NO WAY of doing this without
   the CONFIG_SYSCTL unless you don't want to detect errors.
   Grrr... --RR */
#ifdef CONFIG_SYSCTL
	ip_conntrack_sysctl_header
		= register_sysctl_table(ip_conntrack_root_table, 0);
	if (ip_conntrack_sysctl_header == NULL) {
		goto err_free_ct_cachep;
	}
#endif /*CONFIG_SYSCTL*/
	/* For use by ipt_REJECT */
	ip_ct_attach = ip_conntrack_attach;
	/* ret is 0 here (nf_register_sockopt succeeded). */
	return ret;

	/* Unwind in reverse order of acquisition. */
err_free_ct_cachep:
	kmem_cache_destroy(ip_conntrack_cachep);
err_free_hash:
	vfree(ip_conntrack_hash);
err_unreg_sockopt:
	nf_unregister_sockopt(&so_getorigdst);
	return -ENOMEM;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -