📄 ip_conntrack_core.c
ip_conntrack_expect_insert(expect);
    ip_conntrack_expect_event(IPEXP_NEW, expect);
    ret = 0;
out:
    write_unlock_bh(&ip_conntrack_lock);
    return ret;
}

/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
 * implicitly racy: see __ip_conntrack_confirm */
void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
                              const struct ip_conntrack_tuple *newreply)
{
    write_lock_bh(&ip_conntrack_lock);
    /* Should be unconfirmed, so not in hash table yet */
    IP_NF_ASSERT(!is_confirmed(conntrack));

    DEBUGP("Altering reply tuple of %p to ", conntrack);
    DUMP_TUPLE(newreply);

    conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
    if (!conntrack->master && conntrack->expecting == 0)
        conntrack->helper = __ip_conntrack_helper_find(newreply);
    write_unlock_bh(&ip_conntrack_lock);
}

int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
    BUG_ON(me->timeout == 0);
    write_lock_bh(&ip_conntrack_lock);
    list_prepend(&helpers, me);
    write_unlock_bh(&ip_conntrack_lock);

    return 0;
}

struct ip_conntrack_helper *
__ip_conntrack_helper_find_byname(const char *name)
{
    struct ip_conntrack_helper *h;

    list_for_each_entry(h, &helpers, list) {
        if (!strcmp(h->name, name))
            return h;
    }
    return NULL;
}

static inline int unhelp(struct ip_conntrack_tuple_hash *i,
                         const struct ip_conntrack_helper *me)
{
    if (tuplehash_to_ctrack(i)->helper == me) {
        ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
        tuplehash_to_ctrack(i)->helper = NULL;
    }
    return 0;
}

void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
    unsigned int i;
    struct ip_conntrack_expect *exp, *tmp;

    /* Need write lock here, to delete helper. */
    write_lock_bh(&ip_conntrack_lock);
    LIST_DELETE(&helpers, me);

    /* Get rid of expectations */
    list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
        if (exp->master->helper == me && del_timer(&exp->timeout)) {
            ip_ct_unlink_expect(exp);
            ip_conntrack_expect_put(exp);
        }
    }

    /* Get rid of expecteds, set helpers to NULL. */
    LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
    for (i = 0; i < ip_conntrack_htable_size; i++)
        LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
                    struct ip_conntrack_tuple_hash *, me);
    write_unlock_bh(&ip_conntrack_lock);

    /* Someone could be still looking at the helper in a bh. */
    synchronize_net();
}

/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
void __ip_ct_refresh_acct(struct ip_conntrack *ct,
                          enum ip_conntrack_info ctinfo,
                          const struct sk_buff *skb,
                          unsigned long extra_jiffies,
                          int do_acct)
{
    int event = 0;

    IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
    IP_NF_ASSERT(skb);

    write_lock_bh(&ip_conntrack_lock);

    /* If not in hash table, timer will not be active yet */
    if (!is_confirmed(ct)) {
        ct->timeout.expires = extra_jiffies;
        event = IPCT_REFRESH;
    } else {
        /* Need del_timer for race avoidance (may already be dying). */
        if (del_timer(&ct->timeout)) {
            ct->timeout.expires = jiffies + extra_jiffies;
            add_timer(&ct->timeout);
            event = IPCT_REFRESH;
        }
    }

#ifdef CONFIG_IP_NF_CT_ACCT
    if (do_acct) {
        ct->counters[CTINFO2DIR(ctinfo)].packets++;
        ct->counters[CTINFO2DIR(ctinfo)].bytes +=
            ntohs(skb->nh.iph->tot_len);
        if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
            || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
            event |= IPCT_COUNTER_FILLING;
    }
#endif

    write_unlock_bh(&ip_conntrack_lock);

    /* must be unlocked when calling event cache */
    if (event)
        ip_conntrack_event_cache(event, skb);
}
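For orientation, a helper module uses the ip_conntrack_helper_register()/ip_conntrack_helper_unregister() pair above roughly as follows. This is a minimal sketch loosely modeled on the in-tree FTP helper; the port 2121, the example_* names, and the exact field layout are illustrative assumptions that may vary across 2.6.x revisions.

/* Hypothetical helper watching TCP port 2121 (illustrative only). */
static int example_help(struct sk_buff **pskb,
                        struct ip_conntrack *ct,
                        enum ip_conntrack_info ctinfo)
{
    /* Parse the payload here and call ip_conntrack_expect_related()
     * when the protocol announces a secondary connection. */
    return NF_ACCEPT;
}

static struct ip_conntrack_helper example_helper;

static int __init example_init(void)
{
    example_helper.tuple.src.u.tcp.port = htons(2121);
    example_helper.tuple.dst.protonum   = IPPROTO_TCP;
    example_helper.mask.src.u.tcp.port  = 0xFFFF;
    example_helper.mask.dst.protonum    = 0xFF;
    example_helper.max_expected         = 1;
    example_helper.timeout              = 5 * 60; /* nonzero, or BUG_ON fires */
    example_helper.me                   = THIS_MODULE;
    example_helper.name                 = "example";
    example_helper.help                 = example_help;

    return ip_conntrack_helper_register(&example_helper);
}

static void __exit example_exit(void)
{
    ip_conntrack_helper_unregister(&example_helper);
}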
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
/* Generic function for tcp/udp/sctp/dccp and alike.  This needs to be
 * in ip_conntrack_core, since we don't want the protocols to autoload
 * or depend on ctnetlink */
int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
                               const struct ip_conntrack_tuple *tuple)
{
    NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
            &tuple->src.u.tcp.port);
    NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
            &tuple->dst.u.tcp.port);
    return 0;

nfattr_failure:
    return -1;
}

int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
                               struct ip_conntrack_tuple *t)
{
    if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
        return -EINVAL;

    t->src.u.tcp.port = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
    t->dst.u.tcp.port = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);

    return 0;
}
#endif

/* Returns new sk_buff, or NULL */
struct sk_buff *
ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
{
    skb_orphan(skb);

    local_bh_disable();
    skb = ip_defrag(skb, user);
    local_bh_enable();

    if (skb)
        ip_send_check(skb->nh.iph);
    return skb;
}

/* Used by ipt_REJECT. */
static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
    struct ip_conntrack *ct;
    enum ip_conntrack_info ctinfo;

    /* This ICMP is in reverse direction to the packet which caused it */
    ct = ip_conntrack_get(skb, &ctinfo);

    if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
        ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
    else
        ctinfo = IP_CT_RELATED;

    /* Attach to new skbuff, and increment count */
    nskb->nfct = &ct->ct_general;
    nskb->nfctinfo = ctinfo;
    nf_conntrack_get(nskb->nfct);
}

static inline int
do_iter(const struct ip_conntrack_tuple_hash *i,
        int (*iter)(struct ip_conntrack *i, void *data),
        void *data)
{
    return iter(tuplehash_to_ctrack(i), data);
}

/* Bring out ya dead! */
static struct ip_conntrack_tuple_hash *
get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
                void *data, unsigned int *bucket)
{
    struct ip_conntrack_tuple_hash *h = NULL;

    write_lock_bh(&ip_conntrack_lock);
    for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
        h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
                        struct ip_conntrack_tuple_hash *, iter, data);
        if (h)
            break;
    }
    if (!h)
        h = LIST_FIND_W(&unconfirmed, do_iter,
                        struct ip_conntrack_tuple_hash *, iter, data);
    if (h)
        atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
    write_unlock_bh(&ip_conntrack_lock);

    return h;
}

void
ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
{
    struct ip_conntrack_tuple_hash *h;
    unsigned int bucket = 0;

    while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
        struct ip_conntrack *ct = tuplehash_to_ctrack(h);

        /* Time to push up daisies... */
        if (del_timer(&ct->timeout))
            death_by_timeout((unsigned long)ct);
        /* ... else the timer will get him soon. */

        ip_conntrack_put(ct);
    }
}

/* Fast function for those who don't want to parse /proc (and I don't
 * blame them). */
/* Reversing the socket's dst/src point of view gives us the reply
 * mapping. */
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
    struct inet_sock *inet = inet_sk(sk);
    struct ip_conntrack_tuple_hash *h;
    struct ip_conntrack_tuple tuple;

    IP_CT_TUPLE_U_BLANK(&tuple);
    tuple.src.ip = inet->rcv_saddr;
    tuple.src.u.tcp.port = inet->sport;
    tuple.dst.ip = inet->daddr;
    tuple.dst.u.tcp.port = inet->dport;
    tuple.dst.protonum = IPPROTO_TCP;

    /* We only do TCP at the moment: is there a better way? */
    if (strcmp(sk->sk_prot->name, "TCP")) {
        DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
        return -ENOPROTOOPT;
    }

    if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
        DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
               *len, sizeof(struct sockaddr_in));
        return -EINVAL;
    }

    h = ip_conntrack_find_get(&tuple, NULL);
    if (h) {
        struct sockaddr_in sin;
        struct ip_conntrack *ct = tuplehash_to_ctrack(h);

        sin.sin_family = AF_INET;
        sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
                           .tuple.dst.u.tcp.port;
        sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
                                  .tuple.dst.ip;

        DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
               NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
        ip_conntrack_put(ct);
        if (copy_to_user(user, &sin, sizeof(sin)) != 0)
            return -EFAULT;
        else
            return 0;
    }
    DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
           NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
           NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
    return -ENOENT;
}

static struct nf_sockopt_ops so_getorigdst = {
    .pf         = PF_INET,
    .get_optmin = SO_ORIGINAL_DST,
    .get_optmax = SO_ORIGINAL_DST+1,
    .get        = &getorigdst,
};
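Userspace reaches getorigdst() through the SO_ORIGINAL_DST getsockopt at level SOL_IP. A transparent proxy that accepted a REDIRECTed TCP connection can recover the pre-NAT destination roughly like this (userspace sketch; error handling elided):

#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/netfilter_ipv4.h>   /* SO_ORIGINAL_DST */

/* 'fd' is a connected TCP socket accepted by a transparent proxy.
 * On success, *sin holds the destination the client originally
 * dialed, before NAT rewrote it. */
static int original_dst(int fd, struct sockaddr_in *sin)
{
    socklen_t len = sizeof(*sin);

    return getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, sin, &len);
}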
static int kill_all(struct ip_conntrack *i, void *data)
{
    return 1;
}

void ip_conntrack_flush(void)
{
    ip_ct_iterate_cleanup(kill_all, NULL);
}

static void free_conntrack_hash(struct list_head *hash, int vmalloced,
                                int size)
{
    if (vmalloced)
        vfree(hash);
    else
        free_pages((unsigned long)hash,
                   get_order(sizeof(struct list_head) * size));
}

/* Mishearing the voices in his head, our hero wonders how he's
 * supposed to kill the mall. */
void ip_conntrack_cleanup(void)
{
    ip_ct_attach = NULL;

    /* This makes sure all current packets have passed through
     * netfilter framework.  Roll on, two-stage module delete... */
    synchronize_net();

    ip_ct_event_cache_flush();
i_see_dead_people:
    ip_conntrack_flush();
    if (atomic_read(&ip_conntrack_count) != 0) {
        schedule();
        goto i_see_dead_people;
    }
    /* wait until all references to ip_conntrack_untracked are dropped */
    while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
        schedule();

    kmem_cache_destroy(ip_conntrack_cachep);
    kmem_cache_destroy(ip_conntrack_expect_cachep);
    free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
                        ip_conntrack_htable_size);
    nf_unregister_sockopt(&so_getorigdst);
}

static struct list_head *alloc_hashtable(int size, int *vmalloced)
{
    struct list_head *hash;
    unsigned int i;

    *vmalloced = 0;
    hash = (void*)__get_free_pages(GFP_KERNEL,
                                   get_order(sizeof(struct list_head)
                                             * size));
    if (!hash) {
        *vmalloced = 1;
        printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
        hash = vmalloc(sizeof(struct list_head) * size);
    }

    if (hash)
        for (i = 0; i < size; i++)
            INIT_LIST_HEAD(&hash[i]);

    return hash;
}
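kill_all() above unconditionally returns 1, so ip_conntrack_flush() evicts everything. A caller can be more selective by handing ip_ct_iterate_cleanup() its own predicate; in this sketch, kill_by_saddr() and flush_host() are hypothetical names, while the iterator and the tuple layout come from this file:

/* Hypothetical predicate: evict every conntrack whose original
 * source address matches the IP handed in through 'data'. */
static int kill_by_saddr(struct ip_conntrack *ct, void *data)
{
    u_int32_t ip = *(u_int32_t *)data;

    return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip == ip;
}

static void flush_host(u_int32_t ip)
{
    /* Selected entries die through their timeout path, exactly as
     * with kill_all(). */
    ip_ct_iterate_cleanup(kill_by_saddr, &ip);
}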
static int set_hashsize(const char *val, struct kernel_param *kp)
{
    int i, bucket, hashsize, vmalloced;
    int old_vmalloced, old_size;
    int rnd;
    struct list_head *hash, *old_hash;
    struct ip_conntrack_tuple_hash *h;

    /* On boot, we can set this without any fancy locking. */
    if (!ip_conntrack_htable_size)
        return param_set_int(val, kp);

    hashsize = simple_strtol(val, NULL, 0);
    if (!hashsize)
        return -EINVAL;

    hash = alloc_hashtable(hashsize, &vmalloced);
    if (!hash)
        return -ENOMEM;

    /* We have to rehash for the new table anyway, so we also can
     * use a new random seed */
    get_random_bytes(&rnd, 4);

    write_lock_bh(&ip_conntrack_lock);
    for (i = 0; i < ip_conntrack_htable_size; i++) {
        while (!list_empty(&ip_conntrack_hash[i])) {
            h = list_entry(ip_conntrack_hash[i].next,
                           struct ip_conntrack_tuple_hash, list);
            list_del(&h->list);
            bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
            list_add_tail(&h->list, &hash[bucket]);
        }
    }
    old_size = ip_conntrack_htable_size;
    old_vmalloced = ip_conntrack_vmalloc;
    old_hash = ip_conntrack_hash;

    ip_conntrack_htable_size = hashsize;
    ip_conntrack_vmalloc = vmalloced;
    ip_conntrack_hash = hash;
    ip_conntrack_hash_rnd = rnd;
    write_unlock_bh(&ip_conntrack_lock);

    free_conntrack_hash(old_hash, old_vmalloced, old_size);
    return 0;
}

module_param_call(hashsize, set_hashsize, param_get_uint,
                  &ip_conntrack_htable_size, 0600);

int __init ip_conntrack_init(void)
{
    unsigned int i;
    int ret;

    /* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
     * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
    if (!ip_conntrack_htable_size) {
        ip_conntrack_htable_size
            = (((num_physpages << PAGE_SHIFT) / 16384)
               / sizeof(struct list_head));
        if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
            ip_conntrack_htable_size = 8192;
        if (ip_conntrack_htable_size < 16)
            ip_conntrack_htable_size = 16;
    }
    ip_conntrack_max = 8 * ip_conntrack_htable_size;

    printk("ip_conntrack version %s (%u buckets, %d max)"
           " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
           ip_conntrack_htable_size, ip_conntrack_max,
           sizeof(struct ip_conntrack));

    ret = nf_register_sockopt(&so_getorigdst);
    if (ret != 0) {
        printk(KERN_ERR "Unable to register netfilter socket option\n");
        return ret;
    }

    ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
                                        &ip_conntrack_vmalloc);
    if (!ip_conntrack_hash) {
        printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
        goto err_unreg_sockopt;
    }

    ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
                                            sizeof(struct ip_conntrack), 0,
                                            0, NULL, NULL);
    if (!ip_conntrack_cachep) {
        printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
        goto err_free_hash;
    }

    ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
                                    sizeof(struct ip_conntrack_expect),
                                    0, 0, NULL, NULL);
    if (!ip_conntrack_expect_cachep) {
        printk(KERN_ERR "Unable to create ip_expect slab cache\n");
        goto err_free_conntrack_slab;
    }

    /* Don't NEED lock here, but good form anyway. */
    write_lock_bh(&ip_conntrack_lock);
    for (i = 0; i < MAX_IP_CT_PROTO; i++)
        ip_ct_protos[i] = &ip_conntrack_generic_protocol;
    /* Sew in builtin protocols. */
    ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
    ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
    ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
    write_unlock_bh(&ip_conntrack_lock);

    /* For use by ipt_REJECT */
    ip_ct_attach = ip_conntrack_attach;

    /* Set up fake conntrack:
     *  - to never be deleted, not in any hashes */
    atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
    /*  - and make it look like a confirmed connection */
    set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);

    return ret;

err_free_conntrack_slab:
    kmem_cache_destroy(ip_conntrack_cachep);
err_free_hash:
    free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
                        ip_conntrack_htable_size);
err_unreg_sockopt:
    nf_unregister_sockopt(&so_getorigdst);

    return -ENOMEM;
}
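To make the sizing comment in ip_conntrack_init() concrete, here is the default-table arithmetic for a 32 MB i386 machine, where sizeof(struct list_head) is 8 bytes:

    (32 << 20) / 16384            = 2048   (1/16384 of memory, in bytes)
    2048 / 8                      =  256   buckets
    ip_conntrack_max = 8 * 256    = 2048   tracked connections

Machines with 1 GB of RAM or more are clamped to 8192 buckets (65536 conntracks). Because module_param_call() registers set_hashsize with mode 0600, the table can also be sized at load time ("modprobe ip_conntrack hashsize=16384") or, assuming a modular build, resized afterwards by writing the parameter file under /sys/module/ip_conntrack/parameters/.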