/*
 * ip_conntrack_core.c — IPv4 connection-tracking core (netfilter).
 * Excerpt: page 1 of 3, 1,487 lines total, exported from a
 * "low-level driver development" C code viewer.
 */
if (!del_timer(&i->timeout)) return 0; i->timeout.expires = jiffies + i->master->helper->timeout*HZ; add_timer(&i->timeout); return 1;}int ip_conntrack_expect_related(struct ip_conntrack_expect *expect){ struct ip_conntrack_expect *i; int ret; DEBUGP("ip_conntrack_expect_related %p\n", related_to); DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple); DEBUGP("mask: "); DUMP_TUPLE(&expect->mask); write_lock_bh(&ip_conntrack_lock); list_for_each_entry(i, &ip_conntrack_expect_list, list) { if (expect_matches(i, expect)) { /* Refresh timer: if it's dying, ignore.. */ if (refresh_timer(i)) { ret = 0; goto out; } } else if (expect_clash(i, expect)) { ret = -EBUSY; goto out; } } /* Will be over limit? */ if (expect->master->helper->max_expected && expect->master->expecting >= expect->master->helper->max_expected) evict_oldest_expect(expect->master); ip_conntrack_expect_insert(expect); ip_conntrack_expect_event(IPEXP_NEW, expect); ret = 0;out: write_unlock_bh(&ip_conntrack_lock); return ret;}/* Alter reply tuple (maybe alter helper). 
This is for NAT, and is implicitly racy: see __ip_conntrack_confirm */void ip_conntrack_alter_reply(struct ip_conntrack *conntrack, const struct ip_conntrack_tuple *newreply){ write_lock_bh(&ip_conntrack_lock); /* Should be unconfirmed, so not in hash table yet */ IP_NF_ASSERT(!is_confirmed(conntrack)); DEBUGP("Altering reply tuple of %p to ", conntrack); DUMP_TUPLE(newreply); conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; if (!conntrack->master && conntrack->expecting == 0) conntrack->helper = __ip_conntrack_helper_find(newreply); write_unlock_bh(&ip_conntrack_lock);}int ip_conntrack_helper_register(struct ip_conntrack_helper *me){ BUG_ON(me->timeout == 0); write_lock_bh(&ip_conntrack_lock); list_prepend(&helpers, me); write_unlock_bh(&ip_conntrack_lock); return 0;}struct ip_conntrack_helper *__ip_conntrack_helper_find_byname(const char *name){ struct ip_conntrack_helper *h; list_for_each_entry(h, &helpers, list) { if (!strcmp(h->name, name)) return h; } return NULL;}static inline int unhelp(struct ip_conntrack_tuple_hash *i, const struct ip_conntrack_helper *me){ if (tuplehash_to_ctrack(i)->helper == me) { ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); tuplehash_to_ctrack(i)->helper = NULL; } return 0;}void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me){ unsigned int i; struct ip_conntrack_expect *exp, *tmp; /* Need write lock here, to delete helper. */ write_lock_bh(&ip_conntrack_lock); LIST_DELETE(&helpers, me); /* Get rid of expectations */ list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { if (exp->master->helper == me && del_timer(&exp->timeout)) { ip_ct_unlink_expect(exp); ip_conntrack_expect_put(exp); } } /* Get rid of expecteds, set helpers to NULL. 
*/ LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me); for (i = 0; i < ip_conntrack_htable_size; i++) LIST_FIND_W(&ip_conntrack_hash[i], unhelp, struct ip_conntrack_tuple_hash *, me); write_unlock_bh(&ip_conntrack_lock); /* Someone could be still looking at the helper in a bh. */ synchronize_net();}/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */void __ip_ct_refresh_acct(struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, unsigned long extra_jiffies, int do_acct){ int event = 0; IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); IP_NF_ASSERT(skb); write_lock_bh(&ip_conntrack_lock); /* If not in hash table, timer will not be active yet */ if (!is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; event = IPCT_REFRESH; } else { /* Need del_timer for race avoidance (may already be dying). */ if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); event = IPCT_REFRESH; } }#ifdef CONFIG_IP_NF_CT_ACCT if (do_acct) { ct->counters[CTINFO2DIR(ctinfo)].packets++; ct->counters[CTINFO2DIR(ctinfo)].bytes += ntohs(skb->nh.iph->tot_len); if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) event |= IPCT_COUNTER_FILLING; }#endif write_unlock_bh(&ip_conntrack_lock); /* must be unlocked when calling event cache */ if (event) ip_conntrack_event_cache(event, skb);}#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)/* Generic function for tcp/udp/sctp/dccp and alike. 
This needs to be * in ip_conntrack_core, since we don't want the protocols to autoload * or depend on ctnetlink */int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb, const struct ip_conntrack_tuple *tuple){ NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), &tuple->src.u.tcp.port); NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), &tuple->dst.u.tcp.port); return 0;nfattr_failure: return -1;}int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[], struct ip_conntrack_tuple *t){ if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1]) return -EINVAL; t->src.u.tcp.port = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); t->dst.u.tcp.port = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); return 0;}#endif/* Returns new sk_buff, or NULL */struct sk_buff *ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user){ skb_orphan(skb); local_bh_disable(); skb = ip_defrag(skb, user); local_bh_enable(); if (skb) ip_send_check(skb->nh.iph); return skb;}/* Used by ipt_REJECT. */static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb){ struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; /* This ICMP is in reverse direction to the packet which caused it */ ct = ip_conntrack_get(skb, &ctinfo); if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; else ctinfo = IP_CT_RELATED; /* Attach to new skbuff, and increment count */ nskb->nfct = &ct->ct_general; nskb->nfctinfo = ctinfo; nf_conntrack_get(nskb->nfct);}static inline intdo_iter(const struct ip_conntrack_tuple_hash *i, int (*iter)(struct ip_conntrack *i, void *data), void *data){ return iter(tuplehash_to_ctrack(i), data);}/* Bring out ya dead! 
*/static struct ip_conntrack_tuple_hash *get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data), void *data, unsigned int *bucket){ struct ip_conntrack_tuple_hash *h = NULL; write_lock_bh(&ip_conntrack_lock); for (; *bucket < ip_conntrack_htable_size; (*bucket)++) { h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter, struct ip_conntrack_tuple_hash *, iter, data); if (h) break; } if (!h) h = LIST_FIND_W(&unconfirmed, do_iter, struct ip_conntrack_tuple_hash *, iter, data); if (h) atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); write_unlock_bh(&ip_conntrack_lock); return h;}voidip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data){ struct ip_conntrack_tuple_hash *h; unsigned int bucket = 0; while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { struct ip_conntrack *ct = tuplehash_to_ctrack(h); /* Time to push up daises... */ if (del_timer(&ct->timeout)) death_by_timeout((unsigned long)ct); /* ... else the timer will get him soon. */ ip_conntrack_put(ct); }}/* Fast function for those who don't want to parse /proc (and I don't blame them). *//* Reversing the socket's dst/src point of view gives us the reply mapping. */static intgetorigdst(struct sock *sk, int optval, void __user *user, int *len){ struct inet_sock *inet = inet_sk(sk); struct ip_conntrack_tuple_hash *h; struct ip_conntrack_tuple tuple; IP_CT_TUPLE_U_BLANK(&tuple); tuple.src.ip = inet->rcv_saddr; tuple.src.u.tcp.port = inet->sport; tuple.dst.ip = inet->daddr; tuple.dst.u.tcp.port = inet->dport; tuple.dst.protonum = IPPROTO_TCP; /* We only do TCP at the moment: is there a better way? 
*/ if (strcmp(sk->sk_prot->name, "TCP")) { DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n"); return -ENOPROTOOPT; } if ((unsigned int) *len < sizeof(struct sockaddr_in)) { DEBUGP("SO_ORIGINAL_DST: len %u not %u\n", *len, sizeof(struct sockaddr_in)); return -EINVAL; } h = ip_conntrack_find_get(&tuple, NULL); if (h) { struct sockaddr_in sin; struct ip_conntrack *ct = tuplehash_to_ctrack(h); sin.sin_family = AF_INET; sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL] .tuple.dst.u.tcp.port; sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] .tuple.dst.ip; DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); ip_conntrack_put(ct); if (copy_to_user(user, &sin, sizeof(sin)) != 0) return -EFAULT; else return 0; } DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port), NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port)); return -ENOENT;}static struct nf_sockopt_ops so_getorigdst = { .pf = PF_INET, .get_optmin = SO_ORIGINAL_DST, .get_optmax = SO_ORIGINAL_DST+1, .get = &getorigdst,};static int kill_all(struct ip_conntrack *i, void *data){ return 1;}static void free_conntrack_hash(void){ if (ip_conntrack_vmalloc) vfree(ip_conntrack_hash); else free_pages((unsigned long)ip_conntrack_hash, get_order(sizeof(struct list_head) * ip_conntrack_htable_size));}void ip_conntrack_flush(){ /* This makes sure all current packets have passed through netfilter framework. Roll on, two-stage module delete... */ synchronize_net(); ip_ct_event_cache_flush(); i_see_dead_people: ip_ct_iterate_cleanup(kill_all, NULL); if (atomic_read(&ip_conntrack_count) != 0) { schedule(); goto i_see_dead_people; } /* wait until all references to ip_conntrack_untracked are dropped */ while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1) schedule();}/* Mishearing the voices in his head, our hero wonders how he's supposed to kill the mall. 
*/void ip_conntrack_cleanup(void){ ip_ct_attach = NULL; ip_conntrack_flush(); kmem_cache_destroy(ip_conntrack_cachep); kmem_cache_destroy(ip_conntrack_expect_cachep); free_conntrack_hash(); nf_unregister_sockopt(&so_getorigdst);}static int hashsize;module_param(hashsize, int, 0400);int __init ip_conntrack_init(void){ unsigned int i; int ret; /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ if (hashsize) { ip_conntrack_htable_size = hashsize; } else { ip_conntrack_htable_size = (((num_physpages << PAGE_SHIFT) / 16384) / sizeof(struct list_head)); if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) ip_conntrack_htable_size = 8192; if (ip_conntrack_htable_size < 16) ip_conntrack_htable_size = 16; } ip_conntrack_max = 8 * ip_conntrack_htable_size; printk("ip_conntrack version %s (%u buckets, %d max)" " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION, ip_conntrack_htable_size, ip_conntrack_max, sizeof(struct ip_conntrack)); ret = nf_register_sockopt(&so_getorigdst); if (ret != 0) { printk(KERN_ERR "Unable to register netfilter socket option\n"); return ret; } /* AK: the hash table is twice as big than needed because it uses list_head. it would be much nicer to caches to use a single pointer list head here. 
*/ ip_conntrack_vmalloc = 0; ip_conntrack_hash =(void*)__get_free_pages(GFP_KERNEL, get_order(sizeof(struct list_head) *ip_conntrack_htable_size)); if (!ip_conntrack_hash) { ip_conntrack_vmalloc = 1; printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n"); ip_conntrack_hash = vmalloc(sizeof(struct list_head) * ip_conntrack_htable_size); } if (!ip_conntrack_hash) { printk(KERN_ERR "Unable to create ip_conntrack_hash\n"); goto err_unreg_sockopt; } ip_conntrack_cachep = kmem_cache_create("ip_conntrack", sizeof(struct ip_conntrack), 0, 0, NULL, NULL); if (!ip_conntrack_cachep) { printk(KERN_ERR "Unable to create ip_conntrack slab cache\n"); goto err_free_hash; } ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect", sizeof(struct ip_conntrack_expect), 0, 0, NULL, NULL); if (!ip_conntrack_expect_cachep) { printk(KERN_ERR "Unable to create ip_expect slab cache\n"); goto err_free_conntrack_slab; } /* Don't NEED lock here, but good form anyway. */ write_lock_bh(&ip_conntrack_lock); for (i = 0; i < MAX_IP_CT_PROTO; i++) ip_ct_protos[i] = &ip_conntrack_generic_protocol; /* Sew in builtin protocols. */ ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp; ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp; ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp; write_unlock_bh(&ip_conntrack_lock); for (i = 0; i < ip_conntrack_htable_size; i++) INIT_LIST_HEAD(&ip_conntrack_hash[i]); /* For use by ipt_REJECT */ ip_ct_attach = ip_conntrack_attach; /* Set up fake conntrack: - to never be deleted, not in any hashes */ atomic_set(&ip_conntrack_untracked.ct_general.use, 1); /* - and look it like as a confirmed connection */ set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status); return ret;err_free_conntrack_slab: kmem_cache_destroy(ip_conntrack_cachep);err_free_hash: free_conntrack_hash();err_unreg_sockopt: nf_unregister_sockopt(&so_getorigdst); return -ENOMEM;}
/*
 * (Code-viewer chrome, not source — keyboard shortcuts help:
 *  copy code Ctrl+C, search code Ctrl+F, fullscreen F11,
 *  increase font Ctrl+=, decrease font Ctrl+-, show shortcuts ?.)
 */