📄 ipt_clusterip.c
字号:
/* Cluster IP hashmark target * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> * based on ideas of Fabio Olive Leite <olive@unixforge.org> * * Development of this code funded by SuSE Linux AG, http://www.suse.com/ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * */#include <linux/module.h>#include <linux/config.h>#include <linux/proc_fs.h>#include <linux/jhash.h>#include <linux/bitops.h>#include <linux/skbuff.h>#include <linux/ip.h>#include <linux/tcp.h>#include <linux/udp.h>#include <linux/icmp.h>#include <linux/if_arp.h>#include <linux/proc_fs.h>#include <linux/seq_file.h>#include <net/checksum.h>#include <linux/netfilter_arp.h>#include <linux/netfilter_ipv4/ip_tables.h>#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>#include <net/netfilter/nf_conntrack_compat.h>#define CLUSTERIP_VERSION "0.8"#define DEBUG_CLUSTERIP#ifdef DEBUG_CLUSTERIP#define DEBUGP printk#else#define DEBUGP#endif#define ASSERT_READ_LOCK(x)MODULE_LICENSE("GPL");MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");MODULE_DESCRIPTION("iptables target for CLUSTERIP");struct clusterip_config { struct list_head list; /* list of all configs */ atomic_t refcount; /* reference count */ atomic_t entries; /* number of entries/rules * referencing us */ u_int32_t clusterip; /* the IP address */ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ struct net_device *dev; /* device */ u_int16_t num_total_nodes; /* total number of nodes */ unsigned long local_nodes; /* node number array */#ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; /* proc dir entry */#endif enum clusterip_hashmode hash_mode; /* which hashing mode */ u_int32_t hash_initval; /* hash initialization */};static LIST_HEAD(clusterip_configs);/* clusterip_lock protects the clusterip_configs list */static DEFINE_RWLOCK(clusterip_lock);#ifdef CONFIG_PROC_FSstatic struct file_operations clusterip_proc_fops;static struct proc_dir_entry *clusterip_procdir;#endifstatic inline voidclusterip_config_get(struct clusterip_config *c){ atomic_inc(&c->refcount);}static inline voidclusterip_config_put(struct clusterip_config *c){ if (atomic_dec_and_test(&c->refcount)) kfree(c);}/* increase the count of entries(rules) using/referencing this config */static inline voidclusterip_config_entry_get(struct clusterip_config *c){ atomic_inc(&c->entries);}/* decrease the count of entries using/referencing this config. If last * entry(rule) is removed, remove the config from lists, but don't free it * yet, since proc-files could still be holding references */static inline voidclusterip_config_entry_put(struct clusterip_config *c){ if (atomic_dec_and_test(&c->entries)) { write_lock_bh(&clusterip_lock); list_del(&c->list); write_unlock_bh(&clusterip_lock); dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); dev_put(c->dev); /* In case anyone still accesses the file, the open/close * functions are also incrementing the refcount on their own, * so it's safe to remove the entry even if it's in use. */#ifdef CONFIG_PROC_FS remove_proc_entry(c->pde->name, c->pde->parent);#endif }}static struct clusterip_config *__clusterip_config_find(u_int32_t clusterip){ struct list_head *pos; ASSERT_READ_LOCK(&clusterip_lock); list_for_each(pos, &clusterip_configs) { struct clusterip_config *c = list_entry(pos, struct clusterip_config, list); if (c->clusterip == clusterip) { return c; } } return NULL;}static inline struct clusterip_config *clusterip_config_find_get(u_int32_t clusterip, int entry){ struct clusterip_config *c; read_lock_bh(&clusterip_lock); c = __clusterip_config_find(clusterip); if (!c) { read_unlock_bh(&clusterip_lock); return NULL; } atomic_inc(&c->refcount); if (entry) atomic_inc(&c->entries); read_unlock_bh(&clusterip_lock); return c;}static voidclusterip_config_init_nodelist(struct clusterip_config *c, const struct ipt_clusterip_tgt_info *i){ int n; for (n = 0; n < i->num_local_nodes; n++) { set_bit(i->local_nodes[n] - 1, &c->local_nodes); }}static struct clusterip_config *clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, struct net_device *dev){ struct clusterip_config *c; char buffer[16]; c = kmalloc(sizeof(*c), GFP_ATOMIC); if (!c) return NULL; memset(c, 0, sizeof(*c)); c->dev = dev; c->clusterip = ip; memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); c->num_total_nodes = i->num_total_nodes; clusterip_config_init_nodelist(c, i); c->hash_mode = i->hash_mode; c->hash_initval = i->hash_initval; atomic_set(&c->refcount, 1); atomic_set(&c->entries, 1);#ifdef CONFIG_PROC_FS /* create proc dir entry */ sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR, clusterip_procdir); if (!c->pde) { kfree(c); return NULL; } c->pde->proc_fops = &clusterip_proc_fops; c->pde->data = c;#endif write_lock_bh(&clusterip_lock); list_add(&c->list, &clusterip_configs); write_unlock_bh(&clusterip_lock); return c;}static intclusterip_add_node(struct clusterip_config *c, u_int16_t nodenum){ if (nodenum == 0 || nodenum > c->num_total_nodes) return 1; /* check if we already have this number in our bitfield */ if (test_and_set_bit(nodenum - 1, &c->local_nodes)) return 1; return 0;}static intclusterip_del_node(struct clusterip_config *c, u_int16_t nodenum){ if (nodenum == 0 || nodenum > c->num_total_nodes) return 1; if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) return 0; return 1;}static inline u_int32_tclusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config){ struct iphdr *iph = skb->nh.iph; unsigned long hashval; u_int16_t sport, dport; struct tcphdr *th; struct udphdr *uh; struct icmphdr *ih; switch (iph->protocol) { case IPPROTO_TCP: th = (void *)iph+iph->ihl*4; sport = ntohs(th->source); dport = ntohs(th->dest); break; case IPPROTO_UDP: uh = (void *)iph+iph->ihl*4; sport = ntohs(uh->source); dport = ntohs(uh->dest); break; case IPPROTO_ICMP: ih = (void *)iph+iph->ihl*4; sport = ntohs(ih->un.echo.id); dport = (ih->type<<8)|ih->code; break; default: if (net_ratelimit()) { printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n", iph->protocol); } sport = dport = 0; } switch (config->hash_mode) { case CLUSTERIP_HASHMODE_SIP: hashval = jhash_1word(ntohl(iph->saddr), config->hash_initval); break; case CLUSTERIP_HASHMODE_SIP_SPT: hashval = jhash_2words(ntohl(iph->saddr), sport, config->hash_initval); break; case CLUSTERIP_HASHMODE_SIP_SPT_DPT: hashval = jhash_3words(ntohl(iph->saddr), sport, dport, config->hash_initval); break; default: /* to make gcc happy */ hashval = 0; /* This cannot happen, unless the check function wasn't called * at rule load time */ printk("CLUSTERIP: unknown mode `%u'\n", config->hash_mode); BUG(); break; } /* node numbers are 1..n, not 0..n */ return ((hashval % config->num_total_nodes)+1);}static inline intclusterip_responsible(struct clusterip_config *config, u_int32_t hash){ return test_bit(hash - 1, &config->local_nodes);}/*********************************************************************** * IPTABLES TARGET ***********************************************************************/static unsigned inttarget(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const void *targinfo, void *userinfo){ const struct ipt_clusterip_tgt_info *cipinfo = targinfo; enum ip_conntrack_info ctinfo; u_int32_t *mark, hash; /* don't need to clusterip_config_get() here, since refcount * is only decremented by destroy() - and ip_tables guarantees * that the ->target() function isn't called after ->destroy() */ mark = nf_ct_get_mark((*pskb), &ctinfo); if (mark == NULL) { printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); /* FIXME: need to drop invalid ones, since replies * to outgoing connections of other nodes will be * marked as INVALID */ return NF_DROP; } /* special case: ICMP error handling. conntrack distinguishes between * error messages (RELATED) and information requests (see below) */ if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP && (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) return IPT_CONTINUE; /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here * on, which all have an ID field [relevant for hashing]. */ hash = clusterip_hashfn(*pskb, cipinfo->config); switch (ctinfo) { case IP_CT_NEW: *mark = hash; break; case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: /* FIXME: we don't handle expectations at the * moment. they can arrive on a different node than * the master connection (e.g. FTP passive mode) */ case IP_CT_ESTABLISHED: case IP_CT_ESTABLISHED+IP_CT_IS_REPLY: break; default: break; }#ifdef DEBUG_CLUSTERP DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);#endif DEBUGP("hash=%u ct_hash=%u ", hash, *mark); if (!clusterip_responsible(cipinfo->config, hash)) { DEBUGP("not responsible\n"); return NF_DROP; } DEBUGP("responsible\n"); /* despite being received via linklayer multicast, this is * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ (*pskb)->pkt_type = PACKET_HOST; return IPT_CONTINUE;}static intcheckentry(const char *tablename, const struct ipt_entry *e, void *targinfo, unsigned int targinfosize, unsigned int hook_mask){ struct ipt_clusterip_tgt_info *cipinfo = targinfo; struct clusterip_config *config; if (targinfosize != IPT_ALIGN(sizeof(struct ipt_clusterip_tgt_info))) { printk(KERN_WARNING "CLUSTERIP: targinfosize %u != %Zu\n", targinfosize, IPT_ALIGN(sizeof(struct ipt_clusterip_tgt_info)));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -