ip_vs_conn.c

来自「linux-2.4.29操作系统的源码」· C语言 代码 · 共 1,570 行 · 第 1/3 页

C
1,570
字号
/* * IPVS         An implementation of the IP virtual server support for the *              LINUX operating system.  IPVS is now implemented as a module *              over the Netfilter framework. IPVS can be used to build a *              high-performance and highly available server based on a *              cluster of servers. * * Version:     $Id: ip_vs_conn.c,v 1.28.2.5 2003/08/09 13:27:08 wensong Exp $ * * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org> *              Peter Kese <peter.kese@ijs.si> *              Julian Anastasov <ja@ssi.bg> * *              This program is free software; you can redistribute it and/or *              modify it under the terms of the GNU General Public License *              as published by the Free Software Foundation; either version *              2 of the License, or (at your option) any later version. * * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese, * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms * and others. Many code here is taken from IP MASQ code of kernel 2.2. * * Changes: * */#include <linux/module.h>#include <linux/kernel.h>#include <linux/vmalloc.h>#include <linux/ip.h>#include <linux/tcp.h>                  /* for tcphdr */#include <linux/in.h>#include <linux/proc_fs.h>              /* for proc_net_* */#include <asm/softirq.h>                /* for local_bh_* */#include <net/ip.h>#include <net/tcp.h>                    /* for csum_tcpudp_magic */#include <net/udp.h>#include <net/icmp.h>                   /* for icmp_send */#include <net/route.h>                  /* for ip_route_output */#include <linux/netfilter.h>#include <linux/netfilter_ipv4.h>#include <linux/jhash.h>#include <linux/random.h>#include <net/ip_vs.h>/* *  Connection hash table: for input and output packets lookups of IPVS */static struct list_head *ip_vs_conn_tab;/* SLAB cache for IPVS connections */static kmem_cache_t *ip_vs_conn_cachep;/* counter for current IPVS connections */static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);/* counter for no-client-port connections */static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);/* random value for IPVS connection hash */static unsigned int ip_vs_conn_rnd;/* *  Fine locking granularity for big connection hash table */#define CT_LOCKARRAY_BITS  4#define CT_LOCKARRAY_SIZE  (1<<CT_LOCKARRAY_BITS)#define CT_LOCKARRAY_MASK  (CT_LOCKARRAY_SIZE-1)struct ip_vs_aligned_lock{	rwlock_t	l;} __attribute__((__aligned__(SMP_CACHE_BYTES)));/* lock array for conn table */struct ip_vs_aligned_lock__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;static inline void ct_read_lock(unsigned key){	read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_read_unlock(unsigned key){	read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_write_lock(unsigned key){	write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_write_unlock(unsigned key){	write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_read_lock_bh(unsigned key){	read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_read_unlock_bh(unsigned key){	read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_write_lock_bh(unsigned key){	write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_write_unlock_bh(unsigned key){	write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}/* *	Returns hash value for IPVS connection entry */static unsignedip_vs_conn_hashkey(unsigned proto, __u32 addr, __u16 port){	return jhash_3words(addr, port, proto, ip_vs_conn_rnd)		& IP_VS_CONN_TAB_MASK;}/* *	Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. *	returns bool success. */static int ip_vs_conn_hash(struct ip_vs_conn *cp){	unsigned hash;	int ret;	/* Hash by protocol, client address and port */	hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);	ct_write_lock(hash);	if (!(cp->flags & IP_VS_CONN_F_HASHED)) {		list_add(&cp->c_list, &ip_vs_conn_tab[hash]);		cp->flags |= IP_VS_CONN_F_HASHED;		atomic_inc(&cp->refcnt);		ret = 1;	} else {		IP_VS_ERR("ip_vs_conn_hash(): request for already hashed, "			  "called from %p\n", __builtin_return_address(0));		ret = 0;	}	ct_write_unlock(hash);	return ret;}/* *	UNhashes ip_vs_conn from ip_vs_conn_tab. *	returns bool success. */static int ip_vs_conn_unhash(struct ip_vs_conn *cp){	unsigned hash;	int ret;	/* unhash it and decrease its reference counter */	hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);	ct_write_lock(hash);	if (cp->flags & IP_VS_CONN_F_HASHED) {		list_del(&cp->c_list);		cp->flags &= ~IP_VS_CONN_F_HASHED;		atomic_dec(&cp->refcnt);		ret = 1;	} else		ret = 0;	ct_write_unlock(hash);	return ret;}/* *  Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. *  Called for pkts coming from OUTside-to-INside. *	s_addr, s_port: pkt source address (foreign host) *	d_addr, d_port: pkt dest address (load balancer) */static inline struct ip_vs_conn *__ip_vs_conn_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port){	unsigned hash;	struct ip_vs_conn *cp;	struct list_head *l,*e;	hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);	l = &ip_vs_conn_tab[hash];	ct_read_lock(hash);	for (e=l->next; e!=l; e=e->next) {		cp = list_entry(e, struct ip_vs_conn, c_list);		if (s_addr==cp->caddr && s_port==cp->cport &&		    d_port==cp->vport && d_addr==cp->vaddr &&		    protocol==cp->protocol) {			/* HIT */			atomic_inc(&cp->refcnt);			ct_read_unlock(hash);			return cp;		}	}	ct_read_unlock(hash);	return NULL;}struct ip_vs_conn *ip_vs_conn_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port){	struct ip_vs_conn *cp;	cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port);	if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))		cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port);	IP_VS_DBG(7, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",		  ip_vs_proto_name(protocol),		  NIPQUAD(s_addr), ntohs(s_port),		  NIPQUAD(d_addr), ntohs(d_port),		  cp?"hit":"not hit");	return cp;}/* *  Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. *  Called for pkts coming from inside-to-OUTside. *	s_addr, s_port: pkt source address (inside host) *	d_addr, d_port: pkt dest address (foreign host) */struct ip_vs_conn *ip_vs_conn_out_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port){	unsigned hash;	struct ip_vs_conn *cp, *ret=NULL;	struct list_head *l,*e;	/*	 *	Check for "full" addressed entries	 */	hash = ip_vs_conn_hashkey(protocol, d_addr, d_port);	l = &ip_vs_conn_tab[hash];	ct_read_lock(hash);	for (e=l->next; e!=l; e=e->next) {		cp = list_entry(e, struct ip_vs_conn, c_list);		if (d_addr == cp->caddr && d_port == cp->cport &&		    s_port == cp->dport && s_addr == cp->daddr &&		    protocol == cp->protocol) {			/* HIT */			atomic_inc(&cp->refcnt);			ret = cp;			break;		}	}	ct_read_unlock(hash);	IP_VS_DBG(7, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",		  ip_vs_proto_name(protocol),		  NIPQUAD(s_addr), ntohs(s_port),		  NIPQUAD(d_addr), ntohs(d_port),		  ret?"hit":"not hit");	return ret;}/* *      Put back the conn and restart its timer with its timeout */void ip_vs_conn_put(struct ip_vs_conn *cp){	/* reset it expire in its timeout */	mod_timer(&cp->timer, jiffies+cp->timeout);	__ip_vs_conn_put(cp);}/* *	Timeout table[state] */struct ip_vs_timeout_table vs_timeout_table = {	ATOMIC_INIT(0),	/* refcnt */	0,		/* scale  */	{		[IP_VS_S_NONE]          =	30*60*HZ,		[IP_VS_S_ESTABLISHED]	=	15*60*HZ,		[IP_VS_S_SYN_SENT]	=	2*60*HZ,		[IP_VS_S_SYN_RECV]	=	1*60*HZ,		[IP_VS_S_FIN_WAIT]	=	2*60*HZ,		[IP_VS_S_TIME_WAIT]	=	2*60*HZ,		[IP_VS_S_CLOSE]         =	10*HZ,		[IP_VS_S_CLOSE_WAIT]	=	60*HZ,		[IP_VS_S_LAST_ACK]	=	30*HZ,		[IP_VS_S_LISTEN]	=	2*60*HZ,		[IP_VS_S_SYNACK]	=	120*HZ,		[IP_VS_S_UDP]		=	5*60*HZ,		[IP_VS_S_ICMP]          =	1*60*HZ,		[IP_VS_S_LAST]          =	2*HZ,	},	/* timeout */};struct ip_vs_timeout_table vs_timeout_table_dos = {	ATOMIC_INIT(0),	/* refcnt */	0,		/* scale  */	{		[IP_VS_S_NONE]          =	15*60*HZ,		[IP_VS_S_ESTABLISHED]	=	8*60*HZ,		[IP_VS_S_SYN_SENT]	=	60*HZ,		[IP_VS_S_SYN_RECV]	=	10*HZ,		[IP_VS_S_FIN_WAIT]	=	60*HZ,		[IP_VS_S_TIME_WAIT]	=	60*HZ,		[IP_VS_S_CLOSE]         =	10*HZ,		[IP_VS_S_CLOSE_WAIT]	=	60*HZ,		[IP_VS_S_LAST_ACK]	=	30*HZ,		[IP_VS_S_LISTEN]	=	2*60*HZ,		[IP_VS_S_SYNACK]	=	100*HZ,		[IP_VS_S_UDP]		=	3*60*HZ,		[IP_VS_S_ICMP]          =	1*60*HZ,		[IP_VS_S_LAST]          =	2*HZ,	},	/* timeout */};/* *	Timeout table to use for the VS entries *	If NULL we use the default table (vs_timeout_table). *	Under flood attack we switch to vs_timeout_table_dos */static struct ip_vs_timeout_table *ip_vs_timeout_table = &vs_timeout_table;static const char * state_name_table[IP_VS_S_LAST+1] = {	[IP_VS_S_NONE]          =	"NONE",	[IP_VS_S_ESTABLISHED]	=	"ESTABLISHED",	[IP_VS_S_SYN_SENT]	=	"SYN_SENT",	[IP_VS_S_SYN_RECV]	=	"SYN_RECV",	[IP_VS_S_FIN_WAIT]	=	"FIN_WAIT",	[IP_VS_S_TIME_WAIT]	=	"TIME_WAIT",	[IP_VS_S_CLOSE]         =	"CLOSE",	[IP_VS_S_CLOSE_WAIT]	=	"CLOSE_WAIT",	[IP_VS_S_LAST_ACK]	=	"LAST_ACK",	[IP_VS_S_LISTEN]	=	"LISTEN",	[IP_VS_S_SYNACK]	=	"SYNACK",	[IP_VS_S_UDP]		=	"UDP",	[IP_VS_S_ICMP]          =	"ICMP",	[IP_VS_S_LAST]          =	"BUG!",};#define sNO IP_VS_S_NONE#define sES IP_VS_S_ESTABLISHED#define sSS IP_VS_S_SYN_SENT#define sSR IP_VS_S_SYN_RECV#define sFW IP_VS_S_FIN_WAIT#define sTW IP_VS_S_TIME_WAIT#define sCL IP_VS_S_CLOSE#define sCW IP_VS_S_CLOSE_WAIT#define sLA IP_VS_S_LAST_ACK#define sLI IP_VS_S_LISTEN#define sSA IP_VS_S_SYNACKstruct vs_tcp_states_t {	int next_state[IP_VS_S_LAST];	/* should be _LAST_TCP */};const char * ip_vs_state_name(int state){	if (state >= IP_VS_S_LAST)		return "ERR!";	return state_name_table[state] ? state_name_table[state] : "?";}static struct vs_tcp_states_t vs_tcp_states [] = {/*	INPUT *//*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*//*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},/*	OUTPUT *//*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*//*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},/*	INPUT-ONLY *//*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*//*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},};static struct vs_tcp_states_t vs_tcp_states_dos [] = {/*	INPUT *//*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*//*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},/*	OUTPUT *//*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*//*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},/*	INPUT-ONLY *//*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*//*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},};static struct vs_tcp_states_t *ip_vs_state_table = vs_tcp_states;void ip_vs_secure_tcp_set(int on){	if (on) {		ip_vs_state_table = vs_tcp_states_dos;		ip_vs_timeout_table = &vs_timeout_table_dos;	} else {		ip_vs_state_table = vs_tcp_states;		ip_vs_timeout_table = &vs_timeout_table;	}}static inline int vs_tcp_state_idx(struct tcphdr *th, int state_off){	/*	 *	[0-3]: input states, [4-7]: output, [8-11] input only states.	 */	if (th->rst)		return state_off+3;	if (th->syn)		return state_off+0;	if (th->fin)		return state_off+1;	if (th->ack)		return state_off+2;	return -1;}static inline int vs_set_state_timeout(struct ip_vs_conn *cp, int state){	struct ip_vs_timeout_table *vstim = cp->timeout_table;	/*	 *	Use default timeout table if no specific for this entry	 */	if (!vstim)		vstim = &vs_timeout_table;	cp->timeout = vstim->timeout[cp->state=state];	if (vstim->scale) {		int scale = vstim->scale;		if (scale<0)			cp->timeout >>= -scale;		else if (scale > 0)			cp->timeout <<= scale;	}	return state;}static inline intvs_tcp_state(struct ip_vs_conn *cp, int state_off, struct tcphdr *th){	int state_idx;	int new_state = IP_VS_S_CLOSE;	/*	 *    Update state offset to INPUT_ONLY if necessary	 *    or delete NO_OUTPUT flag if output packet detected	 */	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {		if (state_off == VS_STATE_OUTPUT)			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;		else			state_off = VS_STATE_INPUT_ONLY;	}	if ((state_idx = vs_tcp_state_idx(th, state_off)) < 0) {		IP_VS_DBG(8, "vs_tcp_state_idx(%d)=%d!!!\n",			  state_off, state_idx);		goto tcp_state_out;	}	new_state = ip_vs_state_table[state_idx].next_state[cp->state];  tcp_state_out:

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?