ip_vs_conn.c
来自「linux-2.4.29操作系统的源码」· C语言 代码 · 共 1,570 行 · 第 1/3 页
C
1,570 行
/* * IPVS An implementation of the IP virtual server support for the * LINUX operating system. IPVS is now implemented as a module * over the Netfilter framework. IPVS can be used to build a * high-performance and highly available server based on a * cluster of servers. * * Version: $Id: ip_vs_conn.c,v 1.28.2.5 2003/08/09 13:27:08 wensong Exp $ * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * Julian Anastasov <ja@ssi.bg> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese, * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms * and others. Many code here is taken from IP MASQ code of kernel 2.2. * * Changes: * */#include <linux/module.h>#include <linux/kernel.h>#include <linux/vmalloc.h>#include <linux/ip.h>#include <linux/tcp.h> /* for tcphdr */#include <linux/in.h>#include <linux/proc_fs.h> /* for proc_net_* */#include <asm/softirq.h> /* for local_bh_* */#include <net/ip.h>#include <net/tcp.h> /* for csum_tcpudp_magic */#include <net/udp.h>#include <net/icmp.h> /* for icmp_send */#include <net/route.h> /* for ip_route_output */#include <linux/netfilter.h>#include <linux/netfilter_ipv4.h>#include <linux/jhash.h>#include <linux/random.h>#include <net/ip_vs.h>/* * Connection hash table: for input and output packets lookups of IPVS */static struct list_head *ip_vs_conn_tab;/* SLAB cache for IPVS connections */static kmem_cache_t *ip_vs_conn_cachep;/* counter for current IPVS connections */static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);/* counter for no-client-port connections */static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);/* random value for IPVS connection hash */static unsigned int ip_vs_conn_rnd;/* * Fine locking granularity for big connection hash table */#define CT_LOCKARRAY_BITS 4#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)struct ip_vs_aligned_lock{ rwlock_t l;} __attribute__((__aligned__(SMP_CACHE_BYTES)));/* lock array for conn table */struct ip_vs_aligned_lock__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;static inline void ct_read_lock(unsigned key){ read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_read_unlock(unsigned key){ read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_write_lock(unsigned key){ write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_write_unlock(unsigned key){ write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_read_lock_bh(unsigned key){ read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_read_unlock_bh(unsigned key){ read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_write_lock_bh(unsigned key){ write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_write_unlock_bh(unsigned key){ write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}/* * Returns hash value for IPVS connection entry */static unsignedip_vs_conn_hashkey(unsigned proto, __u32 addr, __u16 port){ return jhash_3words(addr, port, proto, ip_vs_conn_rnd) & IP_VS_CONN_TAB_MASK;}/* * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. * returns bool success. */static int ip_vs_conn_hash(struct ip_vs_conn *cp){ unsigned hash; int ret; /* Hash by protocol, client address and port */ hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport); ct_write_lock(hash); if (!(cp->flags & IP_VS_CONN_F_HASHED)) { list_add(&cp->c_list, &ip_vs_conn_tab[hash]); cp->flags |= IP_VS_CONN_F_HASHED; atomic_inc(&cp->refcnt); ret = 1; } else { IP_VS_ERR("ip_vs_conn_hash(): request for already hashed, " "called from %p\n", __builtin_return_address(0)); ret = 0; } ct_write_unlock(hash); return ret;}/* * UNhashes ip_vs_conn from ip_vs_conn_tab. * returns bool success. */static int ip_vs_conn_unhash(struct ip_vs_conn *cp){ unsigned hash; int ret; /* unhash it and decrease its reference counter */ hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport); ct_write_lock(hash); if (cp->flags & IP_VS_CONN_F_HASHED) { list_del(&cp->c_list); cp->flags &= ~IP_VS_CONN_F_HASHED; atomic_dec(&cp->refcnt); ret = 1; } else ret = 0; ct_write_unlock(hash); return ret;}/* * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. * Called for pkts coming from OUTside-to-INside. * s_addr, s_port: pkt source address (foreign host) * d_addr, d_port: pkt dest address (load balancer) */static inline struct ip_vs_conn *__ip_vs_conn_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port){ unsigned hash; struct ip_vs_conn *cp; struct list_head *l,*e; hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); l = &ip_vs_conn_tab[hash]; ct_read_lock(hash); for (e=l->next; e!=l; e=e->next) { cp = list_entry(e, struct ip_vs_conn, c_list); if (s_addr==cp->caddr && s_port==cp->cport && d_port==cp->vport && d_addr==cp->vaddr && protocol==cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); ct_read_unlock(hash); return cp; } } ct_read_unlock(hash); return NULL;}struct ip_vs_conn *ip_vs_conn_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port){ struct ip_vs_conn *cp; cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port); if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port); IP_VS_DBG(7, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", ip_vs_proto_name(protocol), NIPQUAD(s_addr), ntohs(s_port), NIPQUAD(d_addr), ntohs(d_port), cp?"hit":"not hit"); return cp;}/* * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. * Called for pkts coming from inside-to-OUTside. * s_addr, s_port: pkt source address (inside host) * d_addr, d_port: pkt dest address (foreign host) */struct ip_vs_conn *ip_vs_conn_out_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port){ unsigned hash; struct ip_vs_conn *cp, *ret=NULL; struct list_head *l,*e; /* * Check for "full" addressed entries */ hash = ip_vs_conn_hashkey(protocol, d_addr, d_port); l = &ip_vs_conn_tab[hash]; ct_read_lock(hash); for (e=l->next; e!=l; e=e->next) { cp = list_entry(e, struct ip_vs_conn, c_list); if (d_addr == cp->caddr && d_port == cp->cport && s_port == cp->dport && s_addr == cp->daddr && protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); ret = cp; break; } } ct_read_unlock(hash); IP_VS_DBG(7, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", ip_vs_proto_name(protocol), NIPQUAD(s_addr), ntohs(s_port), NIPQUAD(d_addr), ntohs(d_port), ret?"hit":"not hit"); return ret;}/* * Put back the conn and restart its timer with its timeout */void ip_vs_conn_put(struct ip_vs_conn *cp){ /* reset it expire in its timeout */ mod_timer(&cp->timer, jiffies+cp->timeout); __ip_vs_conn_put(cp);}/* * Timeout table[state] */struct ip_vs_timeout_table vs_timeout_table = { ATOMIC_INIT(0), /* refcnt */ 0, /* scale */ { [IP_VS_S_NONE] = 30*60*HZ, [IP_VS_S_ESTABLISHED] = 15*60*HZ, [IP_VS_S_SYN_SENT] = 2*60*HZ, [IP_VS_S_SYN_RECV] = 1*60*HZ, [IP_VS_S_FIN_WAIT] = 2*60*HZ, [IP_VS_S_TIME_WAIT] = 2*60*HZ, [IP_VS_S_CLOSE] = 10*HZ, [IP_VS_S_CLOSE_WAIT] = 60*HZ, [IP_VS_S_LAST_ACK] = 30*HZ, [IP_VS_S_LISTEN] = 2*60*HZ, [IP_VS_S_SYNACK] = 120*HZ, [IP_VS_S_UDP] = 5*60*HZ, [IP_VS_S_ICMP] = 1*60*HZ, [IP_VS_S_LAST] = 2*HZ, }, /* timeout */};struct ip_vs_timeout_table vs_timeout_table_dos = { ATOMIC_INIT(0), /* refcnt */ 0, /* scale */ { [IP_VS_S_NONE] = 15*60*HZ, [IP_VS_S_ESTABLISHED] = 8*60*HZ, [IP_VS_S_SYN_SENT] = 60*HZ, [IP_VS_S_SYN_RECV] = 10*HZ, [IP_VS_S_FIN_WAIT] = 60*HZ, [IP_VS_S_TIME_WAIT] = 60*HZ, [IP_VS_S_CLOSE] = 10*HZ, [IP_VS_S_CLOSE_WAIT] = 60*HZ, [IP_VS_S_LAST_ACK] = 30*HZ, [IP_VS_S_LISTEN] = 2*60*HZ, [IP_VS_S_SYNACK] = 100*HZ, [IP_VS_S_UDP] = 3*60*HZ, [IP_VS_S_ICMP] = 1*60*HZ, [IP_VS_S_LAST] = 2*HZ, }, /* timeout */};/* * Timeout table to use for the VS entries * If NULL we use the default table (vs_timeout_table). * Under flood attack we switch to vs_timeout_table_dos */static struct ip_vs_timeout_table *ip_vs_timeout_table = &vs_timeout_table;static const char * state_name_table[IP_VS_S_LAST+1] = { [IP_VS_S_NONE] = "NONE", [IP_VS_S_ESTABLISHED] = "ESTABLISHED", [IP_VS_S_SYN_SENT] = "SYN_SENT", [IP_VS_S_SYN_RECV] = "SYN_RECV", [IP_VS_S_FIN_WAIT] = "FIN_WAIT", [IP_VS_S_TIME_WAIT] = "TIME_WAIT", [IP_VS_S_CLOSE] = "CLOSE", [IP_VS_S_CLOSE_WAIT] = "CLOSE_WAIT", [IP_VS_S_LAST_ACK] = "LAST_ACK", [IP_VS_S_LISTEN] = "LISTEN", [IP_VS_S_SYNACK] = "SYNACK", [IP_VS_S_UDP] = "UDP", [IP_VS_S_ICMP] = "ICMP", [IP_VS_S_LAST] = "BUG!",};#define sNO IP_VS_S_NONE#define sES IP_VS_S_ESTABLISHED#define sSS IP_VS_S_SYN_SENT#define sSR IP_VS_S_SYN_RECV#define sFW IP_VS_S_FIN_WAIT#define sTW IP_VS_S_TIME_WAIT#define sCL IP_VS_S_CLOSE#define sCW IP_VS_S_CLOSE_WAIT#define sLA IP_VS_S_LAST_ACK#define sLI IP_VS_S_LISTEN#define sSA IP_VS_S_SYNACKstruct vs_tcp_states_t { int next_state[IP_VS_S_LAST]; /* should be _LAST_TCP */};const char * ip_vs_state_name(int state){ if (state >= IP_VS_S_LAST) return "ERR!"; return state_name_table[state] ? state_name_table[state] : "?";}static struct vs_tcp_states_t vs_tcp_states [] = {/* INPUT *//* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA *//*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},/* OUTPUT *//* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA *//*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},/* INPUT-ONLY *//* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA *//*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},};static struct vs_tcp_states_t vs_tcp_states_dos [] = {/* INPUT *//* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA *//*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},/* OUTPUT *//* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA *//*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},/* INPUT-ONLY *//* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA *//*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},};static struct vs_tcp_states_t *ip_vs_state_table = vs_tcp_states;void ip_vs_secure_tcp_set(int on){ if (on) { ip_vs_state_table = vs_tcp_states_dos; ip_vs_timeout_table = &vs_timeout_table_dos; } else { ip_vs_state_table = vs_tcp_states; ip_vs_timeout_table = &vs_timeout_table; }}static inline int vs_tcp_state_idx(struct tcphdr *th, int state_off){ /* * [0-3]: input states, [4-7]: output, [8-11] input only states. */ if (th->rst) return state_off+3; if (th->syn) return state_off+0; if (th->fin) return state_off+1; if (th->ack) return state_off+2; return -1;}static inline int vs_set_state_timeout(struct ip_vs_conn *cp, int state){ struct ip_vs_timeout_table *vstim = cp->timeout_table; /* * Use default timeout table if no specific for this entry */ if (!vstim) vstim = &vs_timeout_table; cp->timeout = vstim->timeout[cp->state=state]; if (vstim->scale) { int scale = vstim->scale; if (scale<0) cp->timeout >>= -scale; else if (scale > 0) cp->timeout <<= scale; } return state;}static inline intvs_tcp_state(struct ip_vs_conn *cp, int state_off, struct tcphdr *th){ int state_idx; int new_state = IP_VS_S_CLOSE; /* * Update state offset to INPUT_ONLY if necessary * or delete NO_OUTPUT flag if output packet detected */ if (cp->flags & IP_VS_CONN_F_NOOUTPUT) { if (state_off == VS_STATE_OUTPUT) cp->flags &= ~IP_VS_CONN_F_NOOUTPUT; else state_off = VS_STATE_INPUT_ONLY; } if ((state_idx = vs_tcp_state_idx(th, state_off)) < 0) { IP_VS_DBG(8, "vs_tcp_state_idx(%d)=%d!!!\n", state_off, state_idx); goto tcp_state_out; } new_state = ip_vs_state_table[state_idx].next_state[cp->state]; tcp_state_out:
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?