📄 ip_vs_conn.c
字号:
/* * IPVS An implementation of the IP virtual server support for the * LINUX operating system. IPVS is now implemented as a module * over the Netfilter framework. IPVS can be used to build a * high-performance and highly available server based on a * cluster of servers. * * Version: $Id: ip_vs_conn.c,v 1.31 2003/04/18 09:03:16 wensong Exp $ * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * Julian Anastasov <ja@ssi.bg> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese, * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms * and others. Many code here is taken from IP MASQ code of kernel 2.2. * * Changes: * */#include <linux/kernel.h>#include <linux/vmalloc.h>#include <linux/proc_fs.h> /* for proc_net_* */#include <linux/seq_file.h>#include <linux/jhash.h>#include <linux/random.h>#include <net/ip_vs.h>/* * Connection hash table: for input and output packets lookups of IPVS */static struct list_head *ip_vs_conn_tab;/* SLAB cache for IPVS connections */static kmem_cache_t *ip_vs_conn_cachep __read_mostly;/* counter for current IPVS connections */static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);/* counter for no client port connections */static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);/* random value for IPVS connection hash */static unsigned int ip_vs_conn_rnd;/* * Fine locking granularity for big connection hash table */#define CT_LOCKARRAY_BITS 4#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)struct ip_vs_aligned_lock{ rwlock_t l;} __attribute__((__aligned__(SMP_CACHE_BYTES)));/* lock array for conn table */static struct ip_vs_aligned_lock__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;static inline void ct_read_lock(unsigned key){ read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_read_unlock(unsigned key){ read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_write_lock(unsigned key){ write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_write_unlock(unsigned key){ write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_read_lock_bh(unsigned key){ read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_read_unlock_bh(unsigned key){ read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_write_lock_bh(unsigned key){ write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}static inline void ct_write_unlock_bh(unsigned key){ write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);}/* * Returns hash value for IPVS connection entry */static unsigned int ip_vs_conn_hashkey(unsigned proto, __u32 addr, __u16 port){ return jhash_3words(addr, port, proto, ip_vs_conn_rnd) & IP_VS_CONN_TAB_MASK;}/* * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. * returns bool success. */static inline int ip_vs_conn_hash(struct ip_vs_conn *cp){ unsigned hash; int ret; /* Hash by protocol, client address and port */ hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport); ct_write_lock(hash); if (!(cp->flags & IP_VS_CONN_F_HASHED)) { list_add(&cp->c_list, &ip_vs_conn_tab[hash]); cp->flags |= IP_VS_CONN_F_HASHED; atomic_inc(&cp->refcnt); ret = 1; } else { IP_VS_ERR("ip_vs_conn_hash(): request for already hashed, " "called from %p\n", __builtin_return_address(0)); ret = 0; } ct_write_unlock(hash); return ret;}/* * UNhashes ip_vs_conn from ip_vs_conn_tab. * returns bool success. */static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp){ unsigned hash; int ret; /* unhash it and decrease its reference counter */ hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport); ct_write_lock(hash); if (cp->flags & IP_VS_CONN_F_HASHED) { list_del(&cp->c_list); cp->flags &= ~IP_VS_CONN_F_HASHED; atomic_dec(&cp->refcnt); ret = 1; } else ret = 0; ct_write_unlock(hash); return ret;}/* * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. * Called for pkts coming from OUTside-to-INside. * s_addr, s_port: pkt source address (foreign host) * d_addr, d_port: pkt dest address (load balancer) */static inline struct ip_vs_conn *__ip_vs_conn_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port){ unsigned hash; struct ip_vs_conn *cp; hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (s_addr==cp->caddr && s_port==cp->cport && d_port==cp->vport && d_addr==cp->vaddr && ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && protocol==cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); ct_read_unlock(hash); return cp; } } ct_read_unlock(hash); return NULL;}struct ip_vs_conn *ip_vs_conn_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port){ struct ip_vs_conn *cp; cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port); if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port); IP_VS_DBG(7, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", ip_vs_proto_name(protocol), NIPQUAD(s_addr), ntohs(s_port), NIPQUAD(d_addr), ntohs(d_port), cp?"hit":"not hit"); return cp;}/* Get reference to connection template */struct ip_vs_conn *ip_vs_ct_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port){ unsigned hash; struct ip_vs_conn *cp; hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (s_addr==cp->caddr && s_port==cp->cport && d_port==cp->vport && d_addr==cp->vaddr && cp->flags & IP_VS_CONN_F_TEMPLATE && protocol==cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); goto out; } } cp = NULL; out: ct_read_unlock(hash); IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", ip_vs_proto_name(protocol), NIPQUAD(s_addr), ntohs(s_port), NIPQUAD(d_addr), ntohs(d_port), cp?"hit":"not hit"); return cp;}/* * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. * Called for pkts coming from inside-to-OUTside. * s_addr, s_port: pkt source address (inside host) * d_addr, d_port: pkt dest address (foreign host) */struct ip_vs_conn *ip_vs_conn_out_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port){ unsigned hash; struct ip_vs_conn *cp, *ret=NULL; /* * Check for "full" addressed entries */ hash = ip_vs_conn_hashkey(protocol, d_addr, d_port); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (d_addr == cp->caddr && d_port == cp->cport && s_port == cp->dport && s_addr == cp->daddr && protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); ret = cp; break; } } ct_read_unlock(hash); IP_VS_DBG(7, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", ip_vs_proto_name(protocol), NIPQUAD(s_addr), ntohs(s_port), NIPQUAD(d_addr), ntohs(d_port), ret?"hit":"not hit"); return ret;}/* * Put back the conn and restart its timer with its timeout */void ip_vs_conn_put(struct ip_vs_conn *cp){ /* reset it expire in its timeout */ mod_timer(&cp->timer, jiffies+cp->timeout); __ip_vs_conn_put(cp);}/* * Fill a no_client_port connection with a client port number */void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __u16 cport){ if (ip_vs_conn_unhash(cp)) { spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_NO_CPORT) { atomic_dec(&ip_vs_conn_no_cport_cnt); cp->flags &= ~IP_VS_CONN_F_NO_CPORT; cp->cport = cport; } spin_unlock(&cp->lock); /* hash on new dport */ ip_vs_conn_hash(cp); }}/* * Bind a connection entry with the corresponding packet_xmit. * Called by ip_vs_conn_new. */static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp){ switch (IP_VS_FWD_METHOD(cp)) { case IP_VS_CONN_F_MASQ: cp->packet_xmit = ip_vs_nat_xmit; break; case IP_VS_CONN_F_TUNNEL: cp->packet_xmit = ip_vs_tunnel_xmit; break; case IP_VS_CONN_F_DROUTE: cp->packet_xmit = ip_vs_dr_xmit; break; case IP_VS_CONN_F_LOCALNODE: cp->packet_xmit = ip_vs_null_xmit; break; case IP_VS_CONN_F_BYPASS: cp->packet_xmit = ip_vs_bypass_xmit; break; }}static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest){ return atomic_read(&dest->activeconns) + atomic_read(&dest->inactconns);}/* * Bind a connection entry with a virtual service destination * Called just after a new connection entry is created. */static inline voidip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest){ /* if dest is NULL, then return directly */ if (!dest) return; /* Increase the refcnt counter of the dest */ atomic_inc(&dest->refcnt); /* Bind with the destination and its corresponding transmitter */ cp->flags |= atomic_read(&dest->conn_flags); cp->dest = dest; IP_VS_DBG(9, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n", ip_vs_proto_name(cp->protocol), NIPQUAD(cp->caddr), ntohs(cp->cport), NIPQUAD(cp->vaddr), ntohs(cp->vport), NIPQUAD(cp->daddr), ntohs(cp->dport), ip_vs_fwd_tag(cp), cp->state, cp->flags, atomic_read(&cp->refcnt), atomic_read(&dest->refcnt)); /* Update the connection counters */ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { /* It is a normal connection, so increase the inactive connection counter because it is in TCP SYNRECV state (inactive) or other protocol inacive state */ atomic_inc(&dest->inactconns); } else { /* It is a persistent connection/template, so increase the peristent connection counter */ atomic_inc(&dest->persistconns); } if (dest->u_threshold != 0 && ip_vs_dest_totalconns(dest) >= dest->u_threshold) dest->flags |= IP_VS_DEST_F_OVERLOAD;}/* * Unbind a connection entry with its VS destination * Called by the ip_vs_conn_expire function. */static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp){ struct ip_vs_dest *dest = cp->dest; if (!dest) return; IP_VS_DBG(9, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n", ip_vs_proto_name(cp->protocol), NIPQUAD(cp->caddr), ntohs(cp->cport), NIPQUAD(cp->vaddr), ntohs(cp->vport), NIPQUAD(cp->daddr), ntohs(cp->dport), ip_vs_fwd_tag(cp), cp->state, cp->flags, atomic_read(&cp->refcnt), atomic_read(&dest->refcnt)); /* Update the connection counters */ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { /* It is a normal connection, so decrease the inactconns or activeconns counter */ if (cp->flags & IP_VS_CONN_F_INACTIVE) { atomic_dec(&dest->inactconns); } else { atomic_dec(&dest->activeconns); } } else { /* It is a persistent connection/template, so decrease the peristent connection counter */ atomic_dec(&dest->persistconns); } if (dest->l_threshold != 0) { if (ip_vs_dest_totalconns(dest) < dest->l_threshold) dest->flags &= ~IP_VS_DEST_F_OVERLOAD; } else if (dest->u_threshold != 0) { if (ip_vs_dest_totalconns(dest) * 4 < dest->u_threshold * 3) dest->flags &= ~IP_VS_DEST_F_OVERLOAD; } else { if (dest->flags & IP_VS_DEST_F_OVERLOAD) dest->flags &= ~IP_VS_DEST_F_OVERLOAD; } /* * Simply decrease the refcnt of the dest, because the
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -