📄 ip_vs_core.c
字号:
/* * IPVS An implementation of the IP virtual server support for the * LINUX operating system. IPVS is now implemented as a module * over the Netfilter framework. IPVS can be used to build a * high-performance and highly available server based on a * cluster of servers. * * Version: $Id: ip_vs_core.c,v 1.34 2003/05/10 03:05:23 wensong Exp $ * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * Julian Anastasov <ja@ssi.bg> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese, * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms * and others. * * Changes: * Paul `Rusty' Russell properly handle non-linear skbs * Harald Welte don't use nfcache * */#include <linux/module.h>#include <linux/kernel.h>#include <linux/ip.h>#include <linux/tcp.h>#include <linux/icmp.h>#include <net/ip.h>#include <net/tcp.h>#include <net/udp.h>#include <net/icmp.h> /* for icmp_send */#include <net/route.h>#include <linux/netfilter.h>#include <linux/netfilter_ipv4.h>#include <net/ip_vs.h>EXPORT_SYMBOL(register_ip_vs_scheduler);EXPORT_SYMBOL(unregister_ip_vs_scheduler);EXPORT_SYMBOL(ip_vs_skb_replace);EXPORT_SYMBOL(ip_vs_proto_name);EXPORT_SYMBOL(ip_vs_conn_new);EXPORT_SYMBOL(ip_vs_conn_in_get);EXPORT_SYMBOL(ip_vs_conn_out_get);#ifdef CONFIG_IP_VS_PROTO_TCPEXPORT_SYMBOL(ip_vs_tcp_conn_listen);#endifEXPORT_SYMBOL(ip_vs_conn_put);#ifdef CONFIG_IP_VS_DEBUGEXPORT_SYMBOL(ip_vs_get_debug_level);#endifEXPORT_SYMBOL(ip_vs_make_skb_writable);/* ID used in ICMP lookups */#define icmp_id(icmph) (((icmph)->un).echo.id)const char *ip_vs_proto_name(unsigned proto){ static char buf[20]; switch (proto) { case IPPROTO_IP: return "IP"; case IPPROTO_UDP: return "UDP"; case IPPROTO_TCP: return "TCP"; case IPPROTO_ICMP: return "ICMP"; default: sprintf(buf, "IP_%d", proto); return buf; }}void ip_vs_init_hash_table(struct list_head *table, int rows){ while (--rows >= 0) INIT_LIST_HEAD(&table[rows]);}static inline voidip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb){ struct ip_vs_dest *dest = cp->dest; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { spin_lock(&dest->stats.lock); dest->stats.inpkts++; dest->stats.inbytes += skb->len; spin_unlock(&dest->stats.lock); spin_lock(&dest->svc->stats.lock); dest->svc->stats.inpkts++; dest->svc->stats.inbytes += skb->len; spin_unlock(&dest->svc->stats.lock); spin_lock(&ip_vs_stats.lock); ip_vs_stats.inpkts++; ip_vs_stats.inbytes += skb->len; spin_unlock(&ip_vs_stats.lock); }}static inline voidip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb){ struct ip_vs_dest *dest = cp->dest; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { spin_lock(&dest->stats.lock); dest->stats.outpkts++; dest->stats.outbytes += skb->len; spin_unlock(&dest->stats.lock); spin_lock(&dest->svc->stats.lock); dest->svc->stats.outpkts++; dest->svc->stats.outbytes += skb->len; spin_unlock(&dest->svc->stats.lock); spin_lock(&ip_vs_stats.lock); ip_vs_stats.outpkts++; ip_vs_stats.outbytes += skb->len; spin_unlock(&ip_vs_stats.lock); }}static inline voidip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc){ spin_lock(&cp->dest->stats.lock); cp->dest->stats.conns++; spin_unlock(&cp->dest->stats.lock); spin_lock(&svc->stats.lock); svc->stats.conns++; spin_unlock(&svc->stats.lock); spin_lock(&ip_vs_stats.lock); ip_vs_stats.conns++; spin_unlock(&ip_vs_stats.lock);}static inline intip_vs_set_state(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, struct ip_vs_protocol *pp){ if (unlikely(!pp->state_transition)) return 0; return pp->state_transition(cp, direction, skb, pp);}int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len){ struct sk_buff *skb = *pskb; /* skb is already used, better copy skb and its payload */ if (unlikely(skb_shared(skb) || skb->sk)) goto copy_skb; /* skb data is already used, copy it */ if (unlikely(skb_cloned(skb))) goto copy_data; return pskb_may_pull(skb, writable_len); copy_data: if (unlikely(writable_len > skb->len)) return 0; return !pskb_expand_head(skb, 0, 0, GFP_ATOMIC); copy_skb: if (unlikely(writable_len > skb->len)) return 0; skb = skb_copy(skb, GFP_ATOMIC); if (!skb) return 0; BUG_ON(skb_is_nonlinear(skb)); /* Rest of kernel will get very unhappy if we pass it a suddenly-orphaned skbuff */ if ((*pskb)->sk) skb_set_owner_w(skb, (*pskb)->sk); kfree_skb(*pskb); *pskb = skb; return 1;}/* * IPVS persistent scheduling function * It creates a connection entry according to its template if exists, * or selects a server and creates a connection entry plus a template. * Locking: we are svc user (svc->refcnt), so we hold all dests too * Protocols supported: TCP, UDP */static struct ip_vs_conn *ip_vs_sched_persist(struct ip_vs_service *svc, const struct sk_buff *skb, __u16 ports[2]){ struct ip_vs_conn *cp = NULL; struct iphdr *iph = skb->nh.iph; struct ip_vs_dest *dest; struct ip_vs_conn *ct; __u16 dport; /* destination port to forward */ __u32 snet; /* source network of the client, after masking */ /* Mask saddr with the netmask to adjust template granularity */ snet = iph->saddr & svc->netmask; IP_VS_DBG(6, "p-schedule: src %u.%u.%u.%u:%u dest %u.%u.%u.%u:%u " "mnet %u.%u.%u.%u\n", NIPQUAD(iph->saddr), ntohs(ports[0]), NIPQUAD(iph->daddr), ntohs(ports[1]), NIPQUAD(snet)); /* * As far as we know, FTP is a very complicated network protocol, and * it uses control connection and data connections. For active FTP, * FTP server initialize data connection to the client, its source port * is often 20. For passive FTP, FTP server tells the clients the port * that it passively listens to, and the client issues the data * connection. In the tunneling or direct routing mode, the load * balancer is on the client-to-server half of connection, the port * number is unknown to the load balancer. So, a conn template like * <caddr, 0, vaddr, 0, daddr, 0> is created for persistent FTP * service, and a template like <caddr, 0, vaddr, vport, daddr, dport> * is created for other persistent services. */ if (ports[1] == svc->port) { /* Check if a template already exists */ if (svc->port != FTPPORT) ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, ports[1]); else ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, 0); if (!ct || !ip_vs_check_template(ct)) { /* * No template found or the dest of the connection * template is not available. */ dest = svc->scheduler->schedule(svc, skb); if (dest == NULL) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); return NULL; } /* * Create a template like <protocol,caddr,0, * vaddr,vport,daddr,dport> for non-ftp service, * and <protocol,caddr,0,vaddr,0,daddr,0> * for ftp service. */ if (svc->port != FTPPORT) ct = ip_vs_conn_new(iph->protocol, snet, 0, iph->daddr, ports[1], dest->addr, dest->port, IP_VS_CONN_F_TEMPLATE, dest); else ct = ip_vs_conn_new(iph->protocol, snet, 0, iph->daddr, 0, dest->addr, 0, IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) return NULL; ct->timeout = svc->timeout; } else { /* set destination with the found template */ dest = ct->dest; } dport = dest->port; } else { /* * Note: persistent fwmark-based services and persistent * port zero service are handled here. * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0> * port zero template: <protocol,caddr,0,vaddr,0,daddr,0> */ if (svc->fwmark) ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0, htonl(svc->fwmark), 0); else ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, 0); if (!ct || !ip_vs_check_template(ct)) { /* * If it is not persistent port zero, return NULL, * otherwise create a connection template. */ if (svc->port) return NULL; dest = svc->scheduler->schedule(svc, skb); if (dest == NULL) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); return NULL; } /* * Create a template according to the service */ if (svc->fwmark) ct = ip_vs_conn_new(IPPROTO_IP, snet, 0, htonl(svc->fwmark), 0, dest->addr, 0, IP_VS_CONN_F_TEMPLATE, dest); else ct = ip_vs_conn_new(iph->protocol, snet, 0, iph->daddr, 0, dest->addr, 0, IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) return NULL; ct->timeout = svc->timeout; } else { /* set destination with the found template */ dest = ct->dest; } dport = ports[1]; } /* * Create a new connection according to the template */ cp = ip_vs_conn_new(iph->protocol, iph->saddr, ports[0], iph->daddr, ports[1], dest->addr, dport, 0, dest); if (cp == NULL) { ip_vs_conn_put(ct); return NULL; } /* * Add its control */ ip_vs_control_add(cp, ct); ip_vs_conn_put(ct); ip_vs_conn_stats(cp, svc); return cp;}/* * IPVS main scheduling function * It selects a server according to the virtual service, and * creates a connection entry. * Protocols supported: TCP, UDP */struct ip_vs_conn *ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb){ struct ip_vs_conn *cp = NULL; struct iphdr *iph = skb->nh.iph; struct ip_vs_dest *dest; __u16 _ports[2], *pptr; pptr = skb_header_pointer(skb, iph->ihl*4, sizeof(_ports), _ports); if (pptr == NULL) return NULL; /* * Persistent service */ if (svc->flags & IP_VS_SVC_F_PERSISTENT) return ip_vs_sched_persist(svc, skb, pptr);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -