📄 bonding.c
字号:
/* * originally based on the dummy device. * * Copyright 1999, Thomas Davis, tadavis@lbl.gov. * Licensed under the GPL. Based on dummy.c, and eql.c devices. * * bonding.c: an Ethernet Bonding driver * * This is useful to talk to a Cisco EtherChannel compatible equipment: * Cisco 5500 * Sun Trunking (Solaris) * Alteon AceDirector Trunks * Linux Bonding * and probably many L2 switches ... * * How it works: * ifconfig bond0 ipaddress netmask up * will setup a network device, with an ip address. No mac address * will be assigned at this time. The hw mac address will come from * the first slave bonded to the channel. All slaves will then use * this hw mac address. * * ifconfig bond0 down * will release all slaves, marking them as down. * * ifenslave bond0 eth0 * will attach eth0 to bond0 as a slave. eth0 hw mac address will either * a: be used as initial mac address * b: if a hw mac address already is there, eth0's hw mac address * will then be set from bond0. * * v0.1 - first working version. * v0.2 - changed stats to be calculated by summing slaves stats. * * Changes: * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * - fix leaks on failure at bond_init * * 2000/09/30 - Willy Tarreau <willy at meta-x.org> * - added trivial code to release a slave device. * - fixed security bug (CAP_NET_ADMIN not checked) * - implemented MII link monitoring to disable dead links : * All MII capable slaves are checked every <miimon> milliseconds * (100 ms seems good). This value can be changed by passing it to * insmod. A value of zero disables the monitoring (default). * - fixed an infinite loop in bond_xmit_roundrobin() when there's no * good slave. * - made the code hopefully SMP safe * * 2000/10/03 - Willy Tarreau <willy at meta-x.org> * - optimized slave lists based on relevant suggestions from Thomas Davis * - implemented active-backup method to obtain HA with two switches: * stay as long as possible on the same active interface, while we * also monitor the backup one (MII link status) because we want to know * if we are able to switch at any time. ( pass "mode=1" to insmod ) * - lots of stress testings because we need it to be more robust than the * wires ! :-> * * 2000/10/09 - Willy Tarreau <willy at meta-x.org> * - added up and down delays after link state change. * - optimized the slaves chaining so that when we run forward, we never * repass through the bond itself, but we can find it by searching * backwards. Renders the deletion more difficult, but accelerates the * scan. * - smarter enslaving and releasing. * - finer and more robust SMP locking * * 2000/10/17 - Willy Tarreau <willy at meta-x.org> * - fixed two potential SMP race conditions * * 2000/10/18 - Willy Tarreau <willy at meta-x.org> * - small fixes to the monitoring FSM in case of zero delays * 2000/11/01 - Willy Tarreau <willy at meta-x.org> * - fixed first slave not automatically used in trunk mode. * 2000/11/10 : spelling of "EtherChannel" corrected. * 2000/11/13 : fixed a race condition in case of concurrent accesses to ioctl(). * 2000/12/16 : fixed improper usage of rtnl_exlock_nowait(). * * 2001/1/3 - Chad N. Tindel <ctindel at ieee dot org> * - The bonding driver now simulates MII status monitoring, just like * a normal network device. It will show that the link is down iff * every slave in the bond shows that their links are down. If at least * one slave is up, the bond's MII status will appear as up. * * 2001/2/7 - Chad N. Tindel <ctindel at ieee dot org> * - Applications can now query the bond from user space to get * information which may be useful. They do this by calling * the BOND_INFO_QUERY ioctl. Once the app knows how many slaves * are in the bond, it can call the BOND_SLAVE_INFO_QUERY ioctl to * get slave specific information (# link failures, etc). See * <linux/if_bonding.h> for more details. The structs of interest * are ifbond and ifslave. * * 2001/4/5 - Chad N. Tindel <ctindel at ieee dot org> * - Ported to 2.4 Kernel * * 2001/5/2 - Jeffrey E. Mast <jeff at mastfamily dot com> * - When a device is detached from a bond, the slave device is no longer * left thinking that is has a master. * * 2001/5/16 - Jeffrey E. Mast <jeff at mastfamily dot com> * - memset did not appropriately initialized the bond rw_locks. Used * rwlock_init to initialize to unlocked state to prevent deadlock when * first attempting a lock * - Called SET_MODULE_OWNER for bond device * * 2001/5/17 - Tim Anderson <tsa at mvista.com> * - 2 paths for releasing for slave release; 1 through ioctl * and 2) through close. Both paths need to release the same way. * - the free slave in bond release is changing slave status before * the free. The netdev_set_master() is intended to change slave state * so it should not be done as part of the release process. * - Simple rule for slave state at release: only the active in A/B and * only one in the trunked case. * * 2001/6/01 - Tim Anderson <tsa at mvista.com> * - Now call dev_close when releasing a slave so it doesn't screw up * out routing table. * * 2001/6/01 - Chad N. Tindel <ctindel at ieee dot org> * - Added /proc support for getting bond and slave information. * Information is in /proc/net/<bond device>/info. * - Changed the locking when calling bond_close to prevent deadlock. * * 2001/8/05 - Janice Girouard <girouard at us.ibm.com> * - correct problem where refcnt of slave is not incremented in bond_ioctl * so the system hangs when halting. * - correct locking problem when unable to malloc in bond_enslave. * - adding bond_xmit_xor logic. * - adding multiple bond device support. * * 2001/8/13 - Erik Habbinga <erik_habbinga at hp dot com> * - correct locking problem with rtnl_exlock_nowait * * 2001/8/23 - Janice Girouard <girouard at us.ibm.com> * - bzero initial dev_bonds, to correct oops * - convert SIOCDEVPRIVATE to new MII ioctl calls * * 2001/9/13 - Takao Indoh <indou dot takao at jp dot fujitsu dot com> * - Add the BOND_CHANGE_ACTIVE ioctl implementation * * 2001/9/14 - Mark Huth <mhuth at mvista dot com> * - Change MII_LINK_READY to not check for end of auto-negotiation, * but only for an up link. * * 2001/9/20 - Chad N. Tindel <ctindel at ieee dot org> * - Add the device field to bonding_t. Previously the net_device * corresponding to a bond wasn't available from the bonding_t * structure. * * 2001/9/25 - Janice Girouard <girouard at us.ibm.com> * - add arp_monitor for active backup mode * * 2001/10/23 - Takao Indoh <indou dot takao at jp dot fujitsu dot com> * - Various memory leak fixes * * 2001/11/5 - Mark Huth <mark dot huth at mvista dot com> * - Don't take rtnl lock in bond_mii_monitor as it deadlocks under * certain hotswap conditions. * Note: this same change may be required in bond_arp_monitor ??? * - Remove possibility of calling bond_sethwaddr with NULL slave_dev ptr * - Handle hot swap ethernet interface deregistration events to remove * kernel oops following hot swap of enslaved interface */#include <linux/config.h>#include <linux/kernel.h>#include <linux/module.h>#include <linux/sched.h>#include <linux/types.h>#include <linux/fcntl.h>#include <linux/interrupt.h>#include <linux/ptrace.h>#include <linux/ioport.h>#include <linux/in.h>#include <linux/slab.h>#include <linux/string.h>#include <linux/init.h>#include <linux/timer.h>#include <linux/socket.h>#include <asm/system.h>#include <asm/bitops.h>#include <asm/io.h>#include <asm/dma.h>#include <asm/uaccess.h>#include <linux/errno.h>#include <linux/netdevice.h>#include <linux/etherdevice.h>#include <linux/skbuff.h>#include <net/sock.h>#include <linux/rtnetlink.h>#include <linux/if_bonding.h>#include <linux/smp.h>#include <linux/if_ether.h>#include <linux/if_arp.h>/* monitor all links that often (in milliseconds). <=0 disables monitoring */#ifndef BOND_LINK_MON_INTERV#define BOND_LINK_MON_INTERV 0#endif#undef MII_LINK_UP#define MII_LINK_UP 0x04#undef MII_ENDOF_NWAY#define MII_ENDOF_NWAY 0x20#undef MII_LINK_READY/*#define MII_LINK_READY (MII_LINK_UP | MII_ENDOF_NWAY)*/#define MII_LINK_READY (MII_LINK_UP)#define MAX_BOND_ADDR 256#ifndef BOND_LINK_ARP_INTERV#define BOND_LINK_ARP_INTERV 0#endifstatic int arp_interval = BOND_LINK_ARP_INTERV;static char *arp_ip_target = NULL;static unsigned long arp_target = 0;static u32 my_ip = 0;char *arp_target_hw_addr = NULL;static int max_bonds = MAX_BONDS;static int miimon = BOND_LINK_MON_INTERV;static int mode = BOND_MODE_ROUNDROBIN;static int updelay = 0;static int downdelay = 0;static int first_pass = 1;int bond_cnt;static struct bonding *these_bonds = NULL;static struct net_device *dev_bonds = NULL;MODULE_PARM(max_bonds, "1-" __MODULE_STRING(INT_MAX) "i");MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");MODULE_PARM(miimon, "i");MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");MODULE_PARM(mode, "i");MODULE_PARM(arp_interval, "i");MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");MODULE_PARM(arp_ip_target, "1-12s");MODULE_PARM_DESC(arp_ip_target, "arp target in n.n.n.n form");MODULE_PARM_DESC(mode, "Mode of operation : 0 for round robin, 1 for active-backup, 2 for xor");MODULE_PARM(updelay, "i");MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds");MODULE_PARM(downdelay, "i");MODULE_PARM_DESC(downdelay, "Delay before considering link down, in milliseconds");extern void arp_send( int type, int ptype, u32 dest_ip, struct net_device *dev, u32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, unsigned char *target_hw);static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev);static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev);static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev);static struct net_device_stats *bond_get_stats(struct net_device *dev);static void bond_mii_monitor(struct net_device *dev);static void bond_arp_monitor(struct net_device *dev);static int bond_event(struct notifier_block *this, unsigned long event, void *ptr);static void bond_set_slave_inactive_flags(slave_t *slave);static void bond_set_slave_active_flags(slave_t *slave);static int bond_enslave(struct net_device *master, struct net_device *slave);static int bond_release(struct net_device *master, struct net_device *slave);static int bond_release_all(struct net_device *master);static int bond_sethwaddr(struct net_device *master, struct net_device *slave);/* * bond_get_info is the interface into the /proc filesystem. This is * a different interface than the BOND_INFO_QUERY ioctl. That is done * through the generic networking ioctl interface, and bond_info_query * is the internal function which provides that information. */static int bond_get_info(char *buf, char **start, off_t offset, int length);/* #define BONDING_DEBUG 1 *//* several macros */#define IS_UP(dev) ((((dev)->flags & (IFF_UP)) == (IFF_UP)) && \ (netif_running(dev) && netif_carrier_ok(dev)))static void bond_set_slave_inactive_flags(slave_t *slave){ slave->state = BOND_STATE_BACKUP; slave->dev->flags |= IFF_NOARP;}static void bond_set_slave_active_flags(slave_t *slave){ slave->state = BOND_STATE_ACTIVE; slave->dev->flags &= ~IFF_NOARP;}/* * This function detaches the slave <slave> from the list <bond>. * WARNING: no check is made to verify if the slave effectively * belongs to <bond>. It returns <slave> in case it's needed. * Nothing is freed on return, structures are just unchained. * If the bond->current_slave pointer was pointing to <slave>, * it's replaced with slave->next, or <bond> if not applicable. */static slave_t *bond_detach_slave(bonding_t *bond, slave_t *slave){ if ((bond == NULL) || (slave == NULL) || ((void *)bond == (void *)slave)) { printk(KERN_ERR "bond_detach_slave(): trying to detach " "slave %p from bond %p\n", bond, slave); return slave; } if (bond->next == slave) { /* is the slave at the head ? */ if (bond->prev == slave) { /* is the slave alone ? */ write_lock(&bond->ptrlock); bond->current_slave = NULL; /* no slave anymore */ write_unlock(&bond->ptrlock); bond->prev = bond->next = (slave_t *)bond; } else { /* not alone */ bond->next = slave->next; slave->next->prev = (slave_t *)bond; bond->prev->next = slave->next; write_lock(&bond->ptrlock); if (bond->current_slave == slave) { bond->current_slave = slave->next; } write_unlock(&bond->ptrlock); } } else { slave->prev->next = slave->next; if (bond->prev == slave) { /* is this slave the last one ? */ bond->prev = slave->prev; } else { slave->next->prev = slave->prev; } write_lock(&bond->ptrlock); if (bond->current_slave == slave) { bond->current_slave = slave->next; } write_unlock(&bond->ptrlock); } return slave;}/* * if <dev> supports MII link status reporting, check its link * and report it as a bit field in a short int : * - 0x04 means link is up, * - 0x20 means end of autonegociation * If the device doesn't support MII, then we only report 0x24, * meaning that the link is up and running since we can't check it. */static u16 bond_check_dev_link(struct net_device *dev){ static int (* ioctl)(struct net_device *, struct ifreq *, int); struct ifreq ifr; u16 *data = (u16 *)&ifr.ifr_data; /* data[0] automagically filled by the ioctl */ data[1] = 1; /* MII location 1 reports Link Status */ if (((ioctl = dev->do_ioctl) != NULL) && /* ioctl to access MII */ (ioctl(dev, &ifr, SIOCGMIIPHY) == 0)) { /* now, data[3] contains info about link status : - data[3] & 0x04 means link up - data[3] & 0x20 means end of auto-negociation */ return data[3]; } else { return MII_LINK_READY; /* spoof link up ( we can't check it) */ }}static u16 bond_check_mii_link(bonding_t *bond){ int has_active_interface = 0; unsigned long flags; read_lock_irqsave(&bond->lock, flags); read_lock(&bond->ptrlock); has_active_interface = (bond->current_slave != NULL); read_unlock(&bond->ptrlock); read_unlock_irqrestore(&bond->lock, flags); return (has_active_interface ? MII_LINK_READY : 0);}static int bond_open(struct net_device *dev){ struct timer_list *timer = &((struct bonding *)(dev->priv))->mii_timer; struct timer_list *arp_timer = &((struct bonding *)(dev->priv))->arp_timer; MOD_INC_USE_COUNT; if (miimon > 0) { /* link check interval, in milliseconds. */ init_timer(timer); timer->expires = jiffies + (miimon * HZ / 1000); timer->data = (unsigned long)dev; timer->function = (void *)&bond_mii_monitor; add_timer(timer); } if (arp_interval> 0) { /* arp interval, in milliseconds. */ init_timer(arp_timer); arp_timer->expires = jiffies + (arp_interval * HZ / 1000); arp_timer->data = (unsigned long)dev; arp_timer->function = (void *)&bond_arp_monitor; add_timer(arp_timer); } return 0;}static int bond_close(struct net_device *master){ bonding_t *bond = (struct bonding *) master->priv; unsigned long flags; write_lock_irqsave(&bond->lock, flags); if (miimon > 0) { /* link check interval, in milliseconds. */ del_timer(&bond->mii_timer); } if (arp_interval> 0) { /* arp interval, in milliseconds. */ del_timer(&bond->arp_timer); } /* Release the bonded slaves */ bond_release_all(master); write_unlock_irqrestore(&bond->lock, flags); MOD_DEC_USE_COUNT; return 0;}static void set_multicast_list(struct net_device *master){/* bonding_t *bond = master->priv; slave_t *slave; for (slave = bond->next; slave != (slave_t*)bond; slave = slave->next) { slave->dev->mc_list = master->mc_list; slave->dev->mc_count = master->mc_count; slave->dev->flags = master->flags; slave->dev->set_multicast_list(slave->dev); } */}/* * This function counts the the number of attached * slaves for use by bond_xmit_xor. */static void update_slave_cnt(bonding_t *bond){ slave_t *slave = NULL; bond->slave_cnt = 0; for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev) { bond->slave_cnt++; }}/* enslave device <slave> to bond device <master> */static int bond_enslave(struct net_device *master_dev, struct net_device *slave_dev){ bonding_t *bond = NULL; slave_t *new_slave = NULL; unsigned long flags = 0; int ndx = 0; int err = 0; if (master_dev == NULL || slave_dev == NULL) { return -ENODEV; } bond = (struct bonding *) master_dev->priv; if (slave_dev->do_ioctl == NULL) { printk(KERN_DEBUG "Warning : no link monitoring support for %s\n", slave_dev->name); } write_lock_irqsave(&bond->lock, flags); /* not running. */ if ((slave_dev->flags & IFF_UP) != IFF_UP) {#ifdef BONDING_DEBUG printk(KERN_CRIT "Error, slave_dev is not running\n");#endif write_unlock_irqrestore(&bond->lock, flags); return -EINVAL; } /* already enslaved */ if (master_dev->flags & IFF_SLAVE || slave_dev->flags & IFF_SLAVE) {#ifdef BONDING_DEBUG printk(KERN_CRIT "Error, Device was already enslaved\n");#endif write_unlock_irqrestore(&bond->lock, flags); return -EBUSY; } if ((new_slave = kmalloc(sizeof(slave_t), GFP_KERNEL)) == NULL) { write_unlock_irqrestore(&bond->lock, flags); return -ENOMEM; } memset(new_slave, 0, sizeof(slave_t)); err = netdev_set_master(slave_dev, master_dev); if (err) {#ifdef BONDING_DEBUG printk(KERN_CRIT "Error %d calling netdev_set_master\n", err);#endif kfree(new_slave); write_unlock_irqrestore(&bond->lock, flags); return err; } new_slave->dev = slave_dev;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -