📄 bond_alb.c
字号:
/* * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * * The full GNU General Public License is included in this distribution in the * file called LICENSE. * * * Changes: * * 2003/06/25 - Shmulik Hen <shmulik.hen at intel dot com> * - Fixed signed/unsigned calculation errors that caused load sharing * to collapse to one slave under very heavy UDP Tx stress. * * 2003/08/06 - Amir Noam <amir.noam at intel dot com> * - Add support for setting bond's MAC address with special * handling required for ALB/TLB. * * 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com> * - Code cleanup and style changes * * 2003/12/30 - Amir Noam <amir.noam at intel dot com> * - Fixed: Cannot remove and re-enslave the original active slave. * * 2004/01/14 - Shmulik Hen <shmulik.hen at intel dot com> * - Add capability to tag self generated packets in ALB/TLB modes. *///#define BONDING_DEBUG 1#include <linux/skbuff.h>#include <linux/netdevice.h>#include <linux/etherdevice.h>#include <linux/pkt_sched.h>#include <linux/spinlock.h>#include <linux/slab.h>#include <linux/timer.h>#include <linux/ip.h>#include <linux/ipv6.h>#include <linux/if_arp.h>#include <linux/if_ether.h>#include <linux/if_bonding.h>#include <linux/if_vlan.h>#include <linux/in.h>#include <net/ipx.h>#include <net/arp.h>#include <asm/byteorder.h>#include "bonding.h"#include "bond_alb.h"#define ALB_TIMER_TICKS_PER_SEC 10 /* should be a divisor of HZ */#define BOND_TLB_REBALANCE_INTERVAL 10 /* In seconds, periodic re-balancing. * Used for division - never set * to zero !!! */#define BOND_ALB_LP_INTERVAL 1 /* In seconds, periodic send of * learning packets to the switch */#define BOND_TLB_REBALANCE_TICKS (BOND_TLB_REBALANCE_INTERVAL \ * ALB_TIMER_TICKS_PER_SEC)#define BOND_ALB_LP_TICKS (BOND_ALB_LP_INTERVAL \ * ALB_TIMER_TICKS_PER_SEC)#define TLB_HASH_TABLE_SIZE 256 /* The size of the clients hash table. * Note that this value MUST NOT be smaller * because the key hash table is BYTE wide ! */#define TLB_NULL_INDEX 0xffffffff#define MAX_LP_BURST 3/* rlb defs */#define RLB_HASH_TABLE_SIZE 256#define RLB_NULL_INDEX 0xffffffff#define RLB_UPDATE_DELAY 2*ALB_TIMER_TICKS_PER_SEC /* 2 seconds */#define RLB_ARP_BURST_SIZE 2#define RLB_UPDATE_RETRY 3 /* 3-ticks - must be smaller than the rlb * rebalance interval (5 min). *//* RLB_PROMISC_TIMEOUT = 10 sec equals the time that the current slave is * promiscuous after failover */#define RLB_PROMISC_TIMEOUT 10*ALB_TIMER_TICKS_PER_SECstatic const u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff};static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC;#pragma pack(1)struct learning_pkt { u8 mac_dst[ETH_ALEN]; u8 mac_src[ETH_ALEN]; u16 type; u8 padding[ETH_ZLEN - ETH_HLEN];};struct arp_pkt { u16 hw_addr_space; u16 prot_addr_space; u8 hw_addr_len; u8 prot_addr_len; u16 op_code; u8 mac_src[ETH_ALEN]; /* sender hardware address */ u32 ip_src; /* sender IP address */ u8 mac_dst[ETH_ALEN]; /* target hardware address */ u32 ip_dst; /* target IP address */};#pragma pack()/* Forward declaration */static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);static inline u8 _simple_hash(u8 *hash_start, int hash_size){ int i; u8 hash = 0; for (i = 0; i < hash_size; i++) { hash ^= hash_start[i]; } return hash;}/*********************** tlb specific functions ***************************/static inline void _lock_tx_hashtbl(struct bonding *bond){ spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));}static inline void _unlock_tx_hashtbl(struct bonding *bond){ spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));}/* Caller must hold tx_hashtbl lock */static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load){ if (save_load) { entry->load_history = 1 + entry->tx_bytes / BOND_TLB_REBALANCE_INTERVAL; entry->tx_bytes = 0; } entry->tx_slave = NULL; entry->next = TLB_NULL_INDEX; entry->prev = TLB_NULL_INDEX;}static inline void tlb_init_slave(struct slave *slave){ SLAVE_TLB_INFO(slave).load = 0; SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX;}/* Caller must hold bond lock for read */static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_load){ struct tlb_client_info *tx_hash_table; u32 index; _lock_tx_hashtbl(bond); /* clear slave from tx_hashtbl */ tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl; index = SLAVE_TLB_INFO(slave).head; while (index != TLB_NULL_INDEX) { u32 next_index = tx_hash_table[index].next; tlb_init_table_entry(&tx_hash_table[index], save_load); index = next_index; } _unlock_tx_hashtbl(bond); tlb_init_slave(slave);}/* Must be called before starting the monitor timer */static int tlb_initialize(struct bonding *bond){ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); int size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info); int i; spin_lock_init(&(bond_info->tx_hashtbl_lock)); _lock_tx_hashtbl(bond); bond_info->tx_hashtbl = kmalloc(size, GFP_KERNEL); if (!bond_info->tx_hashtbl) { printk(KERN_ERR DRV_NAME ": Error: %s: Failed to allocate TLB hash table\n", bond->dev->name); _unlock_tx_hashtbl(bond); return -1; } memset(bond_info->tx_hashtbl, 0, size); for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) { tlb_init_table_entry(&bond_info->tx_hashtbl[i], 1); } _unlock_tx_hashtbl(bond); return 0;}/* Must be called only after all slaves have been released */static void tlb_deinitialize(struct bonding *bond){ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); _lock_tx_hashtbl(bond); kfree(bond_info->tx_hashtbl); bond_info->tx_hashtbl = NULL; _unlock_tx_hashtbl(bond);}/* Caller must hold bond lock for read */static struct slave *tlb_get_least_loaded_slave(struct bonding *bond){ struct slave *slave, *least_loaded; s64 max_gap; int i, found = 0; /* Find the first enabled slave */ bond_for_each_slave(bond, slave, i) { if (SLAVE_IS_OK(slave)) { found = 1; break; } } if (!found) { return NULL; } least_loaded = slave; max_gap = (s64)(slave->speed << 20) - /* Convert to Megabit per sec */ (s64)(SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */ /* Find the slave with the largest gap */ bond_for_each_slave_from(bond, slave, i, least_loaded) { if (SLAVE_IS_OK(slave)) { s64 gap = (s64)(slave->speed << 20) - (s64)(SLAVE_TLB_INFO(slave).load << 3); if (max_gap < gap) { least_loaded = slave; max_gap = gap; } } } return least_loaded;}/* Caller must hold bond lock for read */static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len){ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct tlb_client_info *hash_table; struct slave *assigned_slave; _lock_tx_hashtbl(bond); hash_table = bond_info->tx_hashtbl; assigned_slave = hash_table[hash_index].tx_slave; if (!assigned_slave) { assigned_slave = tlb_get_least_loaded_slave(bond); if (assigned_slave) { struct tlb_slave_info *slave_info = &(SLAVE_TLB_INFO(assigned_slave)); u32 next_index = slave_info->head; hash_table[hash_index].tx_slave = assigned_slave; hash_table[hash_index].next = next_index; hash_table[hash_index].prev = TLB_NULL_INDEX; if (next_index != TLB_NULL_INDEX) { hash_table[next_index].prev = hash_index; } slave_info->head = hash_index; slave_info->load += hash_table[hash_index].load_history; } } if (assigned_slave) { hash_table[hash_index].tx_bytes += skb_len; } _unlock_tx_hashtbl(bond); return assigned_slave;}/*********************** rlb specific functions ***************************/static inline void _lock_rx_hashtbl(struct bonding *bond){ spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));}static inline void _unlock_rx_hashtbl(struct bonding *bond){ spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));}/* when an ARP REPLY is received from a client update its info * in the rx_hashtbl */static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp){ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct rlb_client_info *client_info; u32 hash_index; _lock_rx_hashtbl(bond); hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src)); client_info = &(bond_info->rx_hashtbl[hash_index]); if ((client_info->assigned) && (client_info->ip_src == arp->ip_dst) && (client_info->ip_dst == arp->ip_src)) { /* update the clients MAC address */ memcpy(client_info->mac_dst, arp->mac_src, ETH_ALEN); client_info->ntt = 1; bond_info->rx_ntt = 1; } _unlock_rx_hashtbl(bond);}static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev){ struct bonding *bond = bond_dev->priv; struct arp_pkt *arp = (struct arp_pkt *)skb->data; int res = NET_RX_DROP; if (!(bond_dev->flags & IFF_MASTER)) goto out; if (!arp) { dprintk("Packet has no ARP data\n"); goto out; } if (skb->len < sizeof(struct arp_pkt)) { dprintk("Packet is too small to be an ARP\n"); goto out; } if (arp->op_code == htons(ARPOP_REPLY)) { /* update rx hash table for this ARP */ rlb_update_entry_from_arp(bond, arp); dprintk("Server received an ARP Reply from client\n"); } res = NET_RX_SUCCESS;out: dev_kfree_skb(skb); return res;}/* Caller must hold bond lock for read */static struct slave *rlb_next_rx_slave(struct bonding *bond){ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct slave *rx_slave, *slave, *start_at; int i = 0; if (bond_info->next_rx_slave) { start_at = bond_info->next_rx_slave; } else { start_at = bond->first_slave; } rx_slave = NULL; bond_for_each_slave_from(bond, slave, i, start_at) { if (SLAVE_IS_OK(slave)) { if (!rx_slave) { rx_slave = slave; } else if (slave->speed > rx_slave->speed) { rx_slave = slave; } } } if (rx_slave) { bond_info->next_rx_slave = rx_slave->next; } return rx_slave;}/* teach the switch the mac of a disabled slave * on the primary for fault tolerance * * Caller must hold bond->curr_slave_lock for write or bond lock for write */static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[]){ if (!bond->curr_active_slave) { return; } if (!bond->alb_info.primary_is_promisc) { bond->alb_info.primary_is_promisc = 1; dev_set_promiscuity(bond->curr_active_slave->dev, 1); } bond->alb_info.rlb_promisc_timeout_counter = 0; alb_send_learning_packets(bond->curr_active_slave, addr);}/* slave being removed should not be active at this point * * Caller must hold bond lock for read */static void rlb_clear_slave(struct bonding *bond, struct slave *slave){ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct rlb_client_info *rx_hash_table; u32 index, next_index; /* clear slave from rx_hashtbl */ _lock_rx_hashtbl(bond); rx_hash_table = bond_info->rx_hashtbl; index = bond_info->rx_hashtbl_head; for (; index != RLB_NULL_INDEX; index = next_index) { next_index = rx_hash_table[index].next; if (rx_hash_table[index].slave == slave) { struct slave *assigned_slave = rlb_next_rx_slave(bond); if (assigned_slave) { rx_hash_table[index].slave = assigned_slave; if (memcmp(rx_hash_table[index].mac_dst, mac_bcast, ETH_ALEN)) { bond_info->rx_hashtbl[index].ntt = 1; bond_info->rx_ntt = 1; /* A slave has been removed from the * table because it is either disabled * or being released. We must retry the * update to avoid clients from not * being updated & disconnecting when * there is stress */ bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY; } } else { /* there is no active slave */ rx_hash_table[index].slave = NULL; } } } _unlock_rx_hashtbl(bond); write_lock(&bond->curr_slave_lock); if (slave != bond->curr_active_slave) { rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr); } write_unlock(&bond->curr_slave_lock);}static void rlb_update_client(struct rlb_client_info *client_info){ int i; if (!client_info->slave) { return; } for (i = 0; i < RLB_ARP_BURST_SIZE; i++) { struct sk_buff *skb; skb = arp_create(ARPOP_REPLY, ETH_P_ARP, client_info->ip_dst, client_info->slave->dev, client_info->ip_src, client_info->mac_dst, client_info->slave->dev->dev_addr, client_info->mac_dst); if (!skb) { printk(KERN_ERR DRV_NAME ": Error: failed to create an ARP packet\n"); continue; } skb->dev = client_info->slave->dev; if (client_info->tag) { skb = vlan_put_tag(skb, client_info->vlan_id); if (!skb) { printk(KERN_ERR DRV_NAME ": Error: failed to insert VLAN tag\n"); continue; } } arp_xmit(skb); }}/* sends ARP REPLIES that update the clients that need updating */static void rlb_update_rx_clients(struct bonding *bond){ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct rlb_client_info *client_info; u32 hash_index; _lock_rx_hashtbl(bond); hash_index = bond_info->rx_hashtbl_head; for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { client_info = &(bond_info->rx_hashtbl[hash_index]); if (client_info->ntt) { rlb_update_client(client_info); if (bond_info->rlb_update_retry_counter == 0) { client_info->ntt = 0; } } } /* do not update the entries again untill this counter is zero so that * not to confuse the clients. */ bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY; _unlock_rx_hashtbl(bond);}/* The slave was assigned a new mac address - update the clients */static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -