iseries_veth.c

来自「Linux Kernel 2.6.9 for OMAP1710」· C语言 代码 · 共 1,424 行 · 第 1/3 页

C
1,424
字号
/* File veth.c created by Kyle A. Lucke on Mon Aug  7 2000. */
/*
 * IBM eServer iSeries Virtual Ethernet Device Driver
 * Copyright (C) 2001 Kyle A. Lucke (klucke@us.ibm.com), IBM Corp.
 * Substantially cleaned up by:
 * Copyright (C) 2003 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 *
 * This module implements the virtual ethernet device for iSeries LPAR
 * Linux.  It uses hypervisor message passing to implement an
 * ethernet-like network device communicating between partitions on
 * the iSeries.
 *
 * The iSeries LPAR hypervisor currently allows for up to 16 different
 * virtual ethernets.  These are all dynamically configurable on
 * OS/400 partitions, but dynamic configuration is not supported under
 * Linux yet.  An ethXX network device will be created for each
 * virtual ethernet this partition is connected to.
 *
 * - This driver is responsible for routing packets to and from other
 *   partitions.  The MAC addresses used by the virtual ethernets
 *   contains meaning and must not be modified.
 *
 * - Having 2 virtual ethernets to the same remote partition DOES NOT
 *   double the available bandwidth.  The 2 devices will share the
 *   available hypervisor bandwidth.
 *
 * - If you send a packet to your own mac address, it will just be
 *   dropped, you won't get it on the receive side.
 *
 * - Multicast is implemented by sending the frame to every
 *   other partition.  It is the responsibility of the receiving
 *   partition to filter the addresses desired.
 *
 * Tunable parameters:
 *
 * VETH_NUMBUFFERS: This compile time option defaults to 120.  It
 * controls how much memory Linux will allocate per remote partition
 * it is communicating with.  It can be thought of as the maximum
 * number of packets outstanding to a remote partition at a time.
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/version.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/ethtool.h>
#include <asm/iSeries/mf.h>
#include <asm/iSeries/iSeries_pci.h>
#include <asm/uaccess.h>

#include <asm/iSeries/HvLpConfig.h>
#include <asm/iSeries/HvTypes.h>
#include <asm/iSeries/HvLpEvent.h>
#include <asm/iommu.h>
#include <asm/vio.h>

#include "iseries_veth.h"

MODULE_AUTHOR("Kyle Lucke <klucke@us.ibm.com>");
MODULE_DESCRIPTION("iSeries Virtual ethernet driver");
MODULE_LICENSE("GPL");

#define VETH_NUMBUFFERS		(120)
#define VETH_ACKTIMEOUT 	(1000000) /* microseconds */
#define VETH_MAX_MCAST		(12)

#define VETH_MAX_MTU		(9000)

/* ACK_THRESHOLD is chosen at compile time from VETH_NUMBUFFERS. */
#if VETH_NUMBUFFERS < 10
#define ACK_THRESHOLD 		(1)
#elif VETH_NUMBUFFERS < 20
#define ACK_THRESHOLD 		(4)
#elif VETH_NUMBUFFERS < 40
#define ACK_THRESHOLD 		(10)
#else
#define ACK_THRESHOLD 		(20)
#endif

/* Bit flags kept in veth_lpar_connection.state */
#define	VETH_STATE_SHUTDOWN	(0x0001)
#define VETH_STATE_OPEN		(0x0002)
#define VETH_STATE_RESET	(0x0004)
#define VETH_STATE_SENTMON	(0x0008)
#define VETH_STATE_SENTCAPS	(0x0010)
#define VETH_STATE_GOTCAPACK	(0x0020)
#define VETH_STATE_GOTCAPS	(0x0040)
#define VETH_STATE_SENTCAPACK	(0x0080)
#define VETH_STATE_READY	(0x0100)

/*
 * One message slot of a connection.  'next' links the slot into the
 * connection's message stack (see veth_stack_push/veth_stack_pop).
 */
struct veth_msg {
	struct veth_msg *next;
	struct VethFramesData data;
	int token;
	unsigned long in_use;
	struct sk_buff *skb;
	struct device *dev;
};

/*
 * Per remote partition connection state.  'state' holds the
 * VETH_STATE_* flags and is modified under 'lock'; 'msg_stack_head'
 * is modified under 'msg_stack_lock'.
 */
struct veth_lpar_connection {
	HvLpIndex remote_lp;
	struct work_struct statemachine_wq;
	struct veth_msg *msgs;
	int num_events;
	struct VethCapData local_caps;

	struct timer_list ack_timer;

	spinlock_t lock;
	unsigned long state;
	HvLpInstanceId src_inst;
	HvLpInstanceId dst_inst;
	struct VethLpEvent cap_event, cap_ack_event;
	u16 pending_acks[VETH_MAX_ACKS_PER_MSG];
	u32 num_pending_acks;

	int num_ack_events;
	struct VethCapData remote_caps;
	u32 ack_timeout;

	spinlock_t msg_stack_lock;
	struct veth_msg *msg_stack_head;
};

/* Per virtual ethernet (network device) state. */
struct veth_port {
	struct device *dev;
	struct net_device_stats stats;
	u64 mac_addr;
	HvLpIndexMap lpar_map;

	spinlock_t pending_gate;
	struct sk_buff *pending_skb;
	HvLpIndexMap pending_lpmask;

	rwlock_t mcast_gate;
	int promiscuous;
	int all_mcast;
	int num_mcast;
	u64 mcast_addr[VETH_MAX_MCAST];
};

static HvLpIndex this_lp;
static struct veth_lpar_connection *veth_cnx[HVMAXARCHITECTEDLPS]; /* = 0 */
static struct net_device *veth_dev[HVMAXARCHITECTEDVIRTUALLANS]; /* = 0 */

static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev);
static void veth_recycle_msg(struct veth_lpar_connection *, struct veth_msg *);
static void veth_flush_pending(struct veth_lpar_connection *cnx);
static void veth_receive(struct veth_lpar_connection *, struct VethLpEvent *);
static void veth_timed_ack(unsigned long connectionPtr);

/*
 * Utility functions
 */

/* printk() at the given priority, prefixed with the source file name. */
#define veth_printk(prio, fmt, args...) \
	printk(prio "%s: " fmt, __FILE__, ## args)

/* printk() at KERN_ERR, prefixed with the source file name and line. */
#define veth_error(fmt, args...) \
	printk(KERN_ERR "(%s:%3.3d) ERROR: " fmt, __FILE__, __LINE__ , ## args)

/*
 * Push msg onto the head of cnx's message stack.  The stack is
 * protected by cnx->msg_stack_lock, taken with interrupts disabled.
 */
static inline void veth_stack_push(struct veth_lpar_connection *cnx,
				   struct veth_msg *msg)
{
	unsigned long flags;

	spin_lock_irqsave(&cnx->msg_stack_lock, flags);
	msg->next = cnx->msg_stack_head;
	cnx->msg_stack_head = msg;
	spin_unlock_irqrestore(&cnx->msg_stack_lock, flags);
}

/*
 * Pop the head of cnx's message stack under cnx->msg_stack_lock.
 * Returns the popped message, or NULL if the stack is empty.
 */
static inline struct veth_msg *veth_stack_pop(struct veth_lpar_connection *cnx)
{
	unsigned long flags;
	struct veth_msg *msg;

	spin_lock_irqsave(&cnx->msg_stack_lock, flags);
	msg = cnx->msg_stack_head;
	if (msg)
		cnx->msg_stack_head = cnx->msg_stack_head->next;
	spin_unlock_irqrestore(&cnx->msg_stack_lock, flags);
	return msg;
}

/*
 * Signal a VirtualLan LP event to cnx->remote_lp, using the
 * connection's source and destination instance ids.  All other
 * arguments are passed straight through to
 * HvCallEvent_signalLpEventFast(), whose return code is returned.
 */
static inline HvLpEvent_Rc
veth_signalevent(struct veth_lpar_connection *cnx, u16 subtype,
		 HvLpEvent_AckInd ackind, HvLpEvent_AckType acktype,
		 u64 token,
		 u64 data1, u64 data2, u64 data3, u64 data4, u64 data5)
{
	return HvCallEvent_signalLpEventFast(cnx->remote_lp,
					     HvLpEvent_Type_VirtualLan,
					     subtype, ackind, acktype,
					     cnx->src_inst,
					     cnx->dst_inst,
					     token, data1, data2, data3,
					     data4, data5);
}

/*
 * Signal an event that requests no ack, carrying the first five u64
 * words found at 'data' as its payload.  'data' must therefore point
 * at no fewer than 5 * sizeof(u64) readable bytes.
 */
static inline HvLpEvent_Rc veth_signaldata(struct veth_lpar_connection *cnx,
					   u16 subtype, u64 token, void *data)
{
	u64 *p = (u64 *) data;

	return veth_signalevent(cnx, subtype, HvLpEvent_AckInd_NoAck,
				HvLpEvent_AckType_ImmediateAck,
				token, p[0], p[1], p[2], p[3], p[4]);
}

/* Context shared between veth_allocate_events() and its callback. */
struct veth_allocation {
	struct completion c;
	int num;
};

/*
 * Callback handed to mf_allocateLpEvents(): records 'number' in the
 * veth_allocation passed as 'parm' and wakes the waiter.
 */
static void veth_complete_allocation(void *parm, int number)
{
	struct veth_allocation *vc = (struct veth_allocation *)parm;

	vc->num = number;
	complete(&vc->c);
}

/*
 * Ask for 'number' VirtualLan events (each sizeof(struct VethLpEvent))
 * for partition 'rlp' and block in wait_for_completion() until the
 * callback has run.  Returns the count reported to the callback.
 */
static int veth_allocate_events(HvLpIndex rlp, int number)
{
	struct veth_allocation vc = { COMPLETION_INITIALIZER(vc.c), 0 };

	mf_allocateLpEvents(rlp, HvLpEvent_Type_VirtualLan,
			    sizeof(struct VethLpEvent), number,
			    &veth_complete_allocation, &vc);
	wait_for_completion(&vc.c);

	return vc.num;
}

/*
 * LPAR connection code
 */

/* Queue the connection's state machine work item. */
static inline void veth_kick_statemachine(struct veth_lpar_connection *cnx)
{
	schedule_work(&cnx->statemachine_wq);
}

/*
 * Handle a capabilities event from the remote partition.  Under
 * cnx->lock: refresh dst_inst, then either save the event and set
 * VETH_STATE_GOTCAPS (kicking the state machine), or, if capabilities
 * were already recorded, ack the event back with
 * HvLpEvent_Rc_BufferNotAvailable.
 */
static void veth_take_cap(struct veth_lpar_connection *cnx,
			  struct VethLpEvent *event)
{
	unsigned long flags;

	spin_lock_irqsave(&cnx->lock, flags);
	/* Receiving caps may mean the other end has just come up, so
	 * we need to reload the instance ID of the far end */
	cnx->dst_inst =
		HvCallEvent_getTargetLpInstanceId(cnx->remote_lp,
						  HvLpEvent_Type_VirtualLan);

	if (cnx->state & VETH_STATE_GOTCAPS) {
		veth_error("Received a second capabilities from lpar %d\n",
			   cnx->remote_lp);
		event->base_event.xRc = HvLpEvent_Rc_BufferNotAvailable;
		HvCallEvent_ackLpEvent((struct HvLpEvent *) event);
	} else {
		memcpy(&cnx->cap_event, event, sizeof(cnx->cap_event));
		cnx->state |= VETH_STATE_GOTCAPS;
		veth_kick_statemachine(cnx);
	}
	spin_unlock_irqrestore(&cnx->lock, flags);
}

/*
 * Handle the ack of a capabilities event.  Under cnx->lock: save the
 * ack event in cnx->cap_ack_event, set VETH_STATE_GOTCAPACK and kick
 * the state machine; a second ack is only logged.
 */
static void veth_take_cap_ack(struct veth_lpar_connection *cnx,
			      struct VethLpEvent *event)
{
	unsigned long flags;

	spin_lock_irqsave(&cnx->lock, flags);
	if (cnx->state & VETH_STATE_GOTCAPACK) {
		veth_error("Received a second capabilities ack from lpar %d\n",
			   cnx->remote_lp);
	} else {
		/* Copy the whole event: the size must be that of the
		 * destination structure, not of a pointer to it. */
		memcpy(&cnx->cap_ack_event, event,
		       sizeof(cnx->cap_ack_event));
		cnx->state |= VETH_STATE_GOTCAPACK;
		veth_kick_statemachine(cnx);
	}
	spin_unlock_irqrestore(&cnx->lock, flags);
}

/*
 * Handle the return of a monitor event: log it, set VETH_STATE_RESET
 * under cnx->lock and kick the state machine.  'event' is not
 * examined.
 */
static void veth_take_monitor_ack(struct veth_lpar_connection *cnx,
				  struct VethLpEvent *event)
{
	unsigned long flags;

	spin_lock_irqsave(&cnx->lock, flags);
	veth_printk(KERN_DEBUG, "Monitor ack returned for lpar %d\n",
		    cnx->remote_lp);
	cnx->state |= VETH_STATE_RESET;
	veth_kick_statemachine(cnx);
	spin_unlock_irqrestore(&cnx->lock, flags);
}

/*
 * Dispatch an ack event by subtype.  The connection is looked up by
 * the event's target LP; a missing connection is a BUG.
 */
static void veth_handle_ack(struct VethLpEvent *event)
{
	HvLpIndex rlp = event->base_event.xTargetLp;
	struct veth_lpar_connection *cnx = veth_cnx[rlp];

	BUG_ON(! cnx);

	switch (event->base_event.xSubtype) {
	case VethEventTypeCap:
		veth_take_cap_ack(cnx, event);
		break;
	case VethEventTypeMonitor:
		veth_take_monitor_ack(cnx, event);
		break;
	default:
		veth_error("Unknown ack type %d from lpar %d\n",
			   event->base_event.xSubtype, rlp);
	}
}

/*
 * Dispatch an incoming (interrupt) event by subtype.  The connection
 * is looked up by the event's source LP; a missing connection is a
 * BUG.
 */
static void veth_handle_int(struct VethLpEvent *event)
{
	HvLpIndex rlp = event->base_event.xSourceLp;
	struct veth_lpar_connection *cnx = veth_cnx[rlp];
	unsigned long flags;
	int i;

	BUG_ON(! cnx);

	switch (event->base_event.xSubtype) {
	case VethEventTypeCap:
		veth_take_cap(cnx, event);
		break;
	case VethEventTypeMonitor:
		/* do nothing... this'll hang out here til we're dead,
		 * and the hypervisor will return it for us. */
		break;
	case VethEventTypeFramesAck:
		spin_lock_irqsave(&cnx->lock, flags);
		for (i = 0; i < VETH_MAX_ACKS_PER_MSG; ++i) {
			u16 msgnum = event->u.frames_ack_data.token[i];

			/* Tokens outside the buffer range are ignored */
			if (msgnum < VETH_NUMBUFFERS)
				veth_recycle_msg(cnx, cnx->msgs + msgnum);
		}
		spin_unlock_irqrestore(&cnx->lock, flags);
		veth_flush_pending(cnx);
		break;
	case VethEventTypeFrames:
		veth_receive(cnx, event);
		break;
	default:
		veth_error("Unknown interrupt type %d from lpar %d\n",
			   event->base_event.xSubtype, rlp);
	}
}

/*
 * LP event entry point: route the event to the ack or interrupt
 * handler according to its function code.  Events with any other
 * function code are ignored; 'regs' is unused.
 */
static void veth_handle_event(struct HvLpEvent *event, struct pt_regs *regs)
{
	struct VethLpEvent *veth_event = (struct VethLpEvent *)event;

	if (event->xFlags.xFunction == HvLpEvent_Function_Ack)
		veth_handle_ack(veth_event);
	else if (event->xFlags.xFunction == HvLpEvent_Function_Int)
		veth_handle_int(veth_event);
}

/*
 * Validate cnx->remote_caps and make sure enough ack events are
 * allocated for the remote partition.
 *
 * Returns HvLpEvent_Rc_Good on success,
 * HvLpEvent_Rc_InvalidSubtypeData if the capabilities are unusable,
 * or HvLpEvent_Rc_BufferNotAvailable if not enough ack events could
 * be allocated.  May block in veth_allocate_events().
 */
static int veth_process_caps(struct veth_lpar_connection *cnx)
{
	struct VethCapData *remote_caps = &cnx->remote_caps;
	int num_acks_needed;

	/* Convert timer to jiffies */
	cnx->ack_timeout = remote_caps->ack_timeout * HZ / 1000000;

	if ( (remote_caps->num_buffers == 0)
	     || (remote_caps->ack_threshold > VETH_MAX_ACKS_PER_MSG)
	     || (remote_caps->ack_threshold == 0)
	     || (cnx->ack_timeout == 0) ) {
		veth_error("Received incompatible capabilities from lpar %d\n",
			   cnx->remote_lp);
		return HvLpEvent_Rc_InvalidSubtypeData;
	}

	num_acks_needed = (remote_caps->num_buffers
			   / remote_caps->ack_threshold) + 1;

	/* FIXME: locking on num_ack_events? */
	if (cnx->num_ack_events < num_acks_needed) {
		int num;

		/* Only ask for the shortfall */
		num = veth_allocate_events(cnx->remote_lp,
					   num_acks_needed-cnx->num_ack_events);
		if (num > 0)
			cnx->num_ack_events += num;

		if (cnx->num_ack_events < num_acks_needed) {
			veth_error("Couldn't allocate enough ack events for lpar %d\n",
				   cnx->remote_lp);

			return HvLpEvent_Rc_BufferNotAvailable;
		}
	}

	return HvLpEvent_Rc_Good;
}

/* FIXME: The gotos here are a bit dubious */
static void veth_statemachine(void *p)
{
	struct veth_lpar_connection *cnx = (struct veth_lpar_connection *)p;
	int rlp = cnx->remote_lp;
	int rc;

	spin_lock_irq(&cnx->lock);

 restart:
	if (cnx->state & VETH_STATE_RESET) {
		int i;

		del_timer(&cnx->ack_timer);

		if (cnx->state & VETH_STATE_OPEN)
			HvCallEvent_closeLpEventPath(cnx->remote_lp,
						     HvLpEvent_Type_VirtualLan);

		/* reset ack data */
		memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks));
		cnx->num_pending_acks = 0;

		cnx->state &= ~(VETH_STATE_RESET | VETH_STATE_SENTMON
				| VETH_STATE_OPEN | VETH_STATE_SENTCAPS
				| VETH_STATE_GOTCAPACK | VETH_STATE_GOTCAPS
				| VETH_STATE_SENTCAPACK | VETH_STATE_READY);

		/* Clean up any leftover messages */
		if (cnx->msgs)
			for (i = 0; i < VETH_NUMBUFFERS; ++i)
				veth_recycle_msg(cnx, cnx->msgs + i);
		/* The lock is dropped around veth_flush_pending(), so
		 * RESET may have been set again in the meantime */
		spin_unlock_irq(&cnx->lock);
		veth_flush_pending(cnx);
		spin_lock_irq(&cnx->lock);
		if (cnx->state & VETH_STATE_RESET)
			goto restart;
	}

	if (cnx->state & VETH_STATE_SHUTDOWN)
		/* It's all over, do nothing */
		goto out;

	if ( !(cnx->state & VETH_STATE_OPEN) ) {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?