iseries_veth.c

来自「linux 内核源代码」· C语言 代码 · 共 1,730 行 · 第 1/3 页

C
1,730
字号
/* File veth.c created by Kyle A. Lucke on Mon Aug  7 2000. *//* * IBM eServer iSeries Virtual Ethernet Device Driver * Copyright (C) 2001 Kyle A. Lucke (klucke@us.ibm.com), IBM Corp. * Substantially cleaned up by: * Copyright (C) 2003 David Gibson <dwg@au1.ibm.com>, IBM Corporation. * Copyright (C) 2004-2005 Michael Ellerman, IBM Corporation. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA * * * This module implements the virtual ethernet device for iSeries LPAR * Linux.  It uses hypervisor message passing to implement an * ethernet-like network device communicating between partitions on * the iSeries. * * The iSeries LPAR hypervisor currently allows for up to 16 different * virtual ethernets.  These are all dynamically configurable on * OS/400 partitions, but dynamic configuration is not supported under * Linux yet.  An ethXX network device will be created for each * virtual ethernet this partition is connected to. * * - This driver is responsible for routing packets to and from other *   partitions.  The MAC addresses used by the virtual ethernets *   contains meaning and must not be modified. * * - Having 2 virtual ethernets to the same remote partition DOES NOT *   double the available bandwidth.  The 2 devices will share the *   available hypervisor bandwidth. * * - If you send a packet to your own mac address, it will just be *   dropped, you won't get it on the receive side. * * - Multicast is implemented by sending the frame frame to every *   other partition.  It is the responsibility of the receiving *   partition to filter the addresses desired. * * Tunable parameters: * * VETH_NUMBUFFERS: This compile time option defaults to 120.  It * controls how much memory Linux will allocate per remote partition * it is communicating with.  It can be thought of as the maximum * number of packets outstanding to a remote partition at a time. */#include <linux/module.h>#include <linux/types.h>#include <linux/errno.h>#include <linux/ioport.h>#include <linux/kernel.h>#include <linux/netdevice.h>#include <linux/etherdevice.h>#include <linux/skbuff.h>#include <linux/init.h>#include <linux/delay.h>#include <linux/mm.h>#include <linux/ethtool.h>#include <linux/if_ether.h>#include <asm/abs_addr.h>#include <asm/iseries/mf.h>#include <asm/uaccess.h>#include <asm/firmware.h>#include <asm/iseries/hv_lp_config.h>#include <asm/iseries/hv_types.h>#include <asm/iseries/hv_lp_event.h>#include <asm/iommu.h>#include <asm/vio.h>#undef DEBUGMODULE_AUTHOR("Kyle Lucke <klucke@us.ibm.com>");MODULE_DESCRIPTION("iSeries Virtual ethernet driver");MODULE_LICENSE("GPL");#define VETH_EVENT_CAP	(0)#define VETH_EVENT_FRAMES	(1)#define VETH_EVENT_MONITOR	(2)#define VETH_EVENT_FRAMES_ACK	(3)#define VETH_MAX_ACKS_PER_MSG	(20)#define VETH_MAX_FRAMES_PER_MSG	(6)struct veth_frames_data {	u32 addr[VETH_MAX_FRAMES_PER_MSG];	u16 len[VETH_MAX_FRAMES_PER_MSG];	u32 eofmask;};#define VETH_EOF_SHIFT		(32-VETH_MAX_FRAMES_PER_MSG)struct veth_frames_ack_data {	u16 token[VETH_MAX_ACKS_PER_MSG];};struct veth_cap_data {	u8 caps_version;	u8 rsvd1;	u16 num_buffers;	u16 ack_threshold;	u16 rsvd2;	u32 ack_timeout;	u32 rsvd3;	u64 rsvd4[3];};struct veth_lpevent {	struct HvLpEvent base_event;	union {		struct veth_cap_data caps_data;		struct veth_frames_data frames_data;		struct veth_frames_ack_data frames_ack_data;	} u;};#define DRV_NAME	"iseries_veth"#define DRV_VERSION	"2.0"#define VETH_NUMBUFFERS		(120)#define VETH_ACKTIMEOUT 	(1000000) /* microseconds */#define VETH_MAX_MCAST		(12)#define VETH_MAX_MTU		(9000)#if VETH_NUMBUFFERS < 10#define ACK_THRESHOLD 		(1)#elif VETH_NUMBUFFERS < 20#define ACK_THRESHOLD 		(4)#elif VETH_NUMBUFFERS < 40#define ACK_THRESHOLD 		(10)#else#define ACK_THRESHOLD 		(20)#endif#define	VETH_STATE_SHUTDOWN	(0x0001)#define VETH_STATE_OPEN		(0x0002)#define VETH_STATE_RESET	(0x0004)#define VETH_STATE_SENTMON	(0x0008)#define VETH_STATE_SENTCAPS	(0x0010)#define VETH_STATE_GOTCAPACK	(0x0020)#define VETH_STATE_GOTCAPS	(0x0040)#define VETH_STATE_SENTCAPACK	(0x0080)#define VETH_STATE_READY	(0x0100)struct veth_msg {	struct veth_msg *next;	struct veth_frames_data data;	int token;	int in_use;	struct sk_buff *skb;	struct device *dev;};struct veth_lpar_connection {	HvLpIndex remote_lp;	struct delayed_work statemachine_wq;	struct veth_msg *msgs;	int num_events;	struct veth_cap_data local_caps;	struct kobject kobject;	struct timer_list ack_timer;	struct timer_list reset_timer;	unsigned int reset_timeout;	unsigned long last_contact;	int outstanding_tx;	spinlock_t lock;	unsigned long state;	HvLpInstanceId src_inst;	HvLpInstanceId dst_inst;	struct veth_lpevent cap_event, cap_ack_event;	u16 pending_acks[VETH_MAX_ACKS_PER_MSG];	u32 num_pending_acks;	int num_ack_events;	struct veth_cap_data remote_caps;	u32 ack_timeout;	struct veth_msg *msg_stack_head;};struct veth_port {	struct device *dev;	u64 mac_addr;	HvLpIndexMap lpar_map;	/* queue_lock protects the stopped_map and dev's queue. */	spinlock_t queue_lock;	HvLpIndexMap stopped_map;	/* mcast_gate protects promiscuous, num_mcast & mcast_addr. */	rwlock_t mcast_gate;	int promiscuous;	int num_mcast;	u64 mcast_addr[VETH_MAX_MCAST];	struct kobject kobject;};static HvLpIndex this_lp;static struct veth_lpar_connection *veth_cnx[HVMAXARCHITECTEDLPS]; /* = 0 */static struct net_device *veth_dev[HVMAXARCHITECTEDVIRTUALLANS]; /* = 0 */static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev);static void veth_recycle_msg(struct veth_lpar_connection *, struct veth_msg *);static void veth_wake_queues(struct veth_lpar_connection *cnx);static void veth_stop_queues(struct veth_lpar_connection *cnx);static void veth_receive(struct veth_lpar_connection *, struct veth_lpevent *);static void veth_release_connection(struct kobject *kobject);static void veth_timed_ack(unsigned long ptr);static void veth_timed_reset(unsigned long ptr);/* * Utility functions */#define veth_info(fmt, args...) \	printk(KERN_INFO DRV_NAME ": " fmt, ## args)#define veth_error(fmt, args...) \	printk(KERN_ERR DRV_NAME ": Error: " fmt, ## args)#ifdef DEBUG#define veth_debug(fmt, args...) \	printk(KERN_DEBUG DRV_NAME ": " fmt, ## args)#else#define veth_debug(fmt, args...) do {} while (0)#endif/* You must hold the connection's lock when you call this function. */static inline void veth_stack_push(struct veth_lpar_connection *cnx,				   struct veth_msg *msg){	msg->next = cnx->msg_stack_head;	cnx->msg_stack_head = msg;}/* You must hold the connection's lock when you call this function. */static inline struct veth_msg *veth_stack_pop(struct veth_lpar_connection *cnx){	struct veth_msg *msg;	msg = cnx->msg_stack_head;	if (msg)		cnx->msg_stack_head = cnx->msg_stack_head->next;	return msg;}/* You must hold the connection's lock when you call this function. */static inline int veth_stack_is_empty(struct veth_lpar_connection *cnx){	return cnx->msg_stack_head == NULL;}static inline HvLpEvent_Rcveth_signalevent(struct veth_lpar_connection *cnx, u16 subtype,		 HvLpEvent_AckInd ackind, HvLpEvent_AckType acktype,		 u64 token,		 u64 data1, u64 data2, u64 data3, u64 data4, u64 data5){	return HvCallEvent_signalLpEventFast(cnx->remote_lp,					     HvLpEvent_Type_VirtualLan,					     subtype, ackind, acktype,					     cnx->src_inst,					     cnx->dst_inst,					     token, data1, data2, data3,					     data4, data5);}static inline HvLpEvent_Rc veth_signaldata(struct veth_lpar_connection *cnx,					   u16 subtype, u64 token, void *data){	u64 *p = (u64 *) data;	return veth_signalevent(cnx, subtype, HvLpEvent_AckInd_NoAck,				HvLpEvent_AckType_ImmediateAck,				token, p[0], p[1], p[2], p[3], p[4]);}struct veth_allocation {	struct completion c;	int num;};static void veth_complete_allocation(void *parm, int number){	struct veth_allocation *vc = (struct veth_allocation *)parm;	vc->num = number;	complete(&vc->c);}static int veth_allocate_events(HvLpIndex rlp, int number){	struct veth_allocation vc = { COMPLETION_INITIALIZER(vc.c), 0 };	mf_allocate_lp_events(rlp, HvLpEvent_Type_VirtualLan,			    sizeof(struct veth_lpevent), number,			    &veth_complete_allocation, &vc);	wait_for_completion(&vc.c);	return vc.num;}/* * sysfs support */struct veth_cnx_attribute {	struct attribute attr;	ssize_t (*show)(struct veth_lpar_connection *, char *buf);	ssize_t (*store)(struct veth_lpar_connection *, const char *buf);};static ssize_t veth_cnx_attribute_show(struct kobject *kobj,		struct attribute *attr, char *buf){	struct veth_cnx_attribute *cnx_attr;	struct veth_lpar_connection *cnx;	cnx_attr = container_of(attr, struct veth_cnx_attribute, attr);	cnx = container_of(kobj, struct veth_lpar_connection, kobject);	if (!cnx_attr->show)		return -EIO;	return cnx_attr->show(cnx, buf);}#define CUSTOM_CNX_ATTR(_name, _format, _expression)			\static ssize_t _name##_show(struct veth_lpar_connection *cnx, char *buf)\{									\	return sprintf(buf, _format, _expression);			\}									\struct veth_cnx_attribute veth_cnx_attr_##_name = __ATTR_RO(_name)#define SIMPLE_CNX_ATTR(_name)	\	CUSTOM_CNX_ATTR(_name, "%lu\n", (unsigned long)cnx->_name)SIMPLE_CNX_ATTR(outstanding_tx);SIMPLE_CNX_ATTR(remote_lp);SIMPLE_CNX_ATTR(num_events);SIMPLE_CNX_ATTR(src_inst);SIMPLE_CNX_ATTR(dst_inst);SIMPLE_CNX_ATTR(num_pending_acks);SIMPLE_CNX_ATTR(num_ack_events);CUSTOM_CNX_ATTR(ack_timeout, "%d\n", jiffies_to_msecs(cnx->ack_timeout));CUSTOM_CNX_ATTR(reset_timeout, "%d\n", jiffies_to_msecs(cnx->reset_timeout));CUSTOM_CNX_ATTR(state, "0x%.4lX\n", cnx->state);CUSTOM_CNX_ATTR(last_contact, "%d\n", cnx->last_contact ?		jiffies_to_msecs(jiffies - cnx->last_contact) : 0);#define GET_CNX_ATTR(_name)	(&veth_cnx_attr_##_name.attr)static struct attribute *veth_cnx_default_attrs[] = {	GET_CNX_ATTR(outstanding_tx),	GET_CNX_ATTR(remote_lp),	GET_CNX_ATTR(num_events),	GET_CNX_ATTR(reset_timeout),	GET_CNX_ATTR(last_contact),	GET_CNX_ATTR(state),	GET_CNX_ATTR(src_inst),	GET_CNX_ATTR(dst_inst),	GET_CNX_ATTR(num_pending_acks),	GET_CNX_ATTR(num_ack_events),	GET_CNX_ATTR(ack_timeout),	NULL};static struct sysfs_ops veth_cnx_sysfs_ops = {		.show = veth_cnx_attribute_show};static struct kobj_type veth_lpar_connection_ktype = {	.release	= veth_release_connection,	.sysfs_ops	= &veth_cnx_sysfs_ops,	.default_attrs	= veth_cnx_default_attrs};struct veth_port_attribute {	struct attribute attr;	ssize_t (*show)(struct veth_port *, char *buf);	ssize_t (*store)(struct veth_port *, const char *buf);};static ssize_t veth_port_attribute_show(struct kobject *kobj,		struct attribute *attr, char *buf){	struct veth_port_attribute *port_attr;	struct veth_port *port;	port_attr = container_of(attr, struct veth_port_attribute, attr);	port = container_of(kobj, struct veth_port, kobject);	if (!port_attr->show)		return -EIO;	return port_attr->show(port, buf);}#define CUSTOM_PORT_ATTR(_name, _format, _expression)			\static ssize_t _name##_show(struct veth_port *port, char *buf)		\{									\	return sprintf(buf, _format, _expression);			\}									\struct veth_port_attribute veth_port_attr_##_name = __ATTR_RO(_name)#define SIMPLE_PORT_ATTR(_name)	\	CUSTOM_PORT_ATTR(_name, "%lu\n", (unsigned long)port->_name)SIMPLE_PORT_ATTR(promiscuous);SIMPLE_PORT_ATTR(num_mcast);CUSTOM_PORT_ATTR(lpar_map, "0x%X\n", port->lpar_map);CUSTOM_PORT_ATTR(stopped_map, "0x%X\n", port->stopped_map);CUSTOM_PORT_ATTR(mac_addr, "0x%lX\n", port->mac_addr);#define GET_PORT_ATTR(_name)	(&veth_port_attr_##_name.attr)static struct attribute *veth_port_default_attrs[] = {	GET_PORT_ATTR(mac_addr),	GET_PORT_ATTR(lpar_map),	GET_PORT_ATTR(stopped_map),	GET_PORT_ATTR(promiscuous),	GET_PORT_ATTR(num_mcast),	NULL};static struct sysfs_ops veth_port_sysfs_ops = {	.show = veth_port_attribute_show};static struct kobj_type veth_port_ktype = {	.sysfs_ops	= &veth_port_sysfs_ops,	.default_attrs	= veth_port_default_attrs};/* * LPAR connection code */static inline void veth_kick_statemachine(struct veth_lpar_connection *cnx){	schedule_delayed_work(&cnx->statemachine_wq, 0);}static void veth_take_cap(struct veth_lpar_connection *cnx,			  struct veth_lpevent *event){	unsigned long flags;	spin_lock_irqsave(&cnx->lock, flags);	/* Receiving caps may mean the other end has just come up, so	 * we need to reload the instance ID of the far end */	cnx->dst_inst =		HvCallEvent_getTargetLpInstanceId(cnx->remote_lp,						  HvLpEvent_Type_VirtualLan);	if (cnx->state & VETH_STATE_GOTCAPS) {		veth_error("Received a second capabilities from LPAR %d.\n",			   cnx->remote_lp);		event->base_event.xRc = HvLpEvent_Rc_BufferNotAvailable;		HvCallEvent_ackLpEvent((struct HvLpEvent *) event);	} else {		memcpy(&cnx->cap_event, event, sizeof(cnx->cap_event));		cnx->state |= VETH_STATE_GOTCAPS;		veth_kick_statemachine(cnx);	}	spin_unlock_irqrestore(&cnx->lock, flags);}static void veth_take_cap_ack(struct veth_lpar_connection *cnx,			      struct veth_lpevent *event){	unsigned long flags;	spin_lock_irqsave(&cnx->lock, flags);	if (cnx->state & VETH_STATE_GOTCAPACK) {		veth_error("Received a second capabilities ack from LPAR %d.\n",			   cnx->remote_lp);	} else {		memcpy(&cnx->cap_ack_event, event,		       sizeof(&cnx->cap_ack_event));		cnx->state |= VETH_STATE_GOTCAPACK;		veth_kick_statemachine(cnx);	}	spin_unlock_irqrestore(&cnx->lock, flags);}static void veth_take_monitor_ack(struct veth_lpar_connection *cnx,				  struct veth_lpevent *event){	unsigned long flags;	spin_lock_irqsave(&cnx->lock, flags);	veth_debug("cnx %d: lost connection.\n", cnx->remote_lp);	/* Avoid kicking the statemachine once we're shutdown.	 * It's unnecessary and it could break veth_stop_connection(). */	if (! (cnx->state & VETH_STATE_SHUTDOWN)) {		cnx->state |= VETH_STATE_RESET;		veth_kick_statemachine(cnx);	}	spin_unlock_irqrestore(&cnx->lock, flags);}static void veth_handle_ack(struct veth_lpevent *event){	HvLpIndex rlp = event->base_event.xTargetLp;	struct veth_lpar_connection *cnx = veth_cnx[rlp];	BUG_ON(! cnx);	switch (event->base_event.xSubtype) {	case VETH_EVENT_CAP:		veth_take_cap_ack(cnx, event);		break;	case VETH_EVENT_MONITOR:		veth_take_monitor_ack(cnx, event);		break;	default:		veth_error("Unknown ack type %d from LPAR %d.\n",				event->base_event.xSubtype, rlp);	};}static void veth_handle_int(struct veth_lpevent *event){	HvLpIndex rlp = event->base_event.xSourceLp;	struct veth_lpar_connection *cnx = veth_cnx[rlp];	unsigned long flags;	int i, acked = 0;	BUG_ON(! cnx);	switch (event->base_event.xSubtype) {	case VETH_EVENT_CAP:		veth_take_cap(cnx, event);		break;	case VETH_EVENT_MONITOR:		/* do nothing... this'll hang out here til we're dead,		 * and the hypervisor will return it for us. */		break;	case VETH_EVENT_FRAMES_ACK:		spin_lock_irqsave(&cnx->lock, flags);		for (i = 0; i < VETH_MAX_ACKS_PER_MSG; ++i) {			u16 msgnum = event->u.frames_ack_data.token[i];			if (msgnum < VETH_NUMBUFFERS) {				veth_recycle_msg(cnx, cnx->msgs + msgnum);				cnx->outstanding_tx--;				acked++;			}		}		if (acked > 0) {			cnx->last_contact = jiffies;			veth_wake_queues(cnx);		}		spin_unlock_irqrestore(&cnx->lock, flags);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?