iseries_veth.c
来自「linux 内核源代码」· C语言 代码 · 共 1,730 行 · 第 1/3 页
C
1,730 行
/* File veth.c created by Kyle A. Lucke on Mon Aug 7 2000. *//* * IBM eServer iSeries Virtual Ethernet Device Driver * Copyright (C) 2001 Kyle A. Lucke (klucke@us.ibm.com), IBM Corp. * Substantially cleaned up by: * Copyright (C) 2003 David Gibson <dwg@au1.ibm.com>, IBM Corporation. * Copyright (C) 2004-2005 Michael Ellerman, IBM Corporation. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA * * * This module implements the virtual ethernet device for iSeries LPAR * Linux. It uses hypervisor message passing to implement an * ethernet-like network device communicating between partitions on * the iSeries. * * The iSeries LPAR hypervisor currently allows for up to 16 different * virtual ethernets. These are all dynamically configurable on * OS/400 partitions, but dynamic configuration is not supported under * Linux yet. An ethXX network device will be created for each * virtual ethernet this partition is connected to. * * - This driver is responsible for routing packets to and from other * partitions. The MAC addresses used by the virtual ethernets * contains meaning and must not be modified. * * - Having 2 virtual ethernets to the same remote partition DOES NOT * double the available bandwidth. The 2 devices will share the * available hypervisor bandwidth. * * - If you send a packet to your own mac address, it will just be * dropped, you won't get it on the receive side. * * - Multicast is implemented by sending the frame frame to every * other partition. It is the responsibility of the receiving * partition to filter the addresses desired. * * Tunable parameters: * * VETH_NUMBUFFERS: This compile time option defaults to 120. It * controls how much memory Linux will allocate per remote partition * it is communicating with. It can be thought of as the maximum * number of packets outstanding to a remote partition at a time. */#include <linux/module.h>#include <linux/types.h>#include <linux/errno.h>#include <linux/ioport.h>#include <linux/kernel.h>#include <linux/netdevice.h>#include <linux/etherdevice.h>#include <linux/skbuff.h>#include <linux/init.h>#include <linux/delay.h>#include <linux/mm.h>#include <linux/ethtool.h>#include <linux/if_ether.h>#include <asm/abs_addr.h>#include <asm/iseries/mf.h>#include <asm/uaccess.h>#include <asm/firmware.h>#include <asm/iseries/hv_lp_config.h>#include <asm/iseries/hv_types.h>#include <asm/iseries/hv_lp_event.h>#include <asm/iommu.h>#include <asm/vio.h>#undef DEBUGMODULE_AUTHOR("Kyle Lucke <klucke@us.ibm.com>");MODULE_DESCRIPTION("iSeries Virtual ethernet driver");MODULE_LICENSE("GPL");#define VETH_EVENT_CAP (0)#define VETH_EVENT_FRAMES (1)#define VETH_EVENT_MONITOR (2)#define VETH_EVENT_FRAMES_ACK (3)#define VETH_MAX_ACKS_PER_MSG (20)#define VETH_MAX_FRAMES_PER_MSG (6)struct veth_frames_data { u32 addr[VETH_MAX_FRAMES_PER_MSG]; u16 len[VETH_MAX_FRAMES_PER_MSG]; u32 eofmask;};#define VETH_EOF_SHIFT (32-VETH_MAX_FRAMES_PER_MSG)struct veth_frames_ack_data { u16 token[VETH_MAX_ACKS_PER_MSG];};struct veth_cap_data { u8 caps_version; u8 rsvd1; u16 num_buffers; u16 ack_threshold; u16 rsvd2; u32 ack_timeout; u32 rsvd3; u64 rsvd4[3];};struct veth_lpevent { struct HvLpEvent base_event; union { struct veth_cap_data caps_data; struct veth_frames_data frames_data; struct veth_frames_ack_data frames_ack_data; } u;};#define DRV_NAME "iseries_veth"#define DRV_VERSION "2.0"#define VETH_NUMBUFFERS (120)#define VETH_ACKTIMEOUT (1000000) /* microseconds */#define VETH_MAX_MCAST (12)#define VETH_MAX_MTU (9000)#if VETH_NUMBUFFERS < 10#define ACK_THRESHOLD (1)#elif VETH_NUMBUFFERS < 20#define ACK_THRESHOLD (4)#elif VETH_NUMBUFFERS < 40#define ACK_THRESHOLD (10)#else#define ACK_THRESHOLD (20)#endif#define VETH_STATE_SHUTDOWN (0x0001)#define VETH_STATE_OPEN (0x0002)#define VETH_STATE_RESET (0x0004)#define VETH_STATE_SENTMON (0x0008)#define VETH_STATE_SENTCAPS (0x0010)#define VETH_STATE_GOTCAPACK (0x0020)#define VETH_STATE_GOTCAPS (0x0040)#define VETH_STATE_SENTCAPACK (0x0080)#define VETH_STATE_READY (0x0100)struct veth_msg { struct veth_msg *next; struct veth_frames_data data; int token; int in_use; struct sk_buff *skb; struct device *dev;};struct veth_lpar_connection { HvLpIndex remote_lp; struct delayed_work statemachine_wq; struct veth_msg *msgs; int num_events; struct veth_cap_data local_caps; struct kobject kobject; struct timer_list ack_timer; struct timer_list reset_timer; unsigned int reset_timeout; unsigned long last_contact; int outstanding_tx; spinlock_t lock; unsigned long state; HvLpInstanceId src_inst; HvLpInstanceId dst_inst; struct veth_lpevent cap_event, cap_ack_event; u16 pending_acks[VETH_MAX_ACKS_PER_MSG]; u32 num_pending_acks; int num_ack_events; struct veth_cap_data remote_caps; u32 ack_timeout; struct veth_msg *msg_stack_head;};struct veth_port { struct device *dev; u64 mac_addr; HvLpIndexMap lpar_map; /* queue_lock protects the stopped_map and dev's queue. */ spinlock_t queue_lock; HvLpIndexMap stopped_map; /* mcast_gate protects promiscuous, num_mcast & mcast_addr. */ rwlock_t mcast_gate; int promiscuous; int num_mcast; u64 mcast_addr[VETH_MAX_MCAST]; struct kobject kobject;};static HvLpIndex this_lp;static struct veth_lpar_connection *veth_cnx[HVMAXARCHITECTEDLPS]; /* = 0 */static struct net_device *veth_dev[HVMAXARCHITECTEDVIRTUALLANS]; /* = 0 */static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev);static void veth_recycle_msg(struct veth_lpar_connection *, struct veth_msg *);static void veth_wake_queues(struct veth_lpar_connection *cnx);static void veth_stop_queues(struct veth_lpar_connection *cnx);static void veth_receive(struct veth_lpar_connection *, struct veth_lpevent *);static void veth_release_connection(struct kobject *kobject);static void veth_timed_ack(unsigned long ptr);static void veth_timed_reset(unsigned long ptr);/* * Utility functions */#define veth_info(fmt, args...) \ printk(KERN_INFO DRV_NAME ": " fmt, ## args)#define veth_error(fmt, args...) \ printk(KERN_ERR DRV_NAME ": Error: " fmt, ## args)#ifdef DEBUG#define veth_debug(fmt, args...) \ printk(KERN_DEBUG DRV_NAME ": " fmt, ## args)#else#define veth_debug(fmt, args...) do {} while (0)#endif/* You must hold the connection's lock when you call this function. */static inline void veth_stack_push(struct veth_lpar_connection *cnx, struct veth_msg *msg){ msg->next = cnx->msg_stack_head; cnx->msg_stack_head = msg;}/* You must hold the connection's lock when you call this function. */static inline struct veth_msg *veth_stack_pop(struct veth_lpar_connection *cnx){ struct veth_msg *msg; msg = cnx->msg_stack_head; if (msg) cnx->msg_stack_head = cnx->msg_stack_head->next; return msg;}/* You must hold the connection's lock when you call this function. */static inline int veth_stack_is_empty(struct veth_lpar_connection *cnx){ return cnx->msg_stack_head == NULL;}static inline HvLpEvent_Rcveth_signalevent(struct veth_lpar_connection *cnx, u16 subtype, HvLpEvent_AckInd ackind, HvLpEvent_AckType acktype, u64 token, u64 data1, u64 data2, u64 data3, u64 data4, u64 data5){ return HvCallEvent_signalLpEventFast(cnx->remote_lp, HvLpEvent_Type_VirtualLan, subtype, ackind, acktype, cnx->src_inst, cnx->dst_inst, token, data1, data2, data3, data4, data5);}static inline HvLpEvent_Rc veth_signaldata(struct veth_lpar_connection *cnx, u16 subtype, u64 token, void *data){ u64 *p = (u64 *) data; return veth_signalevent(cnx, subtype, HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck, token, p[0], p[1], p[2], p[3], p[4]);}struct veth_allocation { struct completion c; int num;};static void veth_complete_allocation(void *parm, int number){ struct veth_allocation *vc = (struct veth_allocation *)parm; vc->num = number; complete(&vc->c);}static int veth_allocate_events(HvLpIndex rlp, int number){ struct veth_allocation vc = { COMPLETION_INITIALIZER(vc.c), 0 }; mf_allocate_lp_events(rlp, HvLpEvent_Type_VirtualLan, sizeof(struct veth_lpevent), number, &veth_complete_allocation, &vc); wait_for_completion(&vc.c); return vc.num;}/* * sysfs support */struct veth_cnx_attribute { struct attribute attr; ssize_t (*show)(struct veth_lpar_connection *, char *buf); ssize_t (*store)(struct veth_lpar_connection *, const char *buf);};static ssize_t veth_cnx_attribute_show(struct kobject *kobj, struct attribute *attr, char *buf){ struct veth_cnx_attribute *cnx_attr; struct veth_lpar_connection *cnx; cnx_attr = container_of(attr, struct veth_cnx_attribute, attr); cnx = container_of(kobj, struct veth_lpar_connection, kobject); if (!cnx_attr->show) return -EIO; return cnx_attr->show(cnx, buf);}#define CUSTOM_CNX_ATTR(_name, _format, _expression) \static ssize_t _name##_show(struct veth_lpar_connection *cnx, char *buf)\{ \ return sprintf(buf, _format, _expression); \} \struct veth_cnx_attribute veth_cnx_attr_##_name = __ATTR_RO(_name)#define SIMPLE_CNX_ATTR(_name) \ CUSTOM_CNX_ATTR(_name, "%lu\n", (unsigned long)cnx->_name)SIMPLE_CNX_ATTR(outstanding_tx);SIMPLE_CNX_ATTR(remote_lp);SIMPLE_CNX_ATTR(num_events);SIMPLE_CNX_ATTR(src_inst);SIMPLE_CNX_ATTR(dst_inst);SIMPLE_CNX_ATTR(num_pending_acks);SIMPLE_CNX_ATTR(num_ack_events);CUSTOM_CNX_ATTR(ack_timeout, "%d\n", jiffies_to_msecs(cnx->ack_timeout));CUSTOM_CNX_ATTR(reset_timeout, "%d\n", jiffies_to_msecs(cnx->reset_timeout));CUSTOM_CNX_ATTR(state, "0x%.4lX\n", cnx->state);CUSTOM_CNX_ATTR(last_contact, "%d\n", cnx->last_contact ? jiffies_to_msecs(jiffies - cnx->last_contact) : 0);#define GET_CNX_ATTR(_name) (&veth_cnx_attr_##_name.attr)static struct attribute *veth_cnx_default_attrs[] = { GET_CNX_ATTR(outstanding_tx), GET_CNX_ATTR(remote_lp), GET_CNX_ATTR(num_events), GET_CNX_ATTR(reset_timeout), GET_CNX_ATTR(last_contact), GET_CNX_ATTR(state), GET_CNX_ATTR(src_inst), GET_CNX_ATTR(dst_inst), GET_CNX_ATTR(num_pending_acks), GET_CNX_ATTR(num_ack_events), GET_CNX_ATTR(ack_timeout), NULL};static struct sysfs_ops veth_cnx_sysfs_ops = { .show = veth_cnx_attribute_show};static struct kobj_type veth_lpar_connection_ktype = { .release = veth_release_connection, .sysfs_ops = &veth_cnx_sysfs_ops, .default_attrs = veth_cnx_default_attrs};struct veth_port_attribute { struct attribute attr; ssize_t (*show)(struct veth_port *, char *buf); ssize_t (*store)(struct veth_port *, const char *buf);};static ssize_t veth_port_attribute_show(struct kobject *kobj, struct attribute *attr, char *buf){ struct veth_port_attribute *port_attr; struct veth_port *port; port_attr = container_of(attr, struct veth_port_attribute, attr); port = container_of(kobj, struct veth_port, kobject); if (!port_attr->show) return -EIO; return port_attr->show(port, buf);}#define CUSTOM_PORT_ATTR(_name, _format, _expression) \static ssize_t _name##_show(struct veth_port *port, char *buf) \{ \ return sprintf(buf, _format, _expression); \} \struct veth_port_attribute veth_port_attr_##_name = __ATTR_RO(_name)#define SIMPLE_PORT_ATTR(_name) \ CUSTOM_PORT_ATTR(_name, "%lu\n", (unsigned long)port->_name)SIMPLE_PORT_ATTR(promiscuous);SIMPLE_PORT_ATTR(num_mcast);CUSTOM_PORT_ATTR(lpar_map, "0x%X\n", port->lpar_map);CUSTOM_PORT_ATTR(stopped_map, "0x%X\n", port->stopped_map);CUSTOM_PORT_ATTR(mac_addr, "0x%lX\n", port->mac_addr);#define GET_PORT_ATTR(_name) (&veth_port_attr_##_name.attr)static struct attribute *veth_port_default_attrs[] = { GET_PORT_ATTR(mac_addr), GET_PORT_ATTR(lpar_map), GET_PORT_ATTR(stopped_map), GET_PORT_ATTR(promiscuous), GET_PORT_ATTR(num_mcast), NULL};static struct sysfs_ops veth_port_sysfs_ops = { .show = veth_port_attribute_show};static struct kobj_type veth_port_ktype = { .sysfs_ops = &veth_port_sysfs_ops, .default_attrs = veth_port_default_attrs};/* * LPAR connection code */static inline void veth_kick_statemachine(struct veth_lpar_connection *cnx){ schedule_delayed_work(&cnx->statemachine_wq, 0);}static void veth_take_cap(struct veth_lpar_connection *cnx, struct veth_lpevent *event){ unsigned long flags; spin_lock_irqsave(&cnx->lock, flags); /* Receiving caps may mean the other end has just come up, so * we need to reload the instance ID of the far end */ cnx->dst_inst = HvCallEvent_getTargetLpInstanceId(cnx->remote_lp, HvLpEvent_Type_VirtualLan); if (cnx->state & VETH_STATE_GOTCAPS) { veth_error("Received a second capabilities from LPAR %d.\n", cnx->remote_lp); event->base_event.xRc = HvLpEvent_Rc_BufferNotAvailable; HvCallEvent_ackLpEvent((struct HvLpEvent *) event); } else { memcpy(&cnx->cap_event, event, sizeof(cnx->cap_event)); cnx->state |= VETH_STATE_GOTCAPS; veth_kick_statemachine(cnx); } spin_unlock_irqrestore(&cnx->lock, flags);}static void veth_take_cap_ack(struct veth_lpar_connection *cnx, struct veth_lpevent *event){ unsigned long flags; spin_lock_irqsave(&cnx->lock, flags); if (cnx->state & VETH_STATE_GOTCAPACK) { veth_error("Received a second capabilities ack from LPAR %d.\n", cnx->remote_lp); } else { memcpy(&cnx->cap_ack_event, event, sizeof(&cnx->cap_ack_event)); cnx->state |= VETH_STATE_GOTCAPACK; veth_kick_statemachine(cnx); } spin_unlock_irqrestore(&cnx->lock, flags);}static void veth_take_monitor_ack(struct veth_lpar_connection *cnx, struct veth_lpevent *event){ unsigned long flags; spin_lock_irqsave(&cnx->lock, flags); veth_debug("cnx %d: lost connection.\n", cnx->remote_lp); /* Avoid kicking the statemachine once we're shutdown. * It's unnecessary and it could break veth_stop_connection(). */ if (! (cnx->state & VETH_STATE_SHUTDOWN)) { cnx->state |= VETH_STATE_RESET; veth_kick_statemachine(cnx); } spin_unlock_irqrestore(&cnx->lock, flags);}static void veth_handle_ack(struct veth_lpevent *event){ HvLpIndex rlp = event->base_event.xTargetLp; struct veth_lpar_connection *cnx = veth_cnx[rlp]; BUG_ON(! cnx); switch (event->base_event.xSubtype) { case VETH_EVENT_CAP: veth_take_cap_ack(cnx, event); break; case VETH_EVENT_MONITOR: veth_take_monitor_ack(cnx, event); break; default: veth_error("Unknown ack type %d from LPAR %d.\n", event->base_event.xSubtype, rlp); };}static void veth_handle_int(struct veth_lpevent *event){ HvLpIndex rlp = event->base_event.xSourceLp; struct veth_lpar_connection *cnx = veth_cnx[rlp]; unsigned long flags; int i, acked = 0; BUG_ON(! cnx); switch (event->base_event.xSubtype) { case VETH_EVENT_CAP: veth_take_cap(cnx, event); break; case VETH_EVENT_MONITOR: /* do nothing... this'll hang out here til we're dead, * and the hypervisor will return it for us. */ break; case VETH_EVENT_FRAMES_ACK: spin_lock_irqsave(&cnx->lock, flags); for (i = 0; i < VETH_MAX_ACKS_PER_MSG; ++i) { u16 msgnum = event->u.frames_ack_data.token[i]; if (msgnum < VETH_NUMBUFFERS) { veth_recycle_msg(cnx, cnx->msgs + msgnum); cnx->outstanding_tx--; acked++; } } if (acked > 0) { cnx->last_contact = jiffies; veth_wake_queues(cnx); } spin_unlock_irqrestore(&cnx->lock, flags);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?