xen-netfront.c
/*
 * Virtual network driver for conversing with remote driver backends.
 *
 * Copyright (c) 2002-2005, K A Fraser
 * Copyright (c) 2005, XenSource Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/ethtool.h>
#include <linux/if_ether.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/moduleparam.h>
#include <linux/mm.h>
#include <net/ip.h>

#include <xen/xenbus.h>
#include <xen/events.h>
#include <xen/page.h>
#include <xen/grant_table.h>

#include <xen/interface/io/netif.h>
#include <xen/interface/memory.h>
#include <xen/interface/grant_table.h>

static struct ethtool_ops xennet_ethtool_ops;

struct netfront_cb {
        struct page *page;
        unsigned offset;
};

#define NETFRONT_SKB_CB(skb)    ((struct netfront_cb *)((skb)->cb))

#define RX_COPY_THRESHOLD 256

#define GRANT_INVALID_REF       0

#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)

struct netfront_info {
        struct list_head list;
        struct net_device *netdev;

        struct napi_struct napi;

        unsigned int evtchn;
        struct xenbus_device *xbdev;

        spinlock_t tx_lock;
        struct xen_netif_tx_front_ring tx;
        int tx_ring_ref;

        /*
         * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
         * are linked from tx_skb_freelist through skb_entry.link.
         *
         * NB. Freelist index entries are always going to be less than
         * PAGE_OFFSET, whereas pointers to skbs will always be equal or
         * greater than PAGE_OFFSET: we use this property to distinguish
         * them.
         */
        union skb_entry {
                struct sk_buff *skb;
                unsigned link;
        } tx_skbs[NET_TX_RING_SIZE];
        grant_ref_t gref_tx_head;
        grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
        unsigned tx_skb_freelist;

        spinlock_t rx_lock ____cacheline_aligned_in_smp;
        struct xen_netif_rx_front_ring rx;
        int rx_ring_ref;

        /* Receive-ring batched refills. */
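        /*
         * How the refill targets interact -- a sketch, assuming the usual
         * 4 KiB page size (which makes both shared rings 256 slots):
         *
         *   rx_target          current refill goal for the receive ring,
         *                      kept between rx_min_target and rx_max_target
         *   RX_MIN_TARGET      lowest value the target may be tuned down to
         *   RX_DFL_MIN_TARGET  default used when the driver initialises
         *                      these fields (not shown in this excerpt)
         *   RX_MAX_TARGET      ceiling, min(NET_RX_RING_SIZE, 256) = 256
         *
         * xennet_alloc_rx_buffers() below doubles rx_target (64 -> 128 ->
         * 256) whenever fewer than rx_target/4 posted buffers remain
         * unconsumed, so a busy ring quickly grows toward the ceiling.
         */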
#define RX_MIN_TARGET 8
#define RX_DFL_MIN_TARGET 64
#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
        unsigned rx_min_target, rx_max_target, rx_target;
        struct sk_buff_head rx_batch;

        struct timer_list rx_refill_timer;

        struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
        grant_ref_t gref_rx_head;
        grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];

        unsigned long rx_pfn_array[NET_RX_RING_SIZE];
        struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
        struct mmu_update rx_mmu[NET_RX_RING_SIZE];
};

struct netfront_rx_info {
        struct xen_netif_rx_response rx;
        struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
};

/*
 * Access macros for acquiring/freeing slots in tx_skbs[].
 */
static void add_id_to_freelist(unsigned *head, union skb_entry *list,
                               unsigned short id)
{
        list[id].link = *head;
        *head = id;
}

static unsigned short get_id_from_freelist(unsigned *head,
                                           union skb_entry *list)
{
        unsigned int id = *head;
        *head = list[id].link;
        return id;
}

static int xennet_rxidx(RING_IDX idx)
{
        return idx & (NET_RX_RING_SIZE - 1);
}

static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
                                         RING_IDX ri)
{
        int i = xennet_rxidx(ri);
        struct sk_buff *skb = np->rx_skbs[i];
        np->rx_skbs[i] = NULL;
        return skb;
}

static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
                                     RING_IDX ri)
{
        int i = xennet_rxidx(ri);
        grant_ref_t ref = np->grant_rx_ref[i];
        np->grant_rx_ref[i] = GRANT_INVALID_REF;
        return ref;
}

#ifdef CONFIG_SYSFS
static int xennet_sysfs_addif(struct net_device *netdev);
static void xennet_sysfs_delif(struct net_device *netdev);
#else /* !CONFIG_SYSFS */
#define xennet_sysfs_addif(dev) (0)
#define xennet_sysfs_delif(dev) do { } while (0)
#endif

static int xennet_can_sg(struct net_device *dev)
{
        return dev->features & NETIF_F_SG;
}

static void rx_refill_timeout(unsigned long data)
{
        struct net_device *dev = (struct net_device *)data;
        struct netfront_info *np = netdev_priv(dev);
        netif_rx_schedule(dev, &np->napi);
}

static int netfront_tx_slot_available(struct netfront_info *np)
{
        return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
                (TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
}

static void xennet_maybe_wake_tx(struct net_device *dev)
{
        struct netfront_info *np = netdev_priv(dev);

        if (unlikely(netif_queue_stopped(dev)) &&
            netfront_tx_slot_available(np) &&
            likely(netif_running(dev)))
                netif_wake_queue(dev);
}

static void xennet_alloc_rx_buffers(struct net_device *dev)
{
        unsigned short id;
        struct netfront_info *np = netdev_priv(dev);
        struct sk_buff *skb;
        struct page *page;
        int i, batch_target, notify;
        RING_IDX req_prod = np->rx.req_prod_pvt;
        grant_ref_t ref;
        unsigned long pfn;
        void *vaddr;
        struct xen_netif_rx_request *req;

        if (unlikely(!netif_carrier_ok(dev)))
                return;

        /*
         * Allocate skbuffs greedily, even though we batch updates to the
         * receive ring. This creates a less bursty demand on the memory
         * allocator, so should reduce the chance of failed allocation
         * requests both for ourselves and for other kernel subsystems.
         */
        batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
        for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
                skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD,
                                         GFP_ATOMIC | __GFP_NOWARN);
                if (unlikely(!skb))
                        goto no_skb;

                page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
                if (!page) {
                        kfree_skb(skb);
no_skb:
                        /* Any skbuffs queued for refill? Force them out. */
                        if (i != 0)
                                goto refill;
                        /* Could not allocate any skbuffs. Try again later. */
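                        /*
                         * Retry-path note (a sketch of what is assumed to
                         * happen next): HZ/10 jiffies is roughly 100 ms
                         * regardless of the kernel's HZ setting.  When
                         * rx_refill_timer fires, rx_refill_timeout() above
                         * merely reschedules NAPI polling, which is expected
                         * to end up back in this function once memory
                         * pressure has eased.
                         */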
                        mod_timer(&np->rx_refill_timer,
                                  jiffies + (HZ/10));
                        break;
                }

                skb_shinfo(skb)->frags[0].page = page;
                skb_shinfo(skb)->nr_frags = 1;
                __skb_queue_tail(&np->rx_batch, skb);
        }

        /* Is the batch large enough to be worthwhile? */
        if (i < (np->rx_target/2)) {
                if (req_prod > np->rx.sring->req_prod)
                        goto push;
                return;
        }

        /* Adjust our fill target if we risked running out of buffers. */
        if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
            ((np->rx_target *= 2) > np->rx_max_target))
                np->rx_target = np->rx_max_target;

refill:
        for (i = 0; ; i++) {
                skb = __skb_dequeue(&np->rx_batch);
                if (skb == NULL)
                        break;

                skb->dev = dev;

                id = xennet_rxidx(req_prod + i);

                BUG_ON(np->rx_skbs[id]);
                np->rx_skbs[id] = skb;

                ref = gnttab_claim_grant_reference(&np->gref_rx_head);
                BUG_ON((signed short)ref < 0);
                np->grant_rx_ref[id] = ref;

                pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
                vaddr = page_address(skb_shinfo(skb)->frags[0].page);

                req = RING_GET_REQUEST(&np->rx, req_prod + i);
                gnttab_grant_foreign_access_ref(ref,
                                                np->xbdev->otherend_id,
                                                pfn_to_mfn(pfn),
                                                0);

                req->id = id;
                req->gref = ref;
        }

        wmb();          /* barrier so backend sees requests */
        /* Above is a suitable barrier to ensure backend will see requests. */
        np->rx.req_prod_pvt = req_prod + i;
push:
        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
        if (notify)
                notify_remote_via_irq(np->netdev->irq);
}

static int xennet_open(struct net_device *dev)
{
        struct netfront_info *np = netdev_priv(dev);

        napi_enable(&np->napi);

        spin_lock_bh(&np->rx_lock);
        if (netif_carrier_ok(dev)) {
                xennet_alloc_rx_buffers(dev);
                np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
                if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
                        netif_rx_schedule(dev, &np->napi);
        }
        spin_unlock_bh(&np->rx_lock);

        xennet_maybe_wake_tx(dev);

        return 0;
}

static void xennet_tx_buf_gc(struct net_device *dev)
{
        RING_IDX cons, prod;
        unsigned short id;
        struct netfront_info *np = netdev_priv(dev);
        struct sk_buff *skb;

        BUG_ON(!netif_carrier_ok(dev));

        do {
                prod = np->tx.sring->rsp_prod;
                rmb(); /* Ensure we see responses up to 'rp'. */

                for (cons = np->tx.rsp_cons; cons != prod; cons++) {
                        struct xen_netif_tx_response *txrsp;

                        txrsp = RING_GET_RESPONSE(&np->tx, cons);
                        if (txrsp->status == NETIF_RSP_NULL)
                                continue;

                        id  = txrsp->id;
                        skb = np->tx_skbs[id].skb;
                        if (unlikely(gnttab_query_foreign_access(
                                np->grant_tx_ref[id]) != 0)) {
                                printk(KERN_ALERT "xennet_tx_buf_gc: warning "
                                       "-- grant still in use by backend "
                                       "domain.\n");
                                BUG();
                        }
                        gnttab_end_foreign_access_ref(
                                np->grant_tx_ref[id], GNTMAP_readonly);
                        gnttab_release_grant_reference(
                                &np->gref_tx_head, np->grant_tx_ref[id]);
                        np->grant_tx_ref[id] = GRANT_INVALID_REF;
                        add_id_to_freelist(&np->tx_skb_freelist,
                                           np->tx_skbs, id);
                        dev_kfree_skb_irq(skb);
                }

                np->tx.rsp_cons = prod;

                /*
                 * Set a new event, then check for race with update of tx_cons.
                 * Note that it is essential to schedule a callback, no matter
                 * how few buffers are pending. Even if there is space in the
                 * transmit ring, higher layers may be blocked because too much
                 * data is outstanding: in such cases notification from Xen is
                 * likely to be the only kick that we'll get.
                 */
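                /*
                 * Worked example of the threshold below (illustrative
                 * numbers only): with prod == 10 responses consumed and
                 * req_prod == 30 requests posted, rsp_event becomes
                 * 10 + ((30 - 10) >> 1) + 1 = 21, i.e. the backend will
                 * notify us again once roughly half of the outstanding
                 * requests have completed.
                 */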
                np->tx.sring->rsp_event =
                        prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
                mb(); /* update shared area */
        } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));

        xennet_maybe_wake_tx(dev);
}

static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
                              struct xen_netif_tx_request *tx)
{
        struct netfront_info *np = netdev_priv(dev);
        char *data = skb->data;
        unsigned long mfn;
        RING_IDX prod = np->tx.req_prod_pvt;
        int frags = skb_shinfo(skb)->nr_frags;
        unsigned int offset = offset_in_page(data);
        unsigned int len = skb_headlen(skb);
        unsigned int id;
        grant_ref_t ref;
        int i;

        /* While the header overlaps a page boundary (including being
           larger than a page), split it into page-sized chunks. */
        while (len > PAGE_SIZE - offset) {
                tx->size = PAGE_SIZE - offset;
                tx->flags |= NETTXF_more_data;
                len -= tx->size;
                data += tx->size;
                offset = 0;

                id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
                np->tx_skbs[id].skb = skb_get(skb);
                tx = RING_GET_REQUEST(&np->tx, prod++);
                tx->id = id;
                ref = gnttab_claim_grant_reference(&np->gref_tx_head);
                BUG_ON((signed short)ref < 0);

                mfn = virt_to_mfn(data);
                gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
                                                mfn, GNTMAP_readonly);

                tx->gref = np->grant_tx_ref[id] = ref;
                tx->offset = offset;
                tx->size = len;
                tx->flags = 0;
        }

        /* Grant backend access to each skb fragment page. */
        for (i = 0; i < frags; i++) {
                skb_frag_t *frag = skb_shinfo(skb)->frags + i;

                tx->flags |= NETTXF_more_data;

                id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
                np->tx_skbs[id].skb = skb_get(skb);
                tx = RING_GET_REQUEST(&np->tx, prod++);
                tx->id = id;
                ref = gnttab_claim_grant_reference(&np->gref_tx_head);
                BUG_ON((signed short)ref < 0);

                mfn = pfn_to_mfn(page_to_pfn(frag->page));
                gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
                                                mfn, GNTMAP_readonly);

                tx->gref = np->grant_tx_ref[id] = ref;
                tx->offset = frag->page_offset;
                tx->size = frag->size;
                tx->flags = 0;
        }

        np->tx.req_prod_pvt = prod;
}

static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
        unsigned short id;
        struct netfront_info *np = netdev_priv(dev);
        struct xen_netif_tx_request *tx;
        struct xen_netif_extra_info *extra;
        char *data = skb->data;
        RING_IDX i;
        grant_ref_t ref;
        unsigned long mfn;
        int notify;
        int frags = skb_shinfo(skb)->nr_frags;
        unsigned int offset = offset_in_page(data);
        unsigned int len = skb_headlen(skb);

        frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
        if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
                printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
                       frags);
                dump_stack();
                goto drop;
        }

        spin_lock_irq(&np->tx_lock);

        if (unlikely(!netif_carrier_ok(dev) ||
                     (frags > 1 && !xennet_can_sg(dev)) ||
                     netif_needs_gso(dev, skb))) {
                spin_unlock_irq(&np->tx_lock);
                goto drop;
        }

        i = np->tx.req_prod_pvt;

        id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
        np->tx_skbs[id].skb = skb;

        tx = RING_GET_REQUEST(&np->tx, i);

        tx->id   = id;
        ref = gnttab_claim_grant_reference(&np->gref_tx_head);
        BUG_ON((signed short)ref < 0);
        mfn = virt_to_mfn(data);
        gnttab_grant_foreign_access_ref(
                ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
        tx->gref = np->grant_tx_ref[id] = ref;
        tx->offset = offset;
        tx->size = len;
        extra = NULL;

        tx->flags = 0;
        if (skb->ip_summed == CHECKSUM_PARTIAL)
                /* local packet? */
                tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
        else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
                /* remote but checksummed. */
                tx->flags |= NETTXF_data_validated;
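        /*
         * Ring layout for a GSO packet -- an illustrative sketch, not part
         * of the original comments: the first tx request carries
         * NETTXF_extra_info, the very next slot is reused as a
         * xen_netif_extra_info describing the TCPv4 GSO parameters, and
         * the remaining header/fragment chunks follow via
         * xennet_make_frags():
         *
         *   [ tx req | extra(GSO) | tx req | tx req | ... ]
         */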
        if (skb_shinfo(skb)->gso_size) {
                struct xen_netif_extra_info *gso;

                gso = (struct xen_netif_extra_info *)
                        RING_GET_REQUEST(&np->tx, ++i);

                if (extra)
                        extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
                else
                        tx->flags |= NETTXF_extra_info;

                gso->u.gso.size = skb_shinfo(skb)->gso_size;
                gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
                gso->u.gso.pad = 0;
                gso->u.gso.features = 0;

                gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
                gso->flags = 0;
                extra = gso;
        }

        np->tx.req_prod_pvt = i + 1;

        xennet_make_frags(skb, dev, tx);
        tx->size = skb->len;

        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
        if (notify)
                notify_remote_via_irq(np->netdev->irq);

        dev->stats.tx_bytes += skb->len;
        dev->stats.tx_packets++;

        /* Note: It is not safe to access skb after xennet_tx_buf_gc()! */
        xennet_tx_buf_gc(dev);

        if (!netfront_tx_slot_available(np))
                netif_stop_queue(dev);

        spin_unlock_irq(&np->tx_lock);

        return 0;

drop:
        dev->stats.tx_dropped++;
        dev_kfree_skb(skb);
        return 0;
}

static int xennet_close(struct net_device *dev)
{
        struct netfront_info *np = netdev_priv(dev);
        netif_stop_queue(np->netdev);
        napi_disable(&np->napi);
        return 0;
}

static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
                                grant_ref_t ref)
{
        int new = xennet_rxidx(np->rx.req_prod_pvt);

        BUG_ON(np->rx_skbs[new]);
        np->rx_skbs[new] = skb;
        np->grant_rx_ref[new] = ref;
        RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
        RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
        np->rx.req_prod_pvt++;
}

static int xennet_get_extras(struct netfront_info *np,
                             struct xen_netif_extra_info *extras,
                             RING_IDX rp)
{
        struct xen_netif_extra_info *extra;
        struct device *dev = &np->netdev->dev;
        RING_IDX cons = np->rx.rsp_cons;
        int err = 0;

        do {
                struct sk_buff *skb;
                grant_ref_t ref;

                if (unlikely(cons + 1 == rp)) {
                        if (net_ratelimit())
                                dev_warn(dev, "Missing extra info\n");
                        err = -EBADR;
                        break;
                }

                extra = (struct xen_netif_extra_info *)
                        RING_GET_RESPONSE(&np->rx, ++cons);

                if (unlikely(!extra->type ||
                             extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
                        if (net_ratelimit())
                                dev_warn(dev, "Invalid extra type: %d\n",
                                         extra->type);