xen-netfront.c
/*
 * Virtual network driver for conversing with remote driver backends.
 *
 * Copyright (c) 2002-2005, K A Fraser
 * Copyright (c) 2005, XenSource Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/ethtool.h>
#include <linux/if_ether.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/moduleparam.h>
#include <linux/mm.h>
#include <net/ip.h>

#include <xen/xenbus.h>
#include <xen/events.h>
#include <xen/page.h>
#include <xen/grant_table.h>

#include <xen/interface/io/netif.h>
#include <xen/interface/memory.h>
#include <xen/interface/grant_table.h>

static struct ethtool_ops xennet_ethtool_ops;

struct netfront_cb {
        struct page *page;
        unsigned offset;
};

#define NETFRONT_SKB_CB(skb)    ((struct netfront_cb *)((skb)->cb))

#define RX_COPY_THRESHOLD 256

#define GRANT_INVALID_REF       0

#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)

struct netfront_info {
        struct list_head list;
        struct net_device *netdev;

        struct napi_struct napi;

        unsigned int evtchn;
        struct xenbus_device *xbdev;

        spinlock_t tx_lock;
        struct xen_netif_tx_front_ring tx;
        int tx_ring_ref;

        /*
         * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
         * are linked from tx_skb_freelist through skb_entry.link.
         *
         * NB. Freelist index entries are always going to be less than
         * PAGE_OFFSET, whereas pointers to skbs will always be equal or
         * greater than PAGE_OFFSET: we use this property to distinguish
         * them.
         */
        union skb_entry {
                struct sk_buff *skb;
                unsigned link;
        } tx_skbs[NET_TX_RING_SIZE];
        grant_ref_t gref_tx_head;
        grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
        unsigned tx_skb_freelist;

        spinlock_t rx_lock ____cacheline_aligned_in_smp;
        struct xen_netif_rx_front_ring rx;
        int rx_ring_ref;

        /* Receive-ring batched refills. */
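        /*
         * How the refill targets interact -- a sketch, assuming the usual
         * 4 KiB page size (which makes both shared rings 256 slots):
         *
         *   rx_target          current refill goal for the receive ring,
         *                      kept between rx_min_target and rx_max_target
         *   RX_MIN_TARGET      lowest value the target may be tuned down to
         *   RX_DFL_MIN_TARGET  default used when the driver initialises
         *                      these fields (not shown in this excerpt)
         *   RX_MAX_TARGET      ceiling, min(NET_RX_RING_SIZE, 256) = 256
         *
         * xennet_alloc_rx_buffers() below doubles rx_target (64 -> 128 ->
         * 256) whenever fewer than rx_target/4 posted buffers remain
         * unconsumed, so a busy ring quickly grows toward the ceiling.
         */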
#define RX_MIN_TARGET 8
#define RX_DFL_MIN_TARGET 64
#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
        unsigned rx_min_target, rx_max_target, rx_target;
        struct sk_buff_head rx_batch;

        struct timer_list rx_refill_timer;

        struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
        grant_ref_t gref_rx_head;
        grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];

        unsigned long rx_pfn_array[NET_RX_RING_SIZE];
        struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
        struct mmu_update rx_mmu[NET_RX_RING_SIZE];
};

struct netfront_rx_info {
        struct xen_netif_rx_response rx;
        struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
};

/*
 * Access macros for acquiring/freeing slots in tx_skbs[].
 */
static void add_id_to_freelist(unsigned *head, union skb_entry *list,
                               unsigned short id)
{
        list[id].link = *head;
        *head = id;
}

static unsigned short get_id_from_freelist(unsigned *head,
                                           union skb_entry *list)
{
        unsigned int id = *head;
        *head = list[id].link;
        return id;
}

static int xennet_rxidx(RING_IDX idx)
{
        return idx & (NET_RX_RING_SIZE - 1);
}

static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
                                         RING_IDX ri)
{
        int i = xennet_rxidx(ri);
        struct sk_buff *skb = np->rx_skbs[i];
        np->rx_skbs[i] = NULL;
        return skb;
}

static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
                                     RING_IDX ri)
{
        int i = xennet_rxidx(ri);
        grant_ref_t ref = np->grant_rx_ref[i];
        np->grant_rx_ref[i] = GRANT_INVALID_REF;
        return ref;
}

#ifdef CONFIG_SYSFS
static int xennet_sysfs_addif(struct net_device *netdev);
static void xennet_sysfs_delif(struct net_device *netdev);
#else /* !CONFIG_SYSFS */
#define xennet_sysfs_addif(dev) (0)
#define xennet_sysfs_delif(dev) do { } while (0)
#endif

static int xennet_can_sg(struct net_device *dev)
{
        return dev->features & NETIF_F_SG;
}

static void rx_refill_timeout(unsigned long data)
{
        struct net_device *dev = (struct net_device *)data;
        struct netfront_info *np = netdev_priv(dev);
        netif_rx_schedule(dev, &np->napi);
}

static int netfront_tx_slot_available(struct netfront_info *np)
{
        return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
                (TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
}

static void xennet_maybe_wake_tx(struct net_device *dev)
{
        struct netfront_info *np = netdev_priv(dev);

        if (unlikely(netif_queue_stopped(dev)) &&
            netfront_tx_slot_available(np) &&
            likely(netif_running(dev)))
                netif_wake_queue(dev);
}

static void xennet_alloc_rx_buffers(struct net_device *dev)
{
        unsigned short id;
        struct netfront_info *np = netdev_priv(dev);
        struct sk_buff *skb;
        struct page *page;
        int i, batch_target, notify;
        RING_IDX req_prod = np->rx.req_prod_pvt;
        grant_ref_t ref;
        unsigned long pfn;
        void *vaddr;
        struct xen_netif_rx_request *req;

        if (unlikely(!netif_carrier_ok(dev)))
                return;

        /*
         * Allocate skbuffs greedily, even though we batch updates to the
         * receive ring. This creates a less bursty demand on the memory
         * allocator, so should reduce the chance of failed allocation
         * requests both for ourselves and for other kernel subsystems.
         */
        batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
        for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
                skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD,
                                         GFP_ATOMIC | __GFP_NOWARN);
                if (unlikely(!skb))
                        goto no_skb;

                page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
                if (!page) {
                        kfree_skb(skb);
no_skb:
                        /* Any skbuffs queued for refill? Force them out. */
                        if (i != 0)
                                goto refill;
                        /* Could not allocate any skbuffs. Try again later. */
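                        /*
                         * Retry-path note (a sketch of what is assumed to
                         * happen next): HZ/10 jiffies is roughly 100 ms
                         * regardless of the kernel's HZ setting.  When
                         * rx_refill_timer fires, rx_refill_timeout() above
                         * merely reschedules NAPI polling, which is expected
                         * to end up back in this function once memory
                         * pressure has eased.
                         */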
                        mod_timer(&np->rx_refill_timer,
                                  jiffies + (HZ/10));
                        break;
                }

                skb_shinfo(skb)->frags[0].page = page;
                skb_shinfo(skb)->nr_frags = 1;
                __skb_queue_tail(&np->rx_batch, skb);
        }

        /* Is the batch large enough to be worthwhile? */
        if (i < (np->rx_target/2)) {
                if (req_prod > np->rx.sring->req_prod)
                        goto push;
                return;
        }

        /* Adjust our fill target if we risked running out of buffers. */
        if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
            ((np->rx_target *= 2) > np->rx_max_target))
                np->rx_target = np->rx_max_target;

refill:
        for (i = 0; ; i++) {
                skb = __skb_dequeue(&np->rx_batch);
                if (skb == NULL)
                        break;

                skb->dev = dev;

                id = xennet_rxidx(req_prod + i);

                BUG_ON(np->rx_skbs[id]);
                np->rx_skbs[id] = skb;

                ref = gnttab_claim_grant_reference(&np->gref_rx_head);
                BUG_ON((signed short)ref < 0);
                np->grant_rx_ref[id] = ref;

                pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
                vaddr = page_address(skb_shinfo(skb)->frags[0].page);

                req = RING_GET_REQUEST(&np->rx, req_prod + i);
                gnttab_grant_foreign_access_ref(ref,
                                                np->xbdev->otherend_id,
                                                pfn_to_mfn(pfn),
                                                0);

                req->id = id;
                req->gref = ref;
        }

        wmb();          /* barrier so backend sees requests */
        /* Above is a suitable barrier to ensure backend will see requests. */
        np->rx.req_prod_pvt = req_prod + i;
push:
        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
        if (notify)
                notify_remote_via_irq(np->netdev->irq);
}

static int xennet_open(struct net_device *dev)
{
        struct netfront_info *np = netdev_priv(dev);

        napi_enable(&np->napi);

        spin_lock_bh(&np->rx_lock);
        if (netif_carrier_ok(dev)) {
                xennet_alloc_rx_buffers(dev);
                np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
                if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
                        netif_rx_schedule(dev, &np->napi);
        }
        spin_unlock_bh(&np->rx_lock);

        xennet_maybe_wake_tx(dev);

        return 0;
}

static void xennet_tx_buf_gc(struct net_device *dev)
{
        RING_IDX cons, prod;
        unsigned short id;
        struct netfront_info *np = netdev_priv(dev);
        struct sk_buff *skb;

        BUG_ON(!netif_carrier_ok(dev));

        do {
                prod = np->tx.sring->rsp_prod;
                rmb(); /* Ensure we see responses up to 'rp'. */

                for (cons = np->tx.rsp_cons; cons != prod; cons++) {
                        struct xen_netif_tx_response *txrsp;

                        txrsp = RING_GET_RESPONSE(&np->tx, cons);
                        if (txrsp->status == NETIF_RSP_NULL)
                                continue;

                        id  = txrsp->id;
                        skb = np->tx_skbs[id].skb;
                        if (unlikely(gnttab_query_foreign_access(
                                np->grant_tx_ref[id]) != 0)) {
                                printk(KERN_ALERT "xennet_tx_buf_gc: warning "
                                       "-- grant still in use by backend "
                                       "domain.\n");
                                BUG();
                        }
                        gnttab_end_foreign_access_ref(
                                np->grant_tx_ref[id], GNTMAP_readonly);
                        gnttab_release_grant_reference(
                                &np->gref_tx_head, np->grant_tx_ref[id]);
                        np->grant_tx_ref[id] = GRANT_INVALID_REF;
                        add_id_to_freelist(&np->tx_skb_freelist,
                                           np->tx_skbs, id);
                        dev_kfree_skb_irq(skb);
                }

                np->tx.rsp_cons = prod;

                /*
                 * Set a new event, then check for race with update of tx_cons.
                 * Note that it is essential to schedule a callback, no matter
                 * how few buffers are pending. Even if there is space in the
                 * transmit ring, higher layers may be blocked because too much
                 * data is outstanding: in such cases notification from Xen is
                 * likely to be the only kick that we'll get.
                 */
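                /*
                 * Worked example of the threshold below (illustrative
                 * numbers only): with prod == 10 responses consumed and
                 * req_prod == 30 requests posted, rsp_event becomes
                 * 10 + ((30 - 10) >> 1) + 1 = 21, i.e. the backend will
                 * notify us again once roughly half of the outstanding
                 * requests have completed.
                 */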
                np->tx.sring->rsp_event =
                        prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
                mb(); /* update shared area */
        } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));

        xennet_maybe_wake_tx(dev);
}

static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
                              struct xen_netif_tx_request *tx)
{
        struct netfront_info *np = netdev_priv(dev);
        char *data = skb->data;
        unsigned long mfn;
        RING_IDX prod = np->tx.req_prod_pvt;
        int frags = skb_shinfo(skb)->nr_frags;
        unsigned int offset = offset_in_page(data);
        unsigned int len = skb_headlen(skb);
        unsigned int id;
        grant_ref_t ref;
        int i;

        /* While the header overlaps a page boundary (including being
           larger than a page), split it into page-sized chunks. */
        while (len > PAGE_SIZE - offset) {
                tx->size = PAGE_SIZE - offset;
                tx->flags |= NETTXF_more_data;
                len -= tx->size;
                data += tx->size;
                offset = 0;

                id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
                np->tx_skbs[id].skb = skb_get(skb);
                tx = RING_GET_REQUEST(&np->tx, prod++);
                tx->id = id;
                ref = gnttab_claim_grant_reference(&np->gref_tx_head);
                BUG_ON((signed short)ref < 0);

                mfn = virt_to_mfn(data);
                gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
                                                mfn, GNTMAP_readonly);

                tx->gref = np->grant_tx_ref[id] = ref;
                tx->offset = offset;
                tx->size = len;
                tx->flags = 0;
        }

        /* Grant backend access to each skb fragment page. */
        for (i = 0; i < frags; i++) {
                skb_frag_t *frag = skb_shinfo(skb)->frags + i;

                tx->flags |= NETTXF_more_data;

                id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
                np->tx_skbs[id].skb = skb_get(skb);
                tx = RING_GET_REQUEST(&np->tx, prod++);
                tx->id = id;
                ref = gnttab_claim_grant_reference(&np->gref_tx_head);
                BUG_ON((signed short)ref < 0);

                mfn = pfn_to_mfn(page_to_pfn(frag->page));
                gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
                                                mfn, GNTMAP_readonly);

                tx->gref = np->grant_tx_ref[id] = ref;
                tx->offset = frag->page_offset;
                tx->size = frag->size;
                tx->flags = 0;
        }

        np->tx.req_prod_pvt = prod;
}

static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
        unsigned short id;
        struct netfront_info *np = netdev_priv(dev);
        struct xen_netif_tx_request *tx;
        struct xen_netif_extra_info *extra;
        char *data = skb->data;
        RING_IDX i;
        grant_ref_t ref;
        unsigned long mfn;
        int notify;
        int frags = skb_shinfo(skb)->nr_frags;
        unsigned int offset = offset_in_page(data);
        unsigned int len = skb_headlen(skb);

        frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
        if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
                printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
                       frags);
                dump_stack();
                goto drop;
        }

        spin_lock_irq(&np->tx_lock);

        if (unlikely(!netif_carrier_ok(dev) ||
                     (frags > 1 && !xennet_can_sg(dev)) ||
                     netif_needs_gso(dev, skb))) {
                spin_unlock_irq(&np->tx_lock);
                goto drop;
        }

        i = np->tx.req_prod_pvt;

        id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
        np->tx_skbs[id].skb = skb;

        tx = RING_GET_REQUEST(&np->tx, i);

        tx->id   = id;
        ref = gnttab_claim_grant_reference(&np->gref_tx_head);
        BUG_ON((signed short)ref < 0);
        mfn = virt_to_mfn(data);
        gnttab_grant_foreign_access_ref(
                ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
        tx->gref = np->grant_tx_ref[id] = ref;
        tx->offset = offset;
        tx->size = len;
        extra = NULL;

        tx->flags = 0;
        if (skb->ip_summed == CHECKSUM_PARTIAL)
                /* local packet? */
                tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
        else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
                /* remote but checksummed. */
                tx->flags |= NETTXF_data_validated;
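        /*
         * Ring layout for a GSO packet -- an illustrative sketch, not part
         * of the original comments: the first tx request carries
         * NETTXF_extra_info, the very next slot is reused as a
         * xen_netif_extra_info describing the TCPv4 GSO parameters, and
         * the remaining header/fragment chunks follow via
         * xennet_make_frags():
         *
         *   [ tx req | extra(GSO) | tx req | tx req | ... ]
         */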
        if (skb_shinfo(skb)->gso_size) {
                struct xen_netif_extra_info *gso;

                gso = (struct xen_netif_extra_info *)
                        RING_GET_REQUEST(&np->tx, ++i);

                if (extra)
                        extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
                else
                        tx->flags |= NETTXF_extra_info;

                gso->u.gso.size = skb_shinfo(skb)->gso_size;
                gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
                gso->u.gso.pad = 0;
                gso->u.gso.features = 0;

                gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
                gso->flags = 0;
                extra = gso;
        }

        np->tx.req_prod_pvt = i + 1;

        xennet_make_frags(skb, dev, tx);
        tx->size = skb->len;

        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
        if (notify)
                notify_remote_via_irq(np->netdev->irq);

        dev->stats.tx_bytes += skb->len;
        dev->stats.tx_packets++;

        /* Note: It is not safe to access skb after xennet_tx_buf_gc()! */
        xennet_tx_buf_gc(dev);

        if (!netfront_tx_slot_available(np))
                netif_stop_queue(dev);

        spin_unlock_irq(&np->tx_lock);

        return 0;

drop:
        dev->stats.tx_dropped++;
        dev_kfree_skb(skb);
        return 0;
}

static int xennet_close(struct net_device *dev)
{
        struct netfront_info *np = netdev_priv(dev);
        netif_stop_queue(np->netdev);
        napi_disable(&np->napi);
        return 0;
}

static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
                                grant_ref_t ref)
{
        int new = xennet_rxidx(np->rx.req_prod_pvt);

        BUG_ON(np->rx_skbs[new]);
        np->rx_skbs[new] = skb;
        np->grant_rx_ref[new] = ref;
        RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
        RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
        np->rx.req_prod_pvt++;
}

static int xennet_get_extras(struct netfront_info *np,
                             struct xen_netif_extra_info *extras,
                             RING_IDX rp)
{
        struct xen_netif_extra_info *extra;
        struct device *dev = &np->netdev->dev;
        RING_IDX cons = np->rx.rsp_cons;
        int err = 0;

        do {
                struct sk_buff *skb;
                grant_ref_t ref;

                if (unlikely(cons + 1 == rp)) {
                        if (net_ratelimit())
                                dev_warn(dev, "Missing extra info\n");
                        err = -EBADR;
                        break;
                }

                extra = (struct xen_netif_extra_info *)
                        RING_GET_RESPONSE(&np->rx, ++cons);

                if (unlikely(!extra->type ||
                             extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
                        if (net_ratelimit())
                                dev_warn(dev, "Invalid extra type: %d\n",
                                         extra->type);