xen-netfront.c
			err = -EINVAL;
		} else {
			memcpy(&extras[extra->type - 1], extra,
			       sizeof(*extra));
		}

		skb = xennet_get_rx_skb(np, cons);
		ref = xennet_get_rx_ref(np, cons);
		xennet_move_rx_slot(np, skb, ref);
	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);

	np->rx.rsp_cons = cons;
	return err;
}

static int xennet_get_responses(struct netfront_info *np,
				struct netfront_rx_info *rinfo, RING_IDX rp,
				struct sk_buff_head *list)
{
	struct xen_netif_rx_response *rx = &rinfo->rx;
	struct xen_netif_extra_info *extras = rinfo->extras;
	struct device *dev = &np->netdev->dev;
	RING_IDX cons = np->rx.rsp_cons;
	struct sk_buff *skb = xennet_get_rx_skb(np, cons);
	grant_ref_t ref = xennet_get_rx_ref(np, cons);
	int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
	int frags = 1;
	int err = 0;
	unsigned long ret;

	if (rx->flags & NETRXF_extra_info) {
		err = xennet_get_extras(np, extras, rp);
		cons = np->rx.rsp_cons;
	}

	for (;;) {
		if (unlikely(rx->status < 0 ||
			     rx->offset + rx->status > PAGE_SIZE)) {
			if (net_ratelimit())
				dev_warn(dev, "rx->offset: %x, size: %u\n",
					 rx->offset, rx->status);
			xennet_move_rx_slot(np, skb, ref);
			err = -EINVAL;
			goto next;
		}

		/*
		 * This definitely indicates a bug, either in this driver or in
		 * the backend driver. In future this should flag the bad
		 * situation to the system controller to reboot the backend.
		 */
		if (ref == GRANT_INVALID_REF) {
			if (net_ratelimit())
				dev_warn(dev, "Bad rx response id %d.\n",
					 rx->id);
			err = -EINVAL;
			goto next;
		}

		ret = gnttab_end_foreign_access_ref(ref, 0);
		BUG_ON(!ret);

		gnttab_release_grant_reference(&np->gref_rx_head, ref);

		__skb_queue_tail(list, skb);

next:
		if (!(rx->flags & NETRXF_more_data))
			break;

		if (cons + frags == rp) {
			if (net_ratelimit())
				dev_warn(dev, "Need more frags\n");
			err = -ENOENT;
			break;
		}

		rx = RING_GET_RESPONSE(&np->rx, cons + frags);
		skb = xennet_get_rx_skb(np, cons + frags);
		ref = xennet_get_rx_ref(np, cons + frags);
		frags++;
	}

	if (unlikely(frags > max)) {
		if (net_ratelimit())
			dev_warn(dev, "Too many frags\n");
		err = -E2BIG;
	}

	if (unlikely(err))
		np->rx.rsp_cons = cons + frags;

	return err;
}

static int xennet_set_skb_gso(struct sk_buff *skb,
			      struct xen_netif_extra_info *gso)
{
	if (!gso->u.gso.size) {
		if (net_ratelimit())
			printk(KERN_WARNING "GSO size must not be zero.\n");
		return -EINVAL;
	}

	/* Currently only TCPv4 S.O. is supported. */
	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
		if (net_ratelimit())
			printk(KERN_WARNING "Bad GSO type %d.\n",
			       gso->u.gso.type);
		return -EINVAL;
	}

	skb_shinfo(skb)->gso_size = gso->u.gso.size;
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	/* Header must be checked, and gso_segs computed. */
	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
	skb_shinfo(skb)->gso_segs = 0;

	return 0;
}

static RING_IDX xennet_fill_frags(struct netfront_info *np,
				  struct sk_buff *skb,
				  struct sk_buff_head *list)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	RING_IDX cons = np->rx.rsp_cons;
	skb_frag_t *frag = shinfo->frags + nr_frags;
	struct sk_buff *nskb;

	while ((nskb = __skb_dequeue(list))) {
		struct xen_netif_rx_response *rx =
			RING_GET_RESPONSE(&np->rx, ++cons);

		frag->page = skb_shinfo(nskb)->frags[0].page;
		frag->page_offset = rx->offset;
		frag->size = rx->status;

		skb->data_len += rx->status;

		skb_shinfo(nskb)->nr_frags = 0;
		kfree_skb(nskb);

		frag++;
		nr_frags++;
	}

	shinfo->nr_frags = nr_frags;
	return cons;
}

static int skb_checksum_setup(struct sk_buff *skb)
{
	struct iphdr *iph;
	unsigned char *th;
	int err = -EPROTO;

	if (skb->protocol != htons(ETH_P_IP))
		goto out;

	iph = (void *)skb->data;
	th = skb->data + 4 * iph->ihl;
	if (th >= skb_tail_pointer(skb))
		goto out;

	skb->csum_start = th - skb->head;
	switch (iph->protocol) {
	case IPPROTO_TCP:
		skb->csum_offset = offsetof(struct tcphdr, check);
		break;
	case IPPROTO_UDP:
		skb->csum_offset = offsetof(struct udphdr, check);
		break;
	default:
		if (net_ratelimit())
			printk(KERN_ERR "Attempting to checksum a non-"
			       "TCP/UDP packet, dropping a protocol"
			       " %d packet", iph->protocol);
		goto out;
	}

	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
		goto out;

	err = 0;

out:
	return err;
}

static int handle_incoming_queue(struct net_device *dev,
				 struct sk_buff_head *rxq)
{
	int packets_dropped = 0;
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(rxq)) != NULL) {
		struct page *page = NETFRONT_SKB_CB(skb)->page;
		void *vaddr = page_address(page);
		unsigned offset = NETFRONT_SKB_CB(skb)->offset;

		memcpy(skb->data, vaddr + offset,
		       skb_headlen(skb));

		if (page != skb_shinfo(skb)->frags[0].page)
			__free_page(page);

		/* Ethernet work: Delayed to here as it peeks the header. */
		skb->protocol = eth_type_trans(skb, dev);

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			if (skb_checksum_setup(skb)) {
				kfree_skb(skb);
				packets_dropped++;
				dev->stats.rx_errors++;
				continue;
			}
		}

		dev->stats.rx_packets++;
		dev->stats.rx_bytes += skb->len;

		/* Pass it up. */
		netif_receive_skb(skb);
		dev->last_rx = jiffies;
	}

	return packets_dropped;
}

static int xennet_poll(struct napi_struct *napi, int budget)
{
	struct netfront_info *np =
		container_of(napi, struct netfront_info, napi);
	struct net_device *dev = np->netdev;
	struct sk_buff *skb;
	struct netfront_rx_info rinfo;
	struct xen_netif_rx_response *rx = &rinfo.rx;
	struct xen_netif_extra_info *extras = rinfo.extras;
	RING_IDX i, rp;
	int work_done;
	struct sk_buff_head rxq;
	struct sk_buff_head errq;
	struct sk_buff_head tmpq;
	unsigned long flags;
	unsigned int len;
	int err;

	spin_lock(&np->rx_lock);

	skb_queue_head_init(&rxq);
	skb_queue_head_init(&errq);
	skb_queue_head_init(&tmpq);

	rp = np->rx.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	i = np->rx.rsp_cons;
	work_done = 0;
	while ((i != rp) && (work_done < budget)) {
		memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
		memset(extras, 0, sizeof(rinfo.extras));

		err = xennet_get_responses(np, &rinfo, rp, &tmpq);

		if (unlikely(err)) {
err:
			while ((skb = __skb_dequeue(&tmpq)))
				__skb_queue_tail(&errq, skb);
			dev->stats.rx_errors++;
			i = np->rx.rsp_cons;
			continue;
		}

		skb = __skb_dequeue(&tmpq);

		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
			struct xen_netif_extra_info *gso;
			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

			if (unlikely(xennet_set_skb_gso(skb, gso))) {
				__skb_queue_head(&tmpq, skb);
				np->rx.rsp_cons += skb_queue_len(&tmpq);
				goto err;
			}
		}

		NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page;
		NETFRONT_SKB_CB(skb)->offset = rx->offset;

		len = rx->status;
		if (len > RX_COPY_THRESHOLD)
			len = RX_COPY_THRESHOLD;
		skb_put(skb, len);

		if (rx->status > len) {
			skb_shinfo(skb)->frags[0].page_offset =
				rx->offset + len;
			skb_shinfo(skb)->frags[0].size = rx->status - len;
			skb->data_len = rx->status - len;
		} else {
			skb_shinfo(skb)->frags[0].page = NULL;
			skb_shinfo(skb)->nr_frags = 0;
		}

		i = xennet_fill_frags(np, skb, &tmpq);

		/*
		 * Truesize approximates the size of true data plus
		 * any supervisor overheads. Adding hypervisor
		 * overheads has been shown to significantly reduce
		 * achievable bandwidth with the default receive
		 * buffer size. It is therefore not wise to account
		 * for it here.
		 *
		 * After alloc_skb(RX_COPY_THRESHOLD), truesize is set
		 * to RX_COPY_THRESHOLD + the supervisor
		 * overheads. Here, we add the size of the data pulled
		 * in xennet_fill_frags().
		 *
		 * We also adjust for any unused space in the main
		 * data area by subtracting (RX_COPY_THRESHOLD -
		 * len). This is especially important with drivers
		 * which split incoming packets into header and data,
		 * using only 66 bytes of the main data area (see the
		 * e1000 driver for example.) On such systems,
		 * without this last adjustment, our achievable
		 * receive throughput using the standard receive
		 * buffer size was cut by 25%(!!!).
		 */
		skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
		skb->len += skb->data_len;

		if (rx->flags & NETRXF_csum_blank)
			skb->ip_summed = CHECKSUM_PARTIAL;
		else if (rx->flags & NETRXF_data_validated)
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		__skb_queue_tail(&rxq, skb);

		np->rx.rsp_cons = ++i;
		work_done++;
	}

	while ((skb = __skb_dequeue(&errq)))
		kfree_skb(skb);

	work_done -= handle_incoming_queue(dev, &rxq);

	/* If we get a callback with very few responses, reduce fill target. */
	/* NB. Note exponential increase, linear decrease. */
	if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
	     ((3*np->rx_target) / 4)) &&
	    (--np->rx_target < np->rx_min_target))
		np->rx_target = np->rx_min_target;

	xennet_alloc_rx_buffers(dev);

	if (work_done < budget) {
		int more_to_do = 0;

		local_irq_save(flags);

		RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
		if (!more_to_do)
			__netif_rx_complete(dev, napi);

		local_irq_restore(flags);
	}

	spin_unlock(&np->rx_lock);

	return work_done;
}

static int xennet_change_mtu(struct net_device *dev, int mtu)
{
	int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;

	if (mtu > max)
		return -EINVAL;
	dev->mtu = mtu;
	return 0;
}

static void xennet_release_tx_bufs(struct netfront_info *np)
{
	struct sk_buff *skb;
	int i;

	for (i = 0; i < NET_TX_RING_SIZE; i++) {
		/* Skip over entries which are actually freelist references */
		if ((unsigned long)np->tx_skbs[i].skb < PAGE_OFFSET)
			continue;

		skb = np->tx_skbs[i].skb;
		gnttab_end_foreign_access_ref(np->grant_tx_ref[i],
					      GNTMAP_readonly);
		gnttab_release_grant_reference(&np->gref_tx_head,
					       np->grant_tx_ref[i]);
		np->grant_tx_ref[i] = GRANT_INVALID_REF;
		add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i);
		dev_kfree_skb_irq(skb);
	}
}

static void xennet_release_rx_bufs(struct netfront_info *np)
{
	struct mmu_update *mmu = np->rx_mmu;
	struct multicall_entry *mcl = np->rx_mcl;
	struct sk_buff_head free_list;
	struct sk_buff *skb;
	unsigned long mfn;
	int xfer = 0, noxfer = 0, unused = 0;
	int id, ref;

	dev_warn(&np->netdev->dev, "%s: fix me for copying receiver.\n",
		 __func__);
	return;

	skb_queue_head_init(&free_list);

	spin_lock_bh(&np->rx_lock);

	for (id = 0; id < NET_RX_RING_SIZE; id++) {
		ref = np->grant_rx_ref[id];
		if (ref == GRANT_INVALID_REF) {
			unused++;
			continue;
		}

		skb = np->rx_skbs[id];
		mfn = gnttab_end_foreign_transfer_ref(ref);
		gnttab_release_grant_reference(&np->gref_rx_head, ref);
		np->grant_rx_ref[id] = GRANT_INVALID_REF;

		if (0 == mfn) {
			skb_shinfo(skb)->nr_frags = 0;
			dev_kfree_skb(skb);
			noxfer++;
			continue;
		}

		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
			/* Remap the page. */
			struct page *page = skb_shinfo(skb)->frags[0].page;
			unsigned long pfn = page_to_pfn(page);
			void *vaddr = page_address(page);

			MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
						mfn_pte(mfn, PAGE_KERNEL),
						0);
			mcl++;
			mmu->ptr = ((u64)mfn << PAGE_SHIFT)
				| MMU_MACHPHYS_UPDATE;
			mmu->val = pfn;
			mmu++;

			set_phys_to_machine(pfn, mfn);
		}
		__skb_queue_tail(&free_list, skb);
		xfer++;
	}

	dev_info(&np->netdev->dev, "%s: %d xfer, %d noxfer, %d unused\n",
		 __func__, xfer, noxfer, unused);

	if (xfer) {
		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
			/* Do all the remapping work and M2P updates. */
			MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu,
					 0, DOMID_SELF);
			mcl++;
			HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
		}
	}

	while ((skb = __skb_dequeue(&free_list)) != NULL)
		dev_kfree_skb(skb);

	spin_unlock_bh(&np->rx_lock);
}

static void xennet_uninit(struct net_device *dev)
{
	struct netfront_info *np = netdev_priv(dev);
	xennet_release_tx_bufs(np);
	xennet_release_rx_bufs(np);
	gnttab_free_grant_references(np->gref_tx_head);
	gnttab_free_grant_references(np->gref_rx_head);
}

static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev)
{
	int i, err;
	struct net_device *netdev;
	struct netfront_info *np;

	netdev = alloc_etherdev(sizeof(struct netfront_info));
	if (!netdev) {
		printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
		       __func__);
		return ERR_PTR(-ENOMEM);
	}

	np = netdev_priv(netdev);
	np->xbdev = dev;

	spin_lock_init(&np->tx_lock);
	spin_lock_init(&np->rx_lock);

	skb_queue_head_init(&np->rx_batch);
	np->rx_target     = RX_DFL_MIN_TARGET;
	np->rx_min_target = RX_DFL_MIN_TARGET;
	np->rx_max_target = RX_MAX_TARGET;

	init_timer(&np->rx_refill_timer);
	np->rx_refill_timer.data = (unsigned long)netdev;
	np->rx_refill_timer.function = rx_refill_timeout;

	/* Initialise tx_skbs as a free chain containing every entry. */
	np->tx_skb_freelist = 0;
	for (i = 0; i < NET_TX_RING_SIZE; i++) {
		np->tx_skbs[i].link = i+1;
		np->grant_tx_ref[i] = GRANT_INVALID_REF;
	}

	/* Clear out rx_skbs */
	for (i = 0; i < NET_RX_RING_SIZE; i++) {
		np->rx_skbs[i] = NULL;
		np->grant_rx_ref[i] = GRANT_INVALID_REF;
	}

	/* A grant for every tx ring slot */
	if (gnttab_alloc_grant_references(TX_MAX_TARGET,
					  &np->gref_tx_head) < 0) {
		printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
		err = -ENOMEM;
		goto exit;
	}
	/* A grant for every rx ring slot */
	if (gnttab_alloc_grant_references(RX_MAX_TARGET,
					  &np->gref_rx_head) < 0) {
		printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
		err = -ENOMEM;
		goto exit_free_tx;
	}

	netdev->open            = xennet_open;
	netdev->hard_start_xmit = xennet_start_xmit;
	netdev->stop            = xennet_close;
	netif_napi_add(netdev, &np->napi, xennet_poll, 64);
	netdev->uninit          = xennet_uninit;
	netdev->change_mtu      = xennet_change_mtu;
	netdev->features        = NETIF_F_IP_CSUM;

	SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops);
	SET_NETDEV_DEV(netdev, &dev->dev);

	np->netdev = netdev;

	netif_carrier_off(netdev);

	return netdev;

 exit_free_tx:
	gnttab_free_grant_references(np->gref_tx_head);
 exit:
	free_netdev(netdev);
	return ERR_PTR(err);
}

/**
 * Entry point to this code when a new device is created. Allocate the basic
 * structures and the ring buffers for communication with the backend, and
 * inform the backend of the appropriate details for those.
 */
static int __devinit netfront_probe(struct xenbus_device *dev,
				    const struct xenbus_device_id *id)
{
	int err;
	struct net_device *netdev;
	struct netfront_info *info;

	netdev = xennet_create_dev(dev);
	if (IS_ERR(netdev)) {
		err = PTR_ERR(netdev);
		xenbus_dev_fatal(dev, err, "creating netdev");
		return err;
	}

	info = netdev_priv(netdev);
	dev->dev.driver_data = info;

	err = register_netdev(info->netdev);
	if (err) {
		printk(KERN_WARNING "%s: register_netdev err=%d\n",
		       __func__, err);
		goto fail;
	}

	err = xennet_sysfs_addif(info->netdev);
	if (err) {
		unregister_netdev(info->netdev);
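
The fill-target adjustment near the end of xennet_poll() above implements only the "linear decrease" half of its comment: whenever more than three quarters of the receive buffers posted to the backend are still outstanding, np->rx_target is backed off by one and clamped at np->rx_min_target (the matching exponential increase lives in the buffer-allocation path, which is outside this excerpt). The following is a standalone user-space sketch of that policy, not driver code; RX_MIN_TARGET is a hypothetical constant standing in for the per-device np->rx_min_target field.

#include <stdio.h>

/* Hypothetical stand-in for the driver's per-device minimum fill target. */
#define RX_MIN_TARGET 8

/*
 * Model of the "linear decrease" step in xennet_poll(): if more than 3/4
 * of the buffers we asked the backend to fill are still unconsumed
 * (req_prod_pvt - rsp_prod on the real ring), lower the target by one,
 * clamped at the minimum. The decrement hides inside the && condition,
 * mirroring the driver's use of short-circuit evaluation.
 */
static int shrink_rx_target(int target, int outstanding)
{
	if ((outstanding > (3 * target) / 4) && (--target < RX_MIN_TARGET))
		target = RX_MIN_TARGET;
	return target;
}

int main(void)
{
	int target = 64;
	int poll;

	/* Simulate a run of polls where the backend barely drains the ring. */
	for (poll = 0; poll < 5; poll++) {
		target = shrink_rx_target(target, 60);
		printf("after poll %d: rx_target = %d\n", poll, target);
	}
	return 0;
}

Built with any C compiler, this prints the target dropping by one per simulated poll, which is how the driver gradually sheds posted buffers when NAPI polls keep finding the ring largely unconsumed.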