📄 ipoib_main.c
字号:
/* * Check if our queue is stopped. Since we have the LLTX bit * set, we can't rely on netif_stop_queue() preventing our * xmit function from being called with a full queue. */ if (unlikely(netif_queue_stopped(dev))) { spin_unlock_irqrestore(&priv->tx_lock, flags); return NETDEV_TX_BUSY; } if (skb->dst && skb->dst->neighbour) { if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { path_lookup(skb, dev); goto out; } neigh = *to_ipoib_neigh(skb->dst->neighbour); if (likely(neigh->ah)) { ipoib_send(dev, skb, neigh->ah, be32_to_cpup((__be32 *) skb->dst->neighbour->ha)); goto out; } if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { spin_lock(&priv->lock); __skb_queue_tail(&neigh->queue, skb); spin_unlock(&priv->lock); } else { ++priv->stats.tx_dropped; dev_kfree_skb_any(skb); } } else { struct ipoib_pseudoheader *phdr = (struct ipoib_pseudoheader *) skb->data; skb_pull(skb, sizeof *phdr); if (phdr->hwaddr[4] == 0xff) { /* Add in the P_Key for multicast*/ phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff; phdr->hwaddr[9] = priv->pkey & 0xff; ipoib_mcast_send(dev, (union ib_gid *) (phdr->hwaddr + 4), skb); } else { /* unicast GID -- should be ARP reply */ if (be16_to_cpup((u16 *) skb->data) != ETH_P_ARP) { ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x " IPOIB_GID_FMT "\n", skb->dst ? "neigh" : "dst", be16_to_cpup((u16 *) skb->data), be32_to_cpup((u32 *) phdr->hwaddr), IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4))); dev_kfree_skb_any(skb); ++priv->stats.tx_dropped; goto out; } unicast_arp_send(skb, dev, phdr); } }out: spin_unlock_irqrestore(&priv->tx_lock, flags); return NETDEV_TX_OK;}static struct net_device_stats *ipoib_get_stats(struct net_device *dev){ struct ipoib_dev_priv *priv = netdev_priv(dev); return &priv->stats;}static void ipoib_timeout(struct net_device *dev){ struct ipoib_dev_priv *priv = netdev_priv(dev); ipoib_warn(priv, "transmit timeout: latency %ld\n", jiffies - dev->trans_start); /* XXX reset QP, etc. */}static int ipoib_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len){ struct ipoib_header *header; header = (struct ipoib_header *) skb_push(skb, sizeof *header); header->proto = htons(type); header->reserved = 0; /* * If we don't have a neighbour structure, stuff the * destination address onto the front of the skb so we can * figure out where to send the packet later. */ if (!skb->dst || !skb->dst->neighbour) { struct ipoib_pseudoheader *phdr = (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr); memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN); } return 0;}static void ipoib_set_mcast_list(struct net_device *dev){ struct ipoib_dev_priv *priv = netdev_priv(dev); schedule_work(&priv->restart_task);}static void ipoib_neigh_destructor(struct neighbour *n){ struct ipoib_neigh *neigh = *to_ipoib_neigh(n); struct ipoib_dev_priv *priv = netdev_priv(n->dev); unsigned long flags; ipoib_dbg(priv, "neigh_destructor for %06x " IPOIB_GID_FMT "\n", be32_to_cpup((__be32 *) n->ha), IPOIB_GID_ARG(*((union ib_gid *) (n->ha + 4)))); spin_lock_irqsave(&priv->lock, flags); if (neigh) { if (neigh->ah) ipoib_put_ah(neigh->ah); list_del(&neigh->list); *to_ipoib_neigh(n) = NULL; kfree(neigh); } spin_unlock_irqrestore(&priv->lock, flags);}static int ipoib_neigh_setup(struct neighbour *neigh){ /* * Is this kosher? I can't find anybody in the kernel that * sets neigh->destructor, so we should be able to set it here * without trouble. */ neigh->ops->destructor = ipoib_neigh_destructor; return 0;}static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms){ parms->neigh_setup = ipoib_neigh_setup; return 0;}int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port){ struct ipoib_dev_priv *priv = netdev_priv(dev); /* Allocate RX/TX "rings" to hold queued skbs */ priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf), GFP_KERNEL); if (!priv->rx_ring) { printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", ca->name, IPOIB_RX_RING_SIZE); goto out; } memset(priv->rx_ring, 0, IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf)); priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf), GFP_KERNEL); if (!priv->tx_ring) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", ca->name, IPOIB_TX_RING_SIZE); goto out_rx_ring_cleanup; } memset(priv->tx_ring, 0, IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf)); /* priv->tx_head & tx_tail are already 0 */ if (ipoib_ib_dev_init(dev, ca, port)) goto out_tx_ring_cleanup; return 0;out_tx_ring_cleanup: kfree(priv->tx_ring);out_rx_ring_cleanup: kfree(priv->rx_ring);out: return -ENOMEM;}void ipoib_dev_cleanup(struct net_device *dev){ struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv; ipoib_delete_debug_file(dev); /* Delete any child interfaces first */ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { unregister_netdev(cpriv->dev); ipoib_dev_cleanup(cpriv->dev); free_netdev(cpriv->dev); } ipoib_ib_dev_cleanup(dev); if (priv->rx_ring) { kfree(priv->rx_ring); priv->rx_ring = NULL; } if (priv->tx_ring) { kfree(priv->tx_ring); priv->tx_ring = NULL; }}static void ipoib_setup(struct net_device *dev){ struct ipoib_dev_priv *priv = netdev_priv(dev); dev->open = ipoib_open; dev->stop = ipoib_stop; dev->change_mtu = ipoib_change_mtu; dev->hard_start_xmit = ipoib_start_xmit; dev->get_stats = ipoib_get_stats; dev->tx_timeout = ipoib_timeout; dev->hard_header = ipoib_hard_header; dev->set_multicast_list = ipoib_set_mcast_list; dev->neigh_setup = ipoib_neigh_setup_dev; dev->watchdog_timeo = HZ; dev->rebuild_header = NULL; dev->set_mac_address = NULL; dev->header_cache_update = NULL; dev->flags |= IFF_BROADCAST | IFF_MULTICAST; /* * We add in INFINIBAND_ALEN to allow for the destination * address "pseudoheader" for skbs without neighbour struct. */ dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN; dev->addr_len = INFINIBAND_ALEN; dev->type = ARPHRD_INFINIBAND; dev->tx_queue_len = IPOIB_TX_RING_SIZE * 2; dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; /* MTU will be reset when mcast join happens */ dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN; priv->mcast_mtu = priv->admin_mtu = dev->mtu; memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); netif_carrier_off(dev); SET_MODULE_OWNER(dev); priv->dev = dev; spin_lock_init(&priv->lock); spin_lock_init(&priv->tx_lock); init_MUTEX(&priv->mcast_mutex); init_MUTEX(&priv->vlan_mutex); INIT_LIST_HEAD(&priv->path_list); INIT_LIST_HEAD(&priv->child_intfs); INIT_LIST_HEAD(&priv->dead_ahs); INIT_LIST_HEAD(&priv->multicast_list); INIT_WORK(&priv->pkey_task, ipoib_pkey_poll, priv->dev); INIT_WORK(&priv->mcast_task, ipoib_mcast_join_task, priv->dev); INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush, priv->dev); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task, priv->dev); INIT_WORK(&priv->ah_reap_task, ipoib_reap_ah, priv->dev);}struct ipoib_dev_priv *ipoib_intf_alloc(const char *name){ struct net_device *dev; dev = alloc_netdev((int) sizeof (struct ipoib_dev_priv), name, ipoib_setup); if (!dev) return NULL; return netdev_priv(dev);}static ssize_t show_pkey(struct class_device *cdev, char *buf){ struct ipoib_dev_priv *priv = netdev_priv(container_of(cdev, struct net_device, class_dev)); return sprintf(buf, "0x%04x\n", priv->pkey);}static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);static ssize_t create_child(struct class_device *cdev, const char *buf, size_t count){ int pkey; int ret; if (sscanf(buf, "%i", &pkey) != 1) return -EINVAL; if (pkey < 0 || pkey > 0xffff) return -EINVAL; ret = ipoib_vlan_add(container_of(cdev, struct net_device, class_dev), pkey); return ret ? ret : count;}static CLASS_DEVICE_ATTR(create_child, S_IWUGO, NULL, create_child);static ssize_t delete_child(struct class_device *cdev, const char *buf, size_t count){ int pkey; int ret; if (sscanf(buf, "%i", &pkey) != 1) return -EINVAL; if (pkey < 0 || pkey > 0xffff) return -EINVAL; ret = ipoib_vlan_delete(container_of(cdev, struct net_device, class_dev), pkey); return ret ? ret : count;}static CLASS_DEVICE_ATTR(delete_child, S_IWUGO, NULL, delete_child);int ipoib_add_pkey_attr(struct net_device *dev){ return class_device_create_file(&dev->class_dev, &class_device_attr_pkey);}static struct net_device *ipoib_add_port(const char *format, struct ib_device *hca, u8 port){ struct ipoib_dev_priv *priv; int result = -ENOMEM; priv = ipoib_intf_alloc(format); if (!priv) goto alloc_mem_failed; SET_NETDEV_DEV(priv->dev, hca->dma_device); result = ib_query_pkey(hca, port, 0, &priv->pkey); if (result) { printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", hca->name, port, result); goto alloc_mem_failed; } priv->dev->broadcast[8] = priv->pkey >> 8; priv->dev->broadcast[9] = priv->pkey & 0xff; result = ib_query_gid(hca, port, 0, &priv->local_gid); if (result) { printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n", hca->name, port, result); goto alloc_mem_failed; } else memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); result = ipoib_dev_init(priv->dev, hca, port); if (result < 0) { printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n", hca->name, port, result); goto device_init_failed; } INIT_IB_EVENT_HANDLER(&priv->event_handler, priv->ca, ipoib_event); result = ib_register_event_handler(&priv->event_handler); if (result < 0) { printk(KERN_WARNING "%s: ib_register_event_handler failed for " "port %d (ret = %d)\n", hca->name, port, result); goto event_failed; } result = register_netdev(priv->dev); if (result) { printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n", hca->name, port, result); goto register_failed; } if (ipoib_create_debug_file(priv->dev)) goto debug_failed; if (ipoib_add_pkey_attr(priv->dev)) goto sysfs_failed; if (class_device_create_file(&priv->dev->class_dev, &class_device_attr_create_child)) goto sysfs_failed; if (class_device_create_file(&priv->dev->class_dev, &class_device_attr_delete_child)) goto sysfs_failed; return priv->dev;sysfs_failed: ipoib_delete_debug_file(priv->dev);debug_failed: unregister_netdev(priv->dev);register_failed: ib_unregister_event_handler(&priv->event_handler);event_failed: ipoib_dev_cleanup(priv->dev);device_init_failed: free_netdev(priv->dev);alloc_mem_failed: return ERR_PTR(result);}static void ipoib_add_one(struct ib_device *device){ struct list_head *dev_list; struct net_device *dev; struct ipoib_dev_priv *priv; int s, e, p; dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); if (!dev_list) return; INIT_LIST_HEAD(dev_list); if (device->node_type == IB_NODE_SWITCH) { s = 0; e = 0; } else { s = 1; e = device->phys_port_cnt; } for (p = s; p <= e; ++p) { dev = ipoib_add_port("ib%d", device, p); if (!IS_ERR(dev)) { priv = netdev_priv(dev); list_add_tail(&priv->list, dev_list); } } ib_set_client_data(device, &ipoib_client, dev_list);}static void ipoib_remove_one(struct ib_device *device){ struct ipoib_dev_priv *priv, *tmp; struct list_head *dev_list; dev_list = ib_get_client_data(device, &ipoib_client); list_for_each_entry_safe(priv, tmp, dev_list, list) { ib_unregister_event_handler(&priv->event_handler); unregister_netdev(priv->dev); ipoib_dev_cleanup(priv->dev); free_netdev(priv->dev); }}static int __init ipoib_init_module(void){ int ret; ret = ipoib_register_debugfs(); if (ret) return ret; /* * We create our own workqueue mainly because we want to be * able to flush it when devices are being removed. We can't * use schedule_work()/flush_scheduled_work() because both * unregister_netdev() and linkwatch_event take the rtnl lock, * so flush_scheduled_work() can deadlock during device * removal. */ ipoib_workqueue = create_singlethread_workqueue("ipoib"); if (!ipoib_workqueue) { ret = -ENOMEM; goto err_fs; } ret = ib_register_client(&ipoib_client); if (ret) goto err_wq; return 0;err_fs: ipoib_unregister_debugfs();err_wq: destroy_workqueue(ipoib_workqueue); return ret;}static void __exit ipoib_cleanup_module(void){ ipoib_unregister_debugfs(); ib_unregister_client(&ipoib_client); destroy_workqueue(ipoib_workqueue);}module_init(ipoib_init_module);module_exit(ipoib_cleanup_module);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -