/* ipoib_main.c — web-viewer extraction header removed; original filename preserved. */
/* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * $Id: ipoib_main.c 1377 2004-12-23 19:57:12Z roland $ */#include "ipoib.h"#include <linux/module.h>#include <linux/init.h>#include <linux/slab.h>#include <linux/vmalloc.h>#include <linux/if_arp.h> /* For ARPHRD_xxx */#include <linux/ip.h>#include <linux/in.h>MODULE_AUTHOR("Roland Dreier");MODULE_DESCRIPTION("IP-over-InfiniBand net driver");MODULE_LICENSE("Dual BSD/GPL");#ifdef CONFIG_INFINIBAND_IPOIB_DEBUGint ipoib_debug_level;module_param_named(debug_level, ipoib_debug_level, int, 0644);MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");#endifstruct ipoib_path_iter { struct net_device *dev; struct ipoib_path path;};static const u8 ipv4_bcast_addr[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff};struct workqueue_struct *ipoib_workqueue;static void ipoib_add_one(struct ib_device *device);static void ipoib_remove_one(struct ib_device *device);static struct ib_client ipoib_client = { .name = "ipoib", .add = ipoib_add_one, .remove = ipoib_remove_one};int ipoib_open(struct net_device *dev){ struct ipoib_dev_priv *priv = netdev_priv(dev); ipoib_dbg(priv, "bringing up interface\n"); set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); if (ipoib_pkey_dev_delay_open(dev)) return 0; if (ipoib_ib_dev_open(dev)) return -EINVAL; if (ipoib_ib_dev_up(dev)) { ipoib_ib_dev_stop(dev); return -EINVAL; } if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; /* Bring up any child interfaces too */ down(&priv->vlan_mutex); list_for_each_entry(cpriv, &priv->child_intfs, list) { int flags; flags = cpriv->dev->flags; if (flags & IFF_UP) continue; dev_change_flags(cpriv->dev, flags | IFF_UP); } up(&priv->vlan_mutex); } netif_start_queue(dev); return 0;}static int ipoib_stop(struct net_device *dev){ struct ipoib_dev_priv *priv = netdev_priv(dev); ipoib_dbg(priv, "stopping interface\n"); clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); netif_stop_queue(dev); ipoib_ib_dev_down(dev); 
ipoib_ib_dev_stop(dev); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; /* Bring down any child interfaces too */ down(&priv->vlan_mutex); list_for_each_entry(cpriv, &priv->child_intfs, list) { int flags; flags = cpriv->dev->flags; if (!(flags & IFF_UP)) continue; dev_change_flags(cpriv->dev, flags & ~IFF_UP); } up(&priv->vlan_mutex); } return 0;}static int ipoib_change_mtu(struct net_device *dev, int new_mtu){ struct ipoib_dev_priv *priv = netdev_priv(dev); if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) return -EINVAL; priv->admin_mtu = new_mtu; dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); return 0;}static struct ipoib_path *__path_find(struct net_device *dev, union ib_gid *gid){ struct ipoib_dev_priv *priv = netdev_priv(dev); struct rb_node *n = priv->path_tree.rb_node; struct ipoib_path *path; int ret; while (n) { path = rb_entry(n, struct ipoib_path, rb_node); ret = memcmp(gid->raw, path->pathrec.dgid.raw, sizeof (union ib_gid)); if (ret < 0) n = n->rb_left; else if (ret > 0) n = n->rb_right; else return path; } return NULL;}static int __path_add(struct net_device *dev, struct ipoib_path *path){ struct ipoib_dev_priv *priv = netdev_priv(dev); struct rb_node **n = &priv->path_tree.rb_node; struct rb_node *pn = NULL; struct ipoib_path *tpath; int ret; while (*n) { pn = *n; tpath = rb_entry(pn, struct ipoib_path, rb_node); ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw, sizeof (union ib_gid)); if (ret < 0) n = &pn->rb_left; else if (ret > 0) n = &pn->rb_right; else return -EEXIST; } rb_link_node(&path->rb_node, pn, n); rb_insert_color(&path->rb_node, &priv->path_tree); list_add_tail(&path->list, &priv->path_list); return 0;}static void path_free(struct net_device *dev, struct ipoib_path *path){ struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_neigh *neigh, *tn; struct sk_buff *skb; unsigned long flags; while ((skb = __skb_dequeue(&path->queue))) dev_kfree_skb_irq(skb); 
spin_lock_irqsave(&priv->lock, flags); list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) { /* * It's safe to call ipoib_put_ah() inside priv->lock * here, because we know that path->ah will always * hold one more reference, so ipoib_put_ah() will * never do more than decrement the ref count. */ if (neigh->ah) ipoib_put_ah(neigh->ah); *to_ipoib_neigh(neigh->neighbour) = NULL; neigh->neighbour->ops->destructor = NULL; kfree(neigh); } spin_unlock_irqrestore(&priv->lock, flags); if (path->ah) ipoib_put_ah(path->ah); kfree(path);}#ifdef CONFIG_INFINIBAND_IPOIB_DEBUGstruct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev){ struct ipoib_path_iter *iter; iter = kmalloc(sizeof *iter, GFP_KERNEL); if (!iter) return NULL; iter->dev = dev; memset(iter->path.pathrec.dgid.raw, 0, 16); if (ipoib_path_iter_next(iter)) { kfree(iter); return NULL; } return iter;}int ipoib_path_iter_next(struct ipoib_path_iter *iter){ struct ipoib_dev_priv *priv = netdev_priv(iter->dev); struct rb_node *n; struct ipoib_path *path; int ret = 1; spin_lock_irq(&priv->lock); n = rb_first(&priv->path_tree); while (n) { path = rb_entry(n, struct ipoib_path, rb_node); if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw, sizeof (union ib_gid)) < 0) { iter->path = *path; ret = 0; break; } n = rb_next(n); } spin_unlock_irq(&priv->lock); return ret;}void ipoib_path_iter_read(struct ipoib_path_iter *iter, struct ipoib_path *path){ *path = iter->path;}#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */void ipoib_flush_paths(struct net_device *dev){ struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path, *tp; LIST_HEAD(remove_list); unsigned long flags; spin_lock_irqsave(&priv->lock, flags); list_splice(&priv->path_list, &remove_list); INIT_LIST_HEAD(&priv->path_list); list_for_each_entry(path, &remove_list, list) rb_erase(&path->rb_node, &priv->path_tree); spin_unlock_irqrestore(&priv->lock, flags); list_for_each_entry_safe(path, tp, &remove_list, list) { if (path->query) 
ib_sa_cancel_query(path->query_id, path->query); wait_for_completion(&path->done); path_free(dev, path); }}static void path_rec_completion(int status, struct ib_sa_path_rec *pathrec, void *path_ptr){ struct ipoib_path *path = path_ptr; struct net_device *dev = path->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_ah *ah = NULL; struct ipoib_neigh *neigh; struct sk_buff_head skqueue; struct sk_buff *skb; unsigned long flags; if (pathrec) ipoib_dbg(priv, "PathRec LID 0x%04x for GID " IPOIB_GID_FMT "\n", be16_to_cpu(pathrec->dlid), IPOIB_GID_ARG(pathrec->dgid)); else ipoib_dbg(priv, "PathRec status %d for GID " IPOIB_GID_FMT "\n", status, IPOIB_GID_ARG(path->pathrec.dgid)); skb_queue_head_init(&skqueue); if (!status) { struct ib_ah_attr av = { .dlid = be16_to_cpu(pathrec->dlid), .sl = pathrec->sl, .port_num = priv->port }; int path_rate = ib_sa_rate_enum_to_int(pathrec->rate); if (path_rate > 0 && priv->local_rate > path_rate) av.static_rate = (priv->local_rate - 1) / path_rate; ipoib_dbg(priv, "static_rate %d for local port %dX, path %dX\n", av.static_rate, priv->local_rate, ib_sa_rate_enum_to_int(pathrec->rate)); ah = ipoib_create_ah(dev, priv->pd, &av); } spin_lock_irqsave(&priv->lock, flags); path->ah = ah; if (ah) { path->pathrec = *pathrec; ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n", ah, be16_to_cpu(pathrec->dlid), pathrec->sl); while ((skb = __skb_dequeue(&path->queue))) __skb_queue_tail(&skqueue, skb); list_for_each_entry(neigh, &path->neigh_list, list) { kref_get(&path->ah->ref); neigh->ah = path->ah; while ((skb = __skb_dequeue(&neigh->queue))) __skb_queue_tail(&skqueue, skb); } } path->query = NULL; complete(&path->done); spin_unlock_irqrestore(&priv->lock, flags); while ((skb = __skb_dequeue(&skqueue))) { skb->dev = dev; if (dev_queue_xmit(skb)) ipoib_warn(priv, "dev_queue_xmit failed " "to requeue packet\n"); }}static struct ipoib_path *path_rec_create(struct net_device *dev, union ib_gid *gid){ struct 
ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; path = kzalloc(sizeof *path, GFP_ATOMIC); if (!path) return NULL; path->dev = dev; skb_queue_head_init(&path->queue); INIT_LIST_HEAD(&path->neigh_list); memcpy(path->pathrec.dgid.raw, gid->raw, sizeof (union ib_gid)); path->pathrec.sgid = priv->local_gid; path->pathrec.pkey = cpu_to_be16(priv->pkey); path->pathrec.numb_path = 1; return path;}static int path_rec_start(struct net_device *dev, struct ipoib_path *path){ struct ipoib_dev_priv *priv = netdev_priv(dev); ipoib_dbg(priv, "Start path record lookup for " IPOIB_GID_FMT "\n", IPOIB_GID_ARG(path->pathrec.dgid)); init_completion(&path->done); path->query_id = ib_sa_path_rec_get(priv->ca, priv->port, &path->pathrec, IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_NUMB_PATH | IB_SA_PATH_REC_PKEY, 1000, GFP_ATOMIC, path_rec_completion, path, &path->query); if (path->query_id < 0) { ipoib_warn(priv, "ib_sa_path_rec_get failed\n"); path->query = NULL; return path->query_id; } return 0;}static void neigh_add_path(struct sk_buff *skb, struct net_device *dev){ struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; struct ipoib_neigh *neigh; neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); if (!neigh) { ++priv->stats.tx_dropped; dev_kfree_skb_any(skb); return; } skb_queue_head_init(&neigh->queue); neigh->neighbour = skb->dst->neighbour; *to_ipoib_neigh(skb->dst->neighbour) = neigh; /* * We can only be called from ipoib_start_xmit, so we're * inside tx_lock -- no need to save/restore flags. 
*/ spin_lock(&priv->lock); path = __path_find(dev, (union ib_gid *) (skb->dst->neighbour->ha + 4)); if (!path) { path = path_rec_create(dev, (union ib_gid *) (skb->dst->neighbour->ha + 4)); if (!path) goto err; __path_add(dev, path); } list_add_tail(&neigh->list, &path->neigh_list); if (path->pathrec.dlid) { kref_get(&path->ah->ref); neigh->ah = path->ah; ipoib_send(dev, skb, path->ah, be32_to_cpup((__be32 *) skb->dst->neighbour->ha)); } else { neigh->ah = NULL; if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { __skb_queue_tail(&neigh->queue, skb); } else { ++priv->stats.tx_dropped; dev_kfree_skb_any(skb); } if (!path->query && path_rec_start(dev, path)) goto err; } spin_unlock(&priv->lock); return;err: *to_ipoib_neigh(skb->dst->neighbour) = NULL; list_del(&neigh->list); neigh->neighbour->ops->destructor = NULL; kfree(neigh); ++priv->stats.tx_dropped; dev_kfree_skb_any(skb); spin_unlock(&priv->lock);}static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev){ struct ipoib_dev_priv *priv = netdev_priv(skb->dev); /* Look up path record for unicasts */ if (skb->dst->neighbour->ha[4] != 0xff) { neigh_add_path(skb, dev); return; } /* Add in the P_Key for multicasts */ skb->dst->neighbour->ha[8] = (priv->pkey >> 8) & 0xff; skb->dst->neighbour->ha[9] = priv->pkey & 0xff; ipoib_mcast_send(dev, (union ib_gid *) (skb->dst->neighbour->ha + 4), skb);}static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, struct ipoib_pseudoheader *phdr){ struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; /* * We can only be called from ipoib_start_xmit, so we're * inside tx_lock -- no need to save/restore flags. 
*/ spin_lock(&priv->lock); path = __path_find(dev, (union ib_gid *) (phdr->hwaddr + 4)); if (!path) { path = path_rec_create(dev, (union ib_gid *) (phdr->hwaddr + 4)); if (path) { /* put pseudoheader back on for next time */ skb_push(skb, sizeof *phdr); __skb_queue_tail(&path->queue, skb); if (path_rec_start(dev, path)) { spin_unlock(&priv->lock); path_free(dev, path); return; } else __path_add(dev, path); } else { ++priv->stats.tx_dropped; dev_kfree_skb_any(skb);
/* (web-viewer keyboard-shortcut help removed — extraction residue, not part of the source) */