ipoib_cm.c
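/*
 * Excerpt from the IPoIB connected-mode (CM) implementation: TX
 * connection setup/teardown, the CM event handler, the workqueue tasks
 * that start and reap connections, path-MTU error reporting, the stale
 * passive-connection scan, the "mode" sysfs attribute, and SRQ receive
 * ring init/cleanup.  The fragment opens inside the error path of
 * ipoib_cm_tx_init().
 */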
		ipoib_warn(priv, "failed to send cm req: %d\n", ret);
		goto err_send_cm;
	}

	ipoib_dbg(priv, "Request connection 0x%x for gid " IPOIB_GID_FMT " qpn 0x%x\n",
		  p->qp->qp_num, IPOIB_GID_ARG(pathrec->dgid), qpn);

	return 0;

err_send_cm:
err_modify:
	ib_destroy_cm_id(p->id);
err_id:
	p->id = NULL;
	ib_destroy_qp(p->qp);
err_qp:
	p->qp = NULL;
err_tx:
	return ret;
}

static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
{
	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
	struct ipoib_tx_buf *tx_req;
	unsigned long flags;
	unsigned long begin;

	ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
		  p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);

	if (p->id)
		ib_destroy_cm_id(p->id);

	if (p->tx_ring) {
		/* Wait for all sends to complete */
		begin = jiffies;
		while ((int) p->tx_tail - (int) p->tx_head < 0) {
			if (time_after(jiffies, begin + 5 * HZ)) {
				ipoib_warn(priv, "timing out; %d sends not completed\n",
					   p->tx_head - p->tx_tail);
				goto timeout;
			}

			msleep(1);
		}
	}

timeout:

	while ((int) p->tx_tail - (int) p->tx_head < 0) {
		tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
		ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
				    DMA_TO_DEVICE);
		dev_kfree_skb_any(tx_req->skb);
		++p->tx_tail;
		spin_lock_irqsave(&priv->tx_lock, flags);
		if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
		    netif_queue_stopped(p->dev) &&
		    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
			netif_wake_queue(p->dev);
		spin_unlock_irqrestore(&priv->tx_lock, flags);
	}

	if (p->qp)
		ib_destroy_qp(p->qp);

	kfree(p->tx_ring);
	kfree(p);
}

static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
			       struct ib_cm_event *event)
{
	struct ipoib_cm_tx *tx = cm_id->context;
	struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
	struct net_device *dev = priv->dev;
	struct ipoib_neigh *neigh;
	int ret;

	switch (event->event) {
	case IB_CM_DREQ_RECEIVED:
		ipoib_dbg(priv, "DREQ received.\n");
		ib_send_cm_drep(cm_id, NULL, 0);
		break;
	case IB_CM_REP_RECEIVED:
		ipoib_dbg(priv, "REP received.\n");
		ret = ipoib_cm_rep_handler(cm_id, event);
		if (ret)
			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
				       NULL, 0, NULL, 0);
		break;
	case IB_CM_REQ_ERROR:
	case IB_CM_REJ_RECEIVED:
	case IB_CM_TIMEWAIT_EXIT:
		ipoib_dbg(priv, "CM error %d.\n", event->event);
		spin_lock_irq(&priv->tx_lock);
		spin_lock(&priv->lock);
		neigh = tx->neigh;

		if (neigh) {
			neigh->cm = NULL;
			list_del(&neigh->list);
			if (neigh->ah)
				ipoib_put_ah(neigh->ah);
			ipoib_neigh_free(dev, neigh);

			tx->neigh = NULL;
		}

		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
			list_move(&tx->list, &priv->cm.reap_list);
			queue_work(ipoib_workqueue, &priv->cm.reap_task);
		}

		spin_unlock(&priv->lock);
		spin_unlock_irq(&priv->tx_lock);
		break;
	default:
		break;
	}

	return 0;
}

struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev,
				       struct ipoib_path *path,
				       struct ipoib_neigh *neigh)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_cm_tx *tx;

	tx = kzalloc(sizeof *tx, GFP_ATOMIC);
	if (!tx)
		return NULL;

	neigh->cm = tx;
	tx->neigh = neigh;
	tx->path = path;
	tx->dev = dev;
	list_add(&tx->list, &priv->cm.start_list);
	set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
	queue_work(ipoib_workqueue, &priv->cm.start_task);
	return tx;
}

void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
{
	struct ipoib_dev_priv *priv = netdev_priv(tx->dev);

	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
		list_move(&tx->list, &priv->cm.reap_list);
		queue_work(ipoib_workqueue, &priv->cm.reap_task);
		ipoib_dbg(priv, "Reap connection for gid " IPOIB_GID_FMT "\n",
			  IPOIB_GID_ARG(tx->neigh->dgid));
		tx->neigh = NULL;
	}
}
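/*
 * Worker for priv->cm.start_task: drains priv->cm.start_list.  The QPN
 * and path record are snapshotted while tx_lock/lock are held, both
 * locks are dropped around ipoib_cm_tx_init() (which may sleep), and a
 * failed setup unwinds the neighbour and frees the entry.
 */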
static void ipoib_cm_tx_start(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.start_task);
	struct net_device *dev = priv->dev;
	struct ipoib_neigh *neigh;
	struct ipoib_cm_tx *p;
	unsigned long flags;
	int ret;

	struct ib_sa_path_rec pathrec;
	u32 qpn;

	spin_lock_irqsave(&priv->tx_lock, flags);
	spin_lock(&priv->lock);
	while (!list_empty(&priv->cm.start_list)) {
		p = list_entry(priv->cm.start_list.next, typeof(*p), list);
		list_del_init(&p->list);
		neigh = p->neigh;
		qpn = IPOIB_QPN(neigh->neighbour->ha);
		memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
		spin_unlock(&priv->lock);
		spin_unlock_irqrestore(&priv->tx_lock, flags);
		ret = ipoib_cm_tx_init(p, qpn, &pathrec);
		spin_lock_irqsave(&priv->tx_lock, flags);
		spin_lock(&priv->lock);
		if (ret) {
			neigh = p->neigh;
			if (neigh) {
				neigh->cm = NULL;
				list_del(&neigh->list);
				if (neigh->ah)
					ipoib_put_ah(neigh->ah);
				ipoib_neigh_free(dev, neigh);
			}
			list_del(&p->list);
			kfree(p);
		}
	}
	spin_unlock(&priv->lock);
	spin_unlock_irqrestore(&priv->tx_lock, flags);
}

static void ipoib_cm_tx_reap(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.reap_task);
	struct ipoib_cm_tx *p;

	spin_lock_irq(&priv->tx_lock);
	spin_lock(&priv->lock);
	while (!list_empty(&priv->cm.reap_list)) {
		p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
		list_del(&p->list);
		spin_unlock(&priv->lock);
		spin_unlock_irq(&priv->tx_lock);
		ipoib_cm_tx_destroy(p);
		spin_lock_irq(&priv->tx_lock);
		spin_lock(&priv->lock);
	}
	spin_unlock(&priv->lock);
	spin_unlock_irq(&priv->tx_lock);
}

static void ipoib_cm_skb_reap(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.skb_task);
	struct sk_buff *skb;

	unsigned mtu = priv->mcast_mtu;

	spin_lock_irq(&priv->tx_lock);
	spin_lock(&priv->lock);
	while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
		spin_unlock(&priv->lock);
		spin_unlock_irq(&priv->tx_lock);
		if (skb->protocol == htons(ETH_P_IP))
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		else if (skb->protocol == htons(ETH_P_IPV6))
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, priv->dev);
#endif
		dev_kfree_skb_any(skb);
		spin_lock_irq(&priv->tx_lock);
		spin_lock(&priv->lock);
	}
	spin_unlock(&priv->lock);
	spin_unlock_irq(&priv->tx_lock);
}

void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
			   unsigned int mtu)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int e = skb_queue_empty(&priv->cm.skb_queue);

	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	skb_queue_tail(&priv->cm.skb_queue, skb);
	if (e)
		queue_work(ipoib_workqueue, &priv->cm.skb_task);
}

static void ipoib_cm_rx_reap(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.rx_reap_task);
	struct ipoib_cm_rx *p, *n;
	LIST_HEAD(list);

	spin_lock_irq(&priv->lock);
	list_splice_init(&priv->cm.rx_reap_list, &list);
	spin_unlock_irq(&priv->lock);

	list_for_each_entry_safe(p, n, &list, list) {
		ib_destroy_cm_id(p->id);
		ib_destroy_qp(p->qp);
		kfree(p);
	}
}
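/*
 * Delayed-work scan for stale passive (RX) connections.  priv->lock is
 * dropped around ib_modify_qp(), which may sleep; moving a stale QP to
 * the error state flushes its posted work requests so the connection
 * can be reaped later.  The task rearms itself while passive_ids is
 * non-empty.
 */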
static void ipoib_cm_stale_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.stale_task.work);
	struct ipoib_cm_rx *p;
	int ret;

	spin_lock_irq(&priv->lock);
	while (!list_empty(&priv->cm.passive_ids)) {
		/* List is sorted by LRU, start from tail,
		 * stop when we see a recently used entry */
		p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
		if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
			break;
		list_move(&p->list, &priv->cm.rx_error_list);
		p->state = IPOIB_CM_RX_ERROR;
		spin_unlock_irq(&priv->lock);
		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
		if (ret)
			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
		spin_lock_irq(&priv->lock);
	}

	if (!list_empty(&priv->cm.passive_ids))
		queue_delayed_work(ipoib_workqueue, &priv->cm.stale_task,
				   IPOIB_CM_RX_DELAY);
	spin_unlock_irq(&priv->lock);
}

static ssize_t show_mode(struct device *d, struct device_attribute *attr,
			 char *buf)
{
	struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d));

	if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
		return sprintf(buf, "connected\n");
	else
		return sprintf(buf, "datagram\n");
}

static ssize_t set_mode(struct device *d, struct device_attribute *attr,
			const char *buf, size_t count)
{
	struct net_device *dev = to_net_dev(d);
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	/* flush paths if we switch modes so that connections are restarted */
	if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) {
		set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
		ipoib_warn(priv, "enabling connected mode "
			   "will cause multicast packet drops\n");
		ipoib_flush_paths(dev);
		return count;
	}

	if (!strcmp(buf, "datagram\n")) {
		clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
		dev->mtu = min(priv->mcast_mtu, dev->mtu);
		ipoib_flush_paths(dev);
		return count;
	}

	return -EINVAL;
}

static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);

int ipoib_cm_add_mode_attr(struct net_device *dev)
{
	return device_create_file(&dev->dev, &dev_attr_mode);
}
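/*
 * The attribute above is exposed as /sys/class/net/<ifname>/mode.
 * Example usage (assuming the interface is named ib0):
 *
 *   echo connected > /sys/class/net/ib0/mode
 *   echo datagram  > /sys/class/net/ib0/mode
 */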
int ipoib_cm_dev_init(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_srq_init_attr srq_init_attr = {
		.attr = {
			.max_wr  = ipoib_recvq_size,
			.max_sge = IPOIB_CM_RX_SG
		}
	};
	int ret, i;

	INIT_LIST_HEAD(&priv->cm.passive_ids);
	INIT_LIST_HEAD(&priv->cm.reap_list);
	INIT_LIST_HEAD(&priv->cm.start_list);
	INIT_LIST_HEAD(&priv->cm.rx_error_list);
	INIT_LIST_HEAD(&priv->cm.rx_flush_list);
	INIT_LIST_HEAD(&priv->cm.rx_drain_list);
	INIT_LIST_HEAD(&priv->cm.rx_reap_list);
	INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
	INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);
	INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap);
	INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap);
	INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task);

	skb_queue_head_init(&priv->cm.skb_queue);

	priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
	if (IS_ERR(priv->cm.srq)) {
		ret = PTR_ERR(priv->cm.srq);
		priv->cm.srq = NULL;
		return ret;
	}

	priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring,
				    GFP_KERNEL);
	if (!priv->cm.srq_ring) {
		printk(KERN_WARNING "%s: failed to allocate CM ring (%d entries)\n",
		       priv->ca->name, ipoib_recvq_size);
		ipoib_cm_dev_cleanup(dev);
		return -ENOMEM;
	}

	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
		priv->cm.rx_sge[i].lkey = priv->mr->lkey;

	priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
	for (i = 1; i < IPOIB_CM_RX_SG; ++i)
		priv->cm.rx_sge[i].length = PAGE_SIZE;
	priv->cm.rx_wr.next = NULL;
	priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
	priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG;

	for (i = 0; i < ipoib_recvq_size; ++i) {
		if (!ipoib_cm_alloc_rx_skb(dev, i, IPOIB_CM_RX_SG - 1,
					   priv->cm.srq_ring[i].mapping)) {
			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
			ipoib_cm_dev_cleanup(dev);
			return -ENOMEM;
		}
		if (ipoib_cm_post_receive(dev, i)) {
			ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
			ipoib_cm_dev_cleanup(dev);
			return -EIO;
		}
	}

	priv->dev->dev_addr[0] = IPOIB_FLAGS_RC;
	return 0;
}

void ipoib_cm_dev_cleanup(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int i, ret;

	if (!priv->cm.srq)
		return;

	ipoib_dbg(priv, "Cleanup ipoib connected mode.\n");

	ret = ib_destroy_srq(priv->cm.srq);
	if (ret)
		ipoib_warn(priv, "ib_destroy_srq failed: %d\n", ret);

	priv->cm.srq = NULL;
	if (!priv->cm.srq_ring)
		return;
	for (i = 0; i < ipoib_recvq_size; ++i)
		if (priv->cm.srq_ring[i].skb) {
			ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
					      priv->cm.srq_ring[i].mapping);
			dev_kfree_skb_any(priv->cm.srq_ring[i].skb);
			priv->cm.srq_ring[i].skb = NULL;
		}
	kfree(priv->cm.srq_ring);
	priv->cm.srq_ring = NULL;
}