
📄 ipoib_cm.c

📁 Linux kernel source code
💻 C
📖 Page 1 of 3
/*
 * Copyright (c) 2006 Mellanox Technologies. All rights reserved
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */

#include <rdma/ib_cm.h>
#include <rdma/ib_cache.h>
#include <net/dst.h>
#include <net/icmp.h>
#include <linux/icmpv6.h>
#include <linux/delay.h>

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
static int data_debug_level;

module_param_named(cm_data_debug_level, data_debug_level, int, 0644);
MODULE_PARM_DESC(cm_data_debug_level,
		 "Enable data path debug tracing for connected mode if > 0");
#endif

#include "ipoib.h"

#define IPOIB_CM_IETF_ID 0x1000000000000000ULL

#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ)
#define IPOIB_CM_RX_TIMEOUT     (2 * 256 * HZ)
#define IPOIB_CM_RX_DELAY       (3 * 256 * HZ)
#define IPOIB_CM_RX_UPDATE_MASK (0x3)

static struct ib_qp_attr ipoib_cm_err_attr = {
	.qp_state = IB_QPS_ERR
};

#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff

static struct ib_send_wr ipoib_cm_rx_drain_wr = {
	.wr_id = IPOIB_CM_RX_DRAIN_WRID,
	.opcode = IB_WR_SEND,
};

static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
			       struct ib_cm_event *event);

static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
				  u64 mapping[IPOIB_CM_RX_SG])
{
	int i;

	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);

	for (i = 0; i < frags; ++i)
		ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
}

static int ipoib_cm_post_receive(struct net_device *dev, int id)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_recv_wr *bad_wr;
	int i, ret;

	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;

	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];

	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
	if (unlikely(ret)) {
		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
		ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
				      priv->cm.srq_ring[id].mapping);
		dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
		priv->cm.srq_ring[id].skb = NULL;
	}

	return ret;
}

static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, int frags,
					     u64 mapping[IPOIB_CM_RX_SG])
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct sk_buff *skb;
	int i;

	skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
	if (unlikely(!skb))
		return NULL;

	/*
	 * IPoIB adds a 4 byte header. So we need 12 more bytes to align the
	 * IP header to a multiple of 16.
	 */
	skb_reserve(skb, 12);

	mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
				       DMA_FROM_DEVICE);
	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
		dev_kfree_skb_any(skb);
		return NULL;
	}

	for (i = 0; i < frags; i++) {
		struct page *page = alloc_page(GFP_ATOMIC);

		if (!page)
			goto partial_error;
		skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);

		mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page,
						 0, PAGE_SIZE, DMA_FROM_DEVICE);
		if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
			goto partial_error;
	}

	priv->cm.srq_ring[id].skb = skb;
	return skb;

partial_error:

	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);

	for (; i > 0; --i)
		ib_dma_unmap_single(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);

	dev_kfree_skb_any(skb);
	return NULL;
}

static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv* priv)
{
	struct ib_send_wr *bad_wr;
	struct ipoib_cm_rx *p;

	/* We only reserved 1 extra slot in CQ for drain WRs, so
	 * make sure we have at most 1 outstanding WR. */
	if (list_empty(&priv->cm.rx_flush_list) ||
	    !list_empty(&priv->cm.rx_drain_list))
		return;

	/*
	 * QPs on flush list are error state.  This way, a "flush
	 * error" WC will be immediately generated for each WR we post.
	 */
	p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
	if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
		ipoib_warn(priv, "failed to post drain wr\n");

	list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
}

static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
{
	struct ipoib_cm_rx *p = ctx;
	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
	unsigned long flags;

	if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
		return;

	spin_lock_irqsave(&priv->lock, flags);
	list_move(&p->list, &priv->cm.rx_flush_list);
	p->state = IPOIB_CM_RX_FLUSH;
	ipoib_cm_start_rx_drain(priv);
	spin_unlock_irqrestore(&priv->lock, flags);
}

static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
					   struct ipoib_cm_rx *p)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_qp_init_attr attr = {
		.event_handler = ipoib_cm_rx_event_handler,
		.send_cq = priv->cq, /* For drain WR */
		.recv_cq = priv->cq,
		.srq = priv->cm.srq,
		.cap.max_send_wr = 1, /* For drain WR */
		.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
		.sq_sig_type = IB_SIGNAL_ALL_WR,
		.qp_type = IB_QPT_RC,
		.qp_context = p,
	};

	return ib_create_qp(priv->pd, &attr);
}

static int ipoib_cm_modify_rx_qp(struct net_device *dev,
				  struct ib_cm_id *cm_id, struct ib_qp *qp,
				  unsigned psn)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
		return ret;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret);
		return ret;
	}
	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
		return ret;
	}
	qp_attr.rq_psn = psn;
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
		return ret;
	}

	/*
	 * Current Mellanox HCA firmware won't generate completions
	 * with error for drain WRs unless the QP has been moved to
	 * RTS first. This work-around leaves a window where a QP has
	 * moved to error asynchronously, but this will eventually get
	 * fixed in firmware, so let's not error out if modify QP
	 * fails.
	 */
	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
		return 0;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
		return 0;
	}

	return 0;
}

static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
			     struct ib_qp *qp, struct ib_cm_req_event_param *req,
			     unsigned psn)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_cm_data data = {};
	struct ib_cm_rep_param rep = {};

	data.qpn = cpu_to_be32(priv->qp->qp_num);
	data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);

	rep.private_data = &data;
	rep.private_data_len = sizeof data;
	rep.flow_control = 0;
	rep.rnr_retry_count = req->rnr_retry_count;
	rep.srq = 1;
	rep.qp_num = qp->qp_num;
	rep.starting_psn = psn;
	return ib_send_cm_rep(cm_id, &rep);
}

static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
	struct net_device *dev = cm_id->context;
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_cm_rx *p;
	unsigned psn;
	int ret;

	ipoib_dbg(priv, "REQ arrived\n");
	p = kzalloc(sizeof *p, GFP_KERNEL);
	if (!p)
		return -ENOMEM;
	p->dev = dev;
	p->id = cm_id;
	cm_id->context = p;
	p->state = IPOIB_CM_RX_LIVE;
	p->jiffies = jiffies;
	INIT_LIST_HEAD(&p->list);

	p->qp = ipoib_cm_create_rx_qp(dev, p);
	if (IS_ERR(p->qp)) {
		ret = PTR_ERR(p->qp);
		goto err_qp;
	}

	psn = random32() & 0xffffff;
	ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
	if (ret)
		goto err_modify;

	spin_lock_irq(&priv->lock);
	queue_delayed_work(ipoib_workqueue,
			   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
	/* Add this entry to passive ids list head, but do not re-add it
	 * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
	p->jiffies = jiffies;
	if (p->state == IPOIB_CM_RX_LIVE)
		list_move(&p->list, &priv->cm.passive_ids);
	spin_unlock_irq(&priv->lock);

	ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn);
	if (ret) {
		ipoib_warn(priv, "failed to send REP: %d\n", ret);
		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
			ipoib_warn(priv, "unable to move qp to error state\n");
	}
	return 0;

err_modify:
	ib_destroy_qp(p->qp);
err_qp:
	kfree(p);
	return ret;
}

static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
			       struct ib_cm_event *event)
{
	struct ipoib_cm_rx *p;
	struct ipoib_dev_priv *priv;

	switch (event->event) {
	case IB_CM_REQ_RECEIVED:
		return ipoib_cm_req_handler(cm_id, event);
	case IB_CM_DREQ_RECEIVED:
		p = cm_id->context;
		ib_send_cm_drep(cm_id, NULL, 0);
		/* Fall through */
	case IB_CM_REJ_RECEIVED:
		p = cm_id->context;
		priv = netdev_priv(p->dev);
		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
			ipoib_warn(priv, "unable to move qp to error state\n");
		/* Fall through */
	default:
		return 0;
	}
}

/* Adjust length of skb with fragments to match received data */
static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
			  unsigned int length, struct sk_buff *toskb)
{
	int i, num_frags;
	unsigned int size;

	/* put header into skb */
	size = min(length, hdr_space);
	skb->tail += size;
	skb->len += size;
	length -= size;

	num_frags = skb_shinfo(skb)->nr_frags;
	for (i = 0; i < num_frags; i++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		if (length == 0) {
			/* don't need this page */
			skb_fill_page_desc(toskb, i, frag->page, 0, PAGE_SIZE);
			--skb_shinfo(skb)->nr_frags;
		} else {
			size = min(length, (unsigned) PAGE_SIZE);

			frag->size = size;
			skb->data_len += size;
			skb->truesize += size;
			skb->len += size;
			length -= size;
		}
	}
}

void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
	struct sk_buff *skb, *newskb;
	struct ipoib_cm_rx *p;
	unsigned long flags;
	u64 mapping[IPOIB_CM_RX_SG];
	int frags;

	ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
		       wr_id, wc->status);

	if (unlikely(wr_id >= ipoib_recvq_size)) {
		if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {
			spin_lock_irqsave(&priv->lock, flags);
			list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
			ipoib_cm_start_rx_drain(priv);
			queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
			spin_unlock_irqrestore(&priv->lock, flags);
		} else
			ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
				   wr_id, ipoib_recvq_size);
		return;
	}

	skb  = priv->cm.srq_ring[wr_id].skb;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		ipoib_dbg(priv, "cm recv error "
			   "(status=%d, wrid=%d vend_err %x)\n",
			   wc->status, wr_id, wc->vendor_err);
		++dev->stats.rx_dropped;
		goto repost;
	}

	if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) {
		p = wc->qp->qp_context;
		if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
			spin_lock_irqsave(&priv->lock, flags);
			p->jiffies = jiffies;
			/* Move this entry to list head, but do not re-add it
			 * if it has been moved out of list. */
			if (p->state == IPOIB_CM_RX_LIVE)
				list_move(&p->list, &priv->cm.passive_ids);
			spin_unlock_irqrestore(&priv->lock, flags);
		}
	}

	frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
					      (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
