/* verbs.c */
/* * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the BSD-type * license below: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * Neither the name of the Network Appliance, Inc. nor the names of * its contributors may be used to endorse or promote products * derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

/*
 * verbs.c
 *
 * Encapsulates the major functions managing:
 *  o adapters
 *  o endpoints
 *  o connections
 *  o buffer memory
 */

#include <linux/pci.h>	/* for Tavor hack below */

#include "xprt_rdma.h"

/*
 * Globals/Macros
 */

#ifdef RPC_DEBUG
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

/*
 * internal functions
 */

/*
 * handle replies in tasklet context, using a single, global list
 * rdma tasklet function -- just turn around and call the func
 * for all replies on the list
 */

/* Protects rpcrdma_tasklets_g; taken from both upcall and tasklet context. */
static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
static LIST_HEAD(rpcrdma_tasklets_g);

/*
 * Tasklet body: drain the global reply list. Each reply's rr_func is
 * invoked with the lock dropped; a reply with no rr_func is returned
 * to the receive buffer pool instead.
 */
static void
rpcrdma_run_tasklet(unsigned long data)
{
	struct rpcrdma_rep *rep;
	void (*func)(struct rpcrdma_rep *);
	unsigned long flags;

	data = data;	/* self-assignment silences unused-parameter warning */
	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	while (!list_empty(&rpcrdma_tasklets_g)) {
		rep = list_entry(rpcrdma_tasklets_g.next,
				 struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		func = rep->rr_func;
		rep->rr_func = NULL;
		/* drop the lock while the callback runs */
		spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);

		if (func)
			func(rep);
		else
			rpcrdma_recv_buffer_put(rep);

		spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	}
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
}

static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);

/*
 * Queue a reply on the global list and schedule the tasklet to
 * process it. Safe to call from completion (interrupt) context.
 */
static inline void
rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
{
	unsigned long flags;

	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
	tasklet_schedule(&rpcrdma_tasklet_g);
}

/*
 * QP asynchronous-error upcall: if the endpoint was connected, mark it
 * failed (-EIO), notify via rep_func and wake any connect waiters.
 */
static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	dprintk("RPC: %s: QP error %X on device %s ep %p\n",
		__func__, event->event, event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

/*
 * CQ asynchronous-error upcall: same recovery action as the QP error
 * upcall above — fail the connection and wake waiters.
 */
static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
		__func__, event->event, event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

/*
 * Process a single work completion. The wr_id carries the rpcrdma_rep
 * pointer (NULL for completions nobody is interested in). Failed
 * completions mark the reply length invalid (~0U) before scheduling it;
 * successful receives update the server credit count from the inline
 * RPC/RDMA header when at least 16 bytes arrived.
 */
static inline
void rpcrdma_event_process(struct ib_wc *wc)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long) wc->wr_id;

	dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n",
		__func__, rep, wc->status, wc->opcode, wc->byte_len);

	if (!rep) /* send or bind completion that we don't care about */
		return;

	if (IB_WC_SUCCESS != wc->status) {
		dprintk("RPC: %s: %s WC status %X, connection lost\n",
			__func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send",
			wc->status);
		rep->rr_len = ~0U;	/* flag the reply as invalid */
		rpcrdma_schedule_tasklet(rep);
		return;
	}

	switch (wc->opcode) {
	case IB_WC_RECV:
		rep->rr_len = wc->byte_len;
		ib_dma_sync_single_for_cpu(
			rdmab_to_ia(rep->rr_buffer)->ri_id->device,
			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
		/* Keep (only) the most recent credits, after check validity */
		if (rep->rr_len >= 16) {
			struct rpcrdma_msg *p =
					(struct rpcrdma_msg *) rep->rr_base;
			unsigned int credits = ntohl(p->rm_credit);

			if (credits == 0) {
				dprintk("RPC: %s: server"
					" dropped credits to 0!\n", __func__);
				/* don't deadlock */
				credits = 1;
			} else if (credits > rep->rr_buffer->rb_max_requests) {
				dprintk("RPC: %s: server"
					" over-crediting: %d (%d)\n",
					__func__, credits,
					rep->rr_buffer->rb_max_requests);
				credits = rep->rr_buffer->rb_max_requests;
			}
			atomic_set(&rep->rr_buffer->rb_credits, credits);
		}
		/* fall through */
	case IB_WC_BIND_MW:
		rpcrdma_schedule_tasklet(rep);
		break;
	default:
		dprintk("RPC: %s: unexpected WC event %X\n",
			__func__, wc->opcode);
		break;
	}
}

/*
 * Poll a completion queue to exhaustion, dispatching each completion
 * through rpcrdma_event_process(). Returns 0, or a negative ib_poll_cq
 * error.
 */
static inline int
rpcrdma_cq_poll(struct ib_cq *cq)
{
	struct ib_wc wc;
	int rc;

	for (;;) {
		rc = ib_poll_cq(cq, 1, &wc);
		if (rc < 0) {
			dprintk("RPC: %s: ib_poll_cq failed %i\n",
				__func__, rc);
			return rc;
		}
		if (rc == 0)
			break;
		rpcrdma_event_process(&wc);
	}

	return 0;
}

/*
 * rpcrdma_cq_event_upcall
 *
 * This upcall handles recv, send, bind and unbind events.
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 *
 * Note that send events are suppressed and do not result in an upcall.
 */
static void
rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context)
{
	int rc;

	rc = rpcrdma_cq_poll(cq);
	if (rc)
		return;

	/* re-arm the CQ, then poll again to close the race with events
	 * that arrived between the first poll and the re-arm */
	rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_cq_poll(cq);
}

#ifdef RPC_DEBUG
/* Human-readable names indexed by RDMA_CM_EVENT_* value (0..11). */
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal"
};
#endif

/*
 * RDMA CM event handler. Address/route resolution results are reported
 * through ia->ri_async_rc and ia->ri_done (see rpcrdma_create_id below).
 * Connection state changes funnel through the "connected" label, which
 * records the new state in ep->rep_connected (1 on success, negative
 * errno on failure), resets the credit count to 1, notifies via
 * ep->rep_func and wakes connect waiters.
 */
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, &attr,
			IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			&iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC: %s: %s: %u.%u.%u.%u:%u"
			" (ep 0x%p event 0x%x)\n",
			__func__,
			/* conn[] is only indexed when the event fits it */
			(event->event <= 11) ? conn[event->event] :
						"unknown connection error",
			NIPQUAD(addr->sin_addr.s_addr),
			ntohs(addr->sin_port),
			ep, event->event);
		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		break;
	default:
		ia->ri_async_rc = -EINVAL;
		dprintk("RPC: %s: unexpected CM event %X\n",
			__func__, event->event);
		complete(&ia->ri_done);
		break;
	}

	return 0;
}

/*
 * Create a CM id for the transport and synchronously resolve the remote
 * address and route. Each resolve step posts asynchronously and waits on
 * ia->ri_done; the upcall above stores the outcome in ia->ri_async_rc.
 * Returns the new id, or an ERR_PTR (the id is destroyed on failure).
 */
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = 0;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion(&ia->ri_done);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = 0;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion(&ia->ri_done);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	/* discard completions; only the count and the last opcode are kept
	 * for the debug message below */
	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 *
 * Creates and resolves the CM id, allocates the protection domain, and —
 * for strategies above RPCRDMA_REGISTER — attempts an all-memory DMA MR
 * (falling back to RPCRDMA_REGISTER if that fails). The strategy finally
 * in effect is recorded in ia->ri_memreg_strategy.
 * Returns 0 or a negative errno.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	init_completion(&ia->ri_done);

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	if (memreg > RPCRDMA_REGISTER) {
		int mem_priv = IB_ACCESS_LOCAL_WRITE;
		switch (memreg) {
#if RPCRDMA_PERSISTENT_REGISTRATION
		case RPCRDMA_ALLPHYSICAL:
			mem_priv |= IB_ACCESS_REMOTE_WRITE;
			mem_priv |= IB_ACCESS_REMOTE_READ;
			break;
#endif
		case RPCRDMA_MEMWINDOWS_ASYNC:
		case RPCRDMA_MEMWINDOWS:
			mem_priv |= IB_ACCESS_MW_BIND;
			break;
		default:
			break;
		}
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n\t"
				"Will continue with degraded performance\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			/* fall back: register/deregister per chunk instead */
			memreg = RPCRDMA_REGISTER;
			ia->ri_bind_mem = NULL;
		}
	}

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *   o if event handles and PD have been initialized, free them.
 *   o close the IA
 *
 * Teardown order is the reverse of rpcrdma_ia_open: MR, then QP, then PD,
 * then the CM id. Each field is guarded so close is safe after a partial
 * open.
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp)
		rdma_destroy_qp(ia->ri_id);
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id))
		rdma_destroy_id(ia->ri_id);
}

/*
 * Create unconnected endpoint.
*/intrpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata){ struct ib_device_attr devattr; int rc; rc = ib_query_device(ia->ri_id->device, &devattr); if (rc) { dprintk("RPC: %s: ib_query_device failed %d\n", __func__, rc); return rc; } /* check provider's send/recv wr limits */ if (cdata->max_requests > devattr.max_qp_wr) cdata->max_requests = devattr.max_qp_wr; ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; ep->rep_attr.qp_context = ep; /* send_cq and recv_cq initialized below */ ep->rep_attr.srq = NULL; ep->rep_attr.cap.max_send_wr = cdata->max_requests; switch (ia->ri_memreg_strategy) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -