📄 rpc_rdma.c

📁 linux 内核源代码
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
		rtype = rpcrdma_readch;	/* The following simplification is not true forever */	if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)		wtype = rpcrdma_noch;	BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch);	if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS &&	    (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) {		/* forced to "pure inline"? */		dprintk("RPC:       %s: too much data (%d/%d) for inline\n",			__func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len);		return -1;	}	hdrlen = 28; /*sizeof *headerp;*/	padlen = 0;	/*	 * Pull up any extra send data into the preregistered buffer.	 * When padding is in use and applies to the transfer, insert	 * it and change the message type.	 */	if (rtype == rpcrdma_noch) {		padlen = rpcrdma_inline_pullup(rqst,						RPCRDMA_INLINE_PAD_VALUE(rqst));		if (padlen) {			headerp->rm_type = __constant_htonl(RDMA_MSGP);			headerp->rm_body.rm_padded.rm_align =				htonl(RPCRDMA_INLINE_PAD_VALUE(rqst));			headerp->rm_body.rm_padded.rm_thresh =				__constant_htonl(RPCRDMA_INLINE_PAD_THRESH);			headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero;			headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;			headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;			hdrlen += 2 * sizeof(u32); /* extra words in padhdr */			BUG_ON(wtype != rpcrdma_noch);		} else {			headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero;			headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero;			headerp->rm_body.rm_nochunks.rm_empty[2] = xdr_zero;			/* new length after pullup */			rpclen = rqst->rq_svec[0].iov_len;			/*			 * Currently we try to not actually use read inline.			 * Reply chunks have the desirable property that			 * they land, packed, directly in the target buffers			 * without headers, so they require no fixup. The			 * additional RDMA Write op sends the same amount			 * of data, streams on-the-wire and adds no overhead			 * on receive. Therefore, we request a reply chunk			 * for non-writes wherever feasible and efficient.			 
*/			if (wtype == rpcrdma_noch &&			    r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER)				wtype = rpcrdma_replych;		}	}	/*	 * Marshal chunks. This routine will return the header length	 * consumed by marshaling.	 */	if (rtype != rpcrdma_noch) {		hdrlen = rpcrdma_create_chunks(rqst,					&rqst->rq_snd_buf, headerp, rtype);		wtype = rtype;	/* simplify dprintk */	} else if (wtype != rpcrdma_noch) {		hdrlen = rpcrdma_create_chunks(rqst,					&rqst->rq_rcv_buf, headerp, wtype);	}	if (hdrlen == 0)		return -1;	dprintk("RPC:       %s: %s: hdrlen %zd rpclen %zd padlen %zd\n"		"                   headerp 0x%p base 0x%p lkey 0x%x\n",		__func__, transfertypes[wtype], hdrlen, rpclen, padlen,		headerp, base, req->rl_iov.lkey);	/*	 * initialize send_iov's - normally only two: rdma chunk header and	 * single preregistered RPC header buffer, but if padding is present,	 * then use a preregistered (and zeroed) pad buffer between the RPC	 * header and any write data. In all non-rdma cases, any following	 * data has been copied into the RPC header buffer.	 */	req->rl_send_iov[0].addr = req->rl_iov.addr;	req->rl_send_iov[0].length = hdrlen;	req->rl_send_iov[0].lkey = req->rl_iov.lkey;	req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base);	req->rl_send_iov[1].length = rpclen;	req->rl_send_iov[1].lkey = req->rl_iov.lkey;	req->rl_niovs = 2;	if (padlen) {		struct rpcrdma_ep *ep = &r_xprt->rx_ep;		req->rl_send_iov[2].addr = ep->rep_pad.addr;		req->rl_send_iov[2].length = padlen;		req->rl_send_iov[2].lkey = ep->rep_pad.lkey;		req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen;		req->rl_send_iov[3].length = rqst->rq_slen - rpclen;		req->rl_send_iov[3].lkey = req->rl_iov.lkey;		req->rl_niovs = 4;	}	return 0;}/* * Chase down a received write or reply chunklist to get length * RDMA'd by server. See map at rpcrdma_create_chunks()! 
:-) */
/*
 * rpcrdma_count_chunks - sum the segment lengths of one received chunk list
 * @rep:     reply buffer; rr_base/rr_len delimit the received bytes
 * @max:     largest segment count the caller will accept
 * @wrchunk: non-zero if the list must be followed by an xdr_zero terminator
 *           word (write chunk list); zero otherwise
 * @iptrp:   in: points at the big-endian segment count word
 *           out: advanced past the list (and terminator, if @wrchunk),
 *           only on success
 *
 * Every word is checked against the end of the received data before it is
 * dereferenced, so a truncated or malformed header is rejected without
 * reading past rr_base + rr_len.
 *
 * Returns the sum of the segment lengths, or -1 if the count exceeds @max,
 * the list runs past the received data, or the terminator is missing.
 */
static int
rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, __be32 **iptrp)
{
	unsigned int i, total_len;
	struct rpcrdma_write_chunk *cur_wchunk;
	char *end = rep->rr_base + rep->rr_len;	/* one past the received data */

	/* the count word itself must lie inside the received data */
	if ((char *)(*iptrp + 1) > end)
		return -1;

	i = ntohl(**iptrp);	/* get array count */
	if (i > max)
		return -1;

	cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1);
	total_len = 0;
	while (i--) {
		struct rpcrdma_segment *seg;

		/* refuse to read a segment that is not fully present */
		if ((char *)(cur_wchunk + 1) > end)
			return -1;

		seg = &cur_wchunk->wc_target;
		ifdebug(FACILITY) {
			u64 off;

			xdr_decode_hyper((__be32 *)&seg->rs_offset, &off);
			dprintk("RPC:       %s: chunk %d@0x%llx:0x%x\n",
				__func__,
				ntohl(seg->rs_length),
				(unsigned long long)off,
				ntohl(seg->rs_handle));
		}
		total_len += ntohl(seg->rs_length);
		++cur_wchunk;
	}

	/* check and adjust for properly terminated write chunk */
	if (wrchunk) {
		__be32 *w = (__be32 *) cur_wchunk;

		if ((char *)(w + 1) > end)
			return -1;
		if (*w++ != xdr_zero)
			return -1;
		cur_wchunk = (struct rpcrdma_write_chunk *) w;
	}

	/*
	 * No trailing range check is needed: every advance of cur_wchunk
	 * above was validated against 'end' before it was made.
	 */
	*iptrp = (__be32 *) cur_wchunk;
	return total_len;
}

/*
 * Scatter inline received data back into provided iov's.
*/static voidrpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len){	int i, npages, curlen, olen;	char *destp;	curlen = rqst->rq_rcv_buf.head[0].iov_len;	if (curlen > copy_len) {	/* write chunk header fixup */		curlen = copy_len;		rqst->rq_rcv_buf.head[0].iov_len = curlen;	}	dprintk("RPC:       %s: srcp 0x%p len %d hdrlen %d\n",		__func__, srcp, copy_len, curlen);	/* Shift pointer for first receive segment only */	rqst->rq_rcv_buf.head[0].iov_base = srcp;	srcp += curlen;	copy_len -= curlen;	olen = copy_len;	i = 0;	rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen;	if (copy_len && rqst->rq_rcv_buf.page_len) {		npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base +			rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT;		for (; i < npages; i++) {			if (i == 0)				curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base;			else				curlen = PAGE_SIZE;			if (curlen > copy_len)				curlen = copy_len;			dprintk("RPC:       %s: page %d"				" srcp 0x%p len %d curlen %d\n",				__func__, i, srcp, copy_len, curlen);			destp = kmap_atomic(rqst->rq_rcv_buf.pages[i],						KM_SKB_SUNRPC_DATA);			if (i == 0)				memcpy(destp + rqst->rq_rcv_buf.page_base,						srcp, curlen);			else				memcpy(destp, srcp, curlen);			flush_dcache_page(rqst->rq_rcv_buf.pages[i]);			kunmap_atomic(destp, KM_SKB_SUNRPC_DATA);			srcp += curlen;			copy_len -= curlen;			if (copy_len == 0)				break;		}		rqst->rq_rcv_buf.page_len = olen - copy_len;	} else		rqst->rq_rcv_buf.page_len = 0;	if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) {		curlen = copy_len;		if (curlen > rqst->rq_rcv_buf.tail[0].iov_len)			curlen = rqst->rq_rcv_buf.tail[0].iov_len;		if (rqst->rq_rcv_buf.tail[0].iov_base != srcp)			memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);		dprintk("RPC:       %s: tail srcp 0x%p len %d curlen %d\n",			__func__, srcp, copy_len, curlen);		rqst->rq_rcv_buf.tail[0].iov_len = curlen;		copy_len -= curlen; ++i;	} else		rqst->rq_rcv_buf.tail[0].iov_len = 0;	if (copy_len)		dprintk("RPC:       %s: %d bytes 
in"			" %d extra segments (%d lost)\n",			__func__, olen, i, copy_len);	/* TBD avoid a warning from call_decode() */	rqst->rq_private_buf = rqst->rq_rcv_buf;}/* * This function is called when an async event is posted to * the connection which changes the connection state. All it * does at this point is mark the connection up/down, the rpc * timers do the rest. */voidrpcrdma_conn_func(struct rpcrdma_ep *ep){	struct rpc_xprt *xprt = ep->rep_xprt;	spin_lock_bh(&xprt->transport_lock);	if (ep->rep_connected > 0) {		if (!xprt_test_and_set_connected(xprt))			xprt_wake_pending_tasks(xprt, 0);	} else {		if (xprt_test_and_clear_connected(xprt))			xprt_wake_pending_tasks(xprt, ep->rep_connected);	}	spin_unlock_bh(&xprt->transport_lock);}/* * This function is called when memory window unbind which we are waiting * for completes. Just use rr_func (zeroed by upcall) to signal completion. */static voidrpcrdma_unbind_func(struct rpcrdma_rep *rep){	wake_up(&rep->rr_unbind);}/* * Called as a tasklet to do req/reply match and complete a request * Errors must result in the RPC task either being awakened, or * allowed to timeout, to discover the errors at that time. */voidrpcrdma_reply_handler(struct rpcrdma_rep *rep){	struct rpcrdma_msg *headerp;	struct rpcrdma_req *req;	struct rpc_rqst *rqst;	struct rpc_xprt *xprt = rep->rr_xprt;	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);	__be32 *iptr;	int i, rdmalen, status;	/* Check status. If bad, signal disconnect and return rep to pool */	if (rep->rr_len == ~0U) {		rpcrdma_recv_buffer_put(rep);		if (r_xprt->rx_ep.rep_connected == 1) {			r_xprt->rx_ep.rep_connected = -EIO;			rpcrdma_conn_func(&r_xprt->rx_ep);		}		return;	}	if (rep->rr_len < 28) {		dprintk("RPC:       %s: short/invalid reply\n", __func__);		goto repost;	}	headerp = (struct rpcrdma_msg *) rep->rr_base;	if (headerp->rm_vers != xdr_one) {		dprintk("RPC:       %s: invalid version %d\n",			__func__, ntohl(headerp->rm_vers));		goto repost;	}	/* Get XID and try for a match. 
*/	spin_lock(&xprt->transport_lock);	rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);	if (rqst == NULL) {		spin_unlock(&xprt->transport_lock);		dprintk("RPC:       %s: reply 0x%p failed "			"to match any request xid 0x%08x len %d\n",			__func__, rep, headerp->rm_xid, rep->rr_len);repost:		r_xprt->rx_stats.bad_reply_count++;		rep->rr_func = rpcrdma_reply_handler;		if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))			rpcrdma_recv_buffer_put(rep);		return;	}	/* get request object */	req = rpcr_to_rdmar(rqst);	dprintk("RPC:       %s: reply 0x%p completes request 0x%p\n"		"                   RPC request 0x%p xid 0x%08x\n",			__func__, rep, req, rqst, headerp->rm_xid);	BUG_ON(!req || req->rl_reply);	/* from here on, the reply is no longer an orphan */	req->rl_reply = rep;	/* check for expected message types */	/* The order of some of these tests is important. */	switch (headerp->rm_type) {	case __constant_htonl(RDMA_MSG):		/* never expect read chunks */		/* never expect reply chunks (two ways to check) */		/* never expect write chunks without having offered RDMA */		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||		    (headerp->rm_body.rm_chunks[1] == xdr_zero &&		     headerp->rm_body.rm_chunks[2] != xdr_zero) ||		    (headerp->rm_body.rm_chunks[1] != xdr_zero &&		     req->rl_nchunks == 0))			goto badheader;		if (headerp->rm_body.rm_chunks[1] != xdr_zero) {			/* count any expected write chunks in read reply */			/* start at write chunk array count */			iptr = &headerp->rm_body.rm_chunks[2];			rdmalen = rpcrdma_count_chunks(rep,						req->rl_nchunks, 1, &iptr);			/* check for validity, and no reply chunk after */			if (rdmalen < 0 || *iptr++ != xdr_zero)				goto badheader;			rep->rr_len -=			    ((unsigned char *)iptr - (unsigned char *)headerp);			status = rep->rr_len + rdmalen;			r_xprt->rx_stats.total_rdma_reply += rdmalen;		} else {			/* else ordinary inline */			iptr = (__be32 *)((unsigned char *)headerp + 28);			rep->rr_len -= 28; /*sizeof *headerp;*/		
	status = rep->rr_len;		}		/* Fix up the rpc results for upper layer */		rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len);		break;	case __constant_htonl(RDMA_NOMSG):		/* never expect read or write chunks, always reply chunks */		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||		    headerp->rm_body.rm_chunks[1] != xdr_zero ||		    headerp->rm_body.rm_chunks[2] != xdr_one ||		    req->rl_nchunks == 0)			goto badheader;		iptr = (__be32 *)((unsigned char *)headerp + 28);		rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr);		if (rdmalen < 0)			goto badheader;		r_xprt->rx_stats.total_rdma_reply += rdmalen;		/* Reply chunk buffer already is the reply vector - no fixup. */		status = rdmalen;		break;badheader:	default:		dprintk("%s: invalid rpcrdma reply header (type %d):"				" chunks[012] == %d %d %d"				" expected chunks <= %d\n",				__func__, ntohl(headerp->rm_type),				headerp->rm_body.rm_chunks[0],				headerp->rm_body.rm_chunks[1],				headerp->rm_body.rm_chunks[2],				req->rl_nchunks);		status = -EIO;		r_xprt->rx_stats.bad_reply_count++;		break;	}	/* If using mw bind, start the deregister process now. */	/* (Note: if mr_free(), cannot perform it here, in tasklet context) */	if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) {	case RPCRDMA_MEMWINDOWS:		for (i = 0; req->rl_nchunks-- > 1;)			i += rpcrdma_deregister_external(				&req->rl_segments[i], r_xprt, NULL);		/* Optionally wait (not here) for unbinds to complete */		rep->rr_func = rpcrdma_unbind_func;		(void) rpcrdma_deregister_external(&req->rl_segments[i],						   r_xprt, rep);		break;	case RPCRDMA_MEMWINDOWS_ASYNC:		for (i = 0; req->rl_nchunks--;)			i += rpcrdma_deregister_external(&req->rl_segments[i],							 r_xprt, NULL);		break;	default:		break;	}	dprintk("RPC:       %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",			__func__, xprt, rqst, status);	xprt_complete_rqst(rqst->rq_task, status);	spin_unlock(&xprt->transport_lock);}
上一页 12
💿 文件大小 57701 K
👤 上传用户 huanzhudev
📂 所属分类：网络
🏷️ 相关标签

#linux #内核 #源代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -