ipath_rc.c

From "Linux kernel source code" · C code · 1,940 lines total · page 1 of 4

C
1,940
字号
/* * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses.  You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * *     Redistribution and use in source and binary forms, with or *     without modification, are permitted provided that the following *     conditions are met: * *      - Redistributions of source code must retain the above *        copyright notice, this list of conditions and the following *        disclaimer. * *      - Redistributions in binary form must reproduce the above *        copyright notice, this list of conditions and the following *        disclaimer in the documentation and/or other materials *        provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/

#include "ipath_verbs.h"
#include "ipath_kernel.h"

/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x

/*
 * Compute how many bytes of @wqe remain to be (re)sent starting at @psn
 * and position *@ss at that byte offset within the WQE's SGE list.
 * Each PSN past the WQE's first PSN accounts for one @pmtu of payload.
 * Returns the number of bytes still to be sent.
 */
static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
		       u32 psn, u32 pmtu)
{
	u32 len;

	len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
	ss->sge = wqe->sg_list[0];
	ss->sg_list = wqe->sg_list + 1;
	ss->num_sge = wqe->wr.num_sge;
	ipath_skip_sge(ss, len);
	return wqe->length - len;
}

/**
 * ipath_init_restart- initialize the qp->s_sge after a restart
 * @qp: the QP whose SGE we're restarting
 * @wqe: the work queue to initialize the QP's SGE from
 *
 * The QP s_lock should be held and interrupts disabled.
 */
static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
{
	struct ipath_ibdev *dev;

	qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
				ib_mtu_enum_to_int(qp->path_mtu));
	dev = to_idev(qp->ibqp.device);
	/*
	 * Put the QP back on the timeout list so a lost ACK triggers a
	 * retry (see the matching re-arm in ipath_make_rc_req()).
	 */
	spin_lock(&dev->pending_lock);
	if (list_empty(&qp->timerwait))
		list_add_tail(&qp->timerwait,
			      &dev->pending[dev->pending_index]);
	spin_unlock(&dev->pending_lock);
}

/**
 * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
 * @dev: the device this QP belongs to
 * @qp: a pointer to the QP
 * @ohdr: a pointer to the IB header being constructed
 * @pmtu: the path MTU
 *
 * Return 1 if constructed; otherwise, return 0.
 * Note that we are in the responder's side of the QP context.
 * Note the QP s_lock must be held.
 */
static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
			     struct ipath_other_headers *ohdr, u32 pmtu)
{
	struct ipath_ack_entry *e;
	u32 hwords;
	u32 len;
	u32 bth0;
	u32 bth2;

	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
	hwords = 5;

	switch (qp->s_ack_state) {
	case OP(RDMA_READ_RESPONSE_LAST):
	case OP(RDMA_READ_RESPONSE_ONLY):
	case OP(ATOMIC_ACKNOWLEDGE):
		/*
		 * We can increment the tail pointer now that the last
		 * response has been sent instead of only being
		 * constructed.
		 */
		if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
			qp->s_tail_ack_queue = 0;
		/* FALLTHROUGH */
	case OP(SEND_ONLY):
	case OP(ACKNOWLEDGE):
		/* Check for no next entry in the queue. */
		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
			if (qp->s_flags & IPATH_S_ACK_PENDING)
				goto normal;
			qp->s_ack_state = OP(ACKNOWLEDGE);
			goto bail;
		}

		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
		if (e->opcode == OP(RDMA_READ_REQUEST)) {
			/* Copy SGE state in case we need to resend */
			qp->s_ack_rdma_sge = e->rdma_sge;
			qp->s_cur_sge = &qp->s_ack_rdma_sge;
			len = e->rdma_sge.sge.sge_length;
			if (len > pmtu) {
				/* More than one packet: start a FIRST/
				 * MIDDLE/LAST response sequence. */
				len = pmtu;
				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
			} else {
				qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
				e->sent = 1;
			}
			ohdr->u.aeth = ipath_compute_aeth(qp);
			hwords++;
			qp->s_ack_rdma_psn = e->psn;
			bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
		} else {
			/* COMPARE_SWAP or FETCH_ADD */
			qp->s_cur_sge = NULL;
			len = 0;
			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
			ohdr->u.at.aeth = ipath_compute_aeth(qp);
			/* 64-bit atomic result is carried as two
			 * big-endian 32-bit words. */
			ohdr->u.at.atomic_ack_eth[0] =
				cpu_to_be32(e->atomic_data >> 32);
			ohdr->u.at.atomic_ack_eth[1] =
				cpu_to_be32(e->atomic_data);
			hwords += sizeof(ohdr->u.at) / sizeof(u32);
			bth2 = e->psn;
			e->sent = 1;
		}
		bth0 = qp->s_ack_state << 24;
		break;

	case OP(RDMA_READ_RESPONSE_FIRST):
		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_READ_RESPONSE_MIDDLE):
		len = qp->s_ack_rdma_sge.sge.sge_length;
		if (len > pmtu)
			len = pmtu;
		else {
			/* Last packet of the read response: add the AETH. */
			ohdr->u.aeth = ipath_compute_aeth(qp);
			hwords++;
			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
			qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
		}
		bth0 = qp->s_ack_state << 24;
		bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
		break;

	default:
	normal:
		/*
		 * Send a regular ACK.
		 * Set the s_ack_state so we wait until after sending
		 * the ACK before setting s_ack_state to ACKNOWLEDGE
		 * (see above).
		 */
		qp->s_ack_state = OP(SEND_ONLY);
		qp->s_flags &= ~IPATH_S_ACK_PENDING;
		qp->s_cur_sge = NULL;
		if (qp->s_nak_state)
			/* Encode the pending NAK code in the AETH. */
			ohdr->u.aeth =
				cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
					    (qp->s_nak_state <<
					     IPATH_AETH_CREDIT_SHIFT));
		else
			ohdr->u.aeth = ipath_compute_aeth(qp);
		hwords++;
		len = 0;
		bth0 = OP(ACKNOWLEDGE) << 24;
		bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
	}
	qp->s_hdrwords = hwords;
	qp->s_cur_size = len;
	ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2);
	return 1;

bail:
	return 0;
}

/**
 * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
 * @qp: a pointer to the QP
 *
 * Return 1 if constructed; otherwise, return 0.
 */
int ipath_make_rc_req(struct ipath_qp *qp)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ipath_other_headers *ohdr;
	struct ipath_sge_state *ss;
	struct ipath_swqe *wqe;
	u32 hwords;
	u32 len;
	u32 bth0;
	u32 bth2;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	char newreq;
	unsigned long flags;
	int ret = 0;

	ohdr = &qp->s_hdr.u.oth;
	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
		/* Header includes a GRH; use the long-header union arm. */
		ohdr = &qp->s_hdr.u.l.oth;

	/*
	 * The lock is needed to synchronize between the sending tasklet,
	 * the receive interrupt handler, and timeout resends.
	 */
	spin_lock_irqsave(&qp->s_lock, flags);

	/* Sending responses has higher priority over sending requests. */
	if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
	     (qp->s_flags & IPATH_S_ACK_PENDING) ||
	     qp->s_ack_state != OP(ACKNOWLEDGE)) &&
	    ipath_make_rc_ack(dev, qp, ohdr, pmtu))
		goto done;

	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
	    qp->s_rnr_timeout || qp->s_wait_credit)
		goto bail;

	/* Limit the number of packets sent without an ACK. */
	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
		qp->s_wait_credit = 1;
		dev->n_rc_stalls++;
		goto bail;
	}

	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
	hwords = 5;
	bth0 = 1 << 22; /* Set M bit */

	/* Send a request.
*/	wqe = get_swqe_ptr(qp, qp->s_cur);	switch (qp->s_state) {	default:		/*		 * Resend an old request or start a new one.		 *		 * We keep track of the current SWQE so that		 * we don't reset the "furthest progress" state		 * if we need to back up.		 */		newreq = 0;		if (qp->s_cur == qp->s_tail) {			/* Check if send work queue is empty. */			if (qp->s_tail == qp->s_head)				goto bail;			/*			 * If a fence is requested, wait for previous			 * RDMA read and atomic operations to finish.			 */			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&			    qp->s_num_rd_atomic) {				qp->s_flags |= IPATH_S_FENCE_PENDING;				goto bail;			}			wqe->psn = qp->s_next_psn;			newreq = 1;		}		/*		 * Note that we have to be careful not to modify the		 * original work request since we may need to resend		 * it.		 */		len = wqe->length;		ss = &qp->s_sge;		bth2 = 0;		switch (wqe->wr.opcode) {		case IB_WR_SEND:		case IB_WR_SEND_WITH_IMM:			/* If no credit, return. */			if (qp->s_lsn != (u32) -1 &&			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)				goto bail;			wqe->lpsn = wqe->psn;			if (len > pmtu) {				wqe->lpsn += (len - 1) / pmtu;				qp->s_state = OP(SEND_FIRST);				len = pmtu;				break;			}			if (wqe->wr.opcode == IB_WR_SEND)				qp->s_state = OP(SEND_ONLY);			else {				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);				/* Immediate data comes after the BTH */				ohdr->u.imm_data = wqe->wr.imm_data;				hwords += 1;			}			if (wqe->wr.send_flags & IB_SEND_SOLICITED)				bth0 |= 1 << 23;			bth2 = 1 << 31;	/* Request ACK. */			if (++qp->s_cur == qp->s_size)				qp->s_cur = 0;			break;		case IB_WR_RDMA_WRITE:			if (newreq && qp->s_lsn != (u32) -1)				qp->s_lsn++;			/* FALLTHROUGH */		case IB_WR_RDMA_WRITE_WITH_IMM:			/* If no credit, return. 
*/			if (qp->s_lsn != (u32) -1 &&			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)				goto bail;			ohdr->u.rc.reth.vaddr =				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);			ohdr->u.rc.reth.rkey =				cpu_to_be32(wqe->wr.wr.rdma.rkey);			ohdr->u.rc.reth.length = cpu_to_be32(len);			hwords += sizeof(struct ib_reth) / sizeof(u32);			wqe->lpsn = wqe->psn;			if (len > pmtu) {				wqe->lpsn += (len - 1) / pmtu;				qp->s_state = OP(RDMA_WRITE_FIRST);				len = pmtu;				break;			}			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)				qp->s_state = OP(RDMA_WRITE_ONLY);			else {				qp->s_state =					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);				/* Immediate data comes after RETH */				ohdr->u.rc.imm_data = wqe->wr.imm_data;				hwords += 1;				if (wqe->wr.send_flags & IB_SEND_SOLICITED)					bth0 |= 1 << 23;			}			bth2 = 1 << 31;	/* Request ACK. */			if (++qp->s_cur == qp->s_size)				qp->s_cur = 0;			break;		case IB_WR_RDMA_READ:			/*			 * Don't allow more operations to be started			 * than the QP limits allow.			 */			if (newreq) {				if (qp->s_num_rd_atomic >=				    qp->s_max_rd_atomic) {					qp->s_flags |= IPATH_S_RDMAR_PENDING;					goto bail;				}				qp->s_num_rd_atomic++;				if (qp->s_lsn != (u32) -1)					qp->s_lsn++;				/*				 * Adjust s_next_psn to count the				 * expected number of responses.				 */				if (len > pmtu)					qp->s_next_psn += (len - 1) / pmtu;				wqe->lpsn = qp->s_next_psn++;			}			ohdr->u.rc.reth.vaddr =				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);			ohdr->u.rc.reth.rkey =				cpu_to_be32(wqe->wr.wr.rdma.rkey);			ohdr->u.rc.reth.length = cpu_to_be32(len);			qp->s_state = OP(RDMA_READ_REQUEST);			hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);			ss = NULL;			len = 0;			if (++qp->s_cur == qp->s_size)				qp->s_cur = 0;			break;		case IB_WR_ATOMIC_CMP_AND_SWP:		case IB_WR_ATOMIC_FETCH_AND_ADD:			/*			 * Don't allow more operations to be started			 * than the QP limits allow.			 
*/			if (newreq) {				if (qp->s_num_rd_atomic >=				    qp->s_max_rd_atomic) {					qp->s_flags |= IPATH_S_RDMAR_PENDING;					goto bail;				}				qp->s_num_rd_atomic++;				if (qp->s_lsn != (u32) -1)					qp->s_lsn++;				wqe->lpsn = wqe->psn;			}			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {				qp->s_state = OP(COMPARE_SWAP);				ohdr->u.atomic_eth.swap_data = cpu_to_be64(					wqe->wr.wr.atomic.swap);				ohdr->u.atomic_eth.compare_data = cpu_to_be64(					wqe->wr.wr.atomic.compare_add);			} else {				qp->s_state = OP(FETCH_ADD);				ohdr->u.atomic_eth.swap_data = cpu_to_be64(					wqe->wr.wr.atomic.compare_add);				ohdr->u.atomic_eth.compare_data = 0;			}			ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(				wqe->wr.wr.atomic.remote_addr >> 32);			ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(				wqe->wr.wr.atomic.remote_addr);			ohdr->u.atomic_eth.rkey = cpu_to_be32(				wqe->wr.wr.atomic.rkey);			hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);			ss = NULL;			len = 0;			if (++qp->s_cur == qp->s_size)				qp->s_cur = 0;			break;		default:			goto bail;		}		qp->s_sge.sge = wqe->sg_list[0];		qp->s_sge.sg_list = wqe->sg_list + 1;		qp->s_sge.num_sge = wqe->wr.num_sge;		qp->s_len = wqe->length;		if (newreq) {			qp->s_tail++;			if (qp->s_tail >= qp->s_size)				qp->s_tail = 0;		}		bth2 |= qp->s_psn & IPATH_PSN_MASK;		if (wqe->wr.opcode == IB_WR_RDMA_READ)			qp->s_psn = wqe->lpsn + 1;		else {			qp->s_psn++;			if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)				qp->s_next_psn = qp->s_psn;		}		/*		 * Put the QP on the pending list so lost ACKs will cause		 * a retry.  More than one request can be pending so the		 * QP may already be on the dev->pending list.		 */		spin_lock(&dev->pending_lock);		if (list_empty(&qp->timerwait))			list_add_tail(&qp->timerwait,				      &dev->pending[dev->pending_index]);		spin_unlock(&dev->pending_lock);		break;	case OP(RDMA_READ_RESPONSE_FIRST):		/*		 * This case can only happen if a send is restarted.		 * See ipath_restart_rc().		 
*/		ipath_init_restart(qp, wqe);		/* FALLTHROUGH */	case OP(SEND_FIRST):		qp->s_state = OP(SEND_MIDDLE);		/* FALLTHROUGH */	case OP(SEND_MIDDLE):		bth2 = qp->s_psn++ & IPATH_PSN_MASK;		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)			qp->s_next_psn = qp->s_psn;		ss = &qp->s_sge;		len = qp->s_len;		if (len > pmtu) {			len = pmtu;			break;		}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?