ipath_rc.c

From the Linux 2.6.17.4 kernel source tree · C code · 1,857 lines total · page 1 of 4
			 */
			qp->s_last_psn = wqe->psn - 1;
			/* Retry this request. */
			ipath_restart_rc(qp, wqe->psn, &wc);
			/*
			 * No need to process the ACK/NAK since we are
			 * restarting an earlier request.
			 */
			goto bail;
		}
		/* Post a send completion queue entry if requested. */
		if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
			wc.wr_id = wqe->wr.wr_id;
			wc.status = IB_WC_SUCCESS;
			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
			wc.vendor_err = 0;
			wc.byte_len = wqe->length;
			wc.qp_num = qp->ibqp.qp_num;
			wc.src_qp = qp->remote_qpn;
			wc.pkey_index = 0;
			wc.slid = qp->remote_ah_attr.dlid;
			wc.sl = qp->remote_ah_attr.sl;
			wc.dlid_path_bits = 0;
			wc.port_num = 0;
			ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
		}
		qp->s_retry = qp->s_retry_cnt;
		/*
		 * If we are completing a request which is in the process of
		 * being resent, we can stop resending it since we know the
		 * responder has already seen it.
		 */
		if (qp->s_last == qp->s_cur) {
			if (++qp->s_cur >= qp->s_size)
				qp->s_cur = 0;
			wqe = get_swqe_ptr(qp, qp->s_cur);
			qp->s_state = OP(SEND_LAST);
			qp->s_psn = wqe->psn;
		}
		/* Advance s_last (circular send queue index) past the
		 * completed WQE. */
		if (++qp->s_last >= qp->s_size)
			qp->s_last = 0;
		wqe = get_swqe_ptr(qp, qp->s_last);
		if (qp->s_last == qp->s_tail)
			break;
	}

	/*
	 * Bits 31:29 of the AETH select the acknowledge type;
	 * see the case comments below.
	 */
	switch (aeth >> 29) {
	case 0:		/* ACK */
		dev->n_rc_acks++;
		/* If this is a partial ACK, reset the retransmit timer. */
		if (qp->s_last != qp->s_tail) {
			spin_lock(&dev->pending_lock);
			list_add_tail(&qp->timerwait,
				      &dev->pending[dev->pending_index]);
			spin_unlock(&dev->pending_lock);
		}
		ipath_get_credit(qp, aeth);
		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
		qp->s_retry = qp->s_retry_cnt;
		qp->s_last_psn = psn;
		ret = 1;
		goto bail;

	case 1:		/* RNR NAK */
		dev->n_rnr_naks++;
		if (qp->s_rnr_retry == 0) {
			/* No retries left; if nothing is outstanding just
			 * bail, otherwise report a retry-exceeded error. */
			if (qp->s_last == qp->s_tail)
				goto bail;

			wc.status = IB_WC_RNR_RETRY_EXC_ERR;
			goto class_b;
		}
		/* A retry count of 7 means "retry forever"; only decrement
		 * for finite counts. */
		if (qp->s_rnr_retry_cnt < 7)
			qp->s_rnr_retry--;
		if (qp->s_last == qp->s_tail)
			goto bail;

		/* The last valid PSN seen is the previous request's. */
		qp->s_last_psn = wqe->psn - 1;

		dev->n_rc_resends += (int)qp->s_psn - (int)psn;

		/*
		 * If we are starting the request from the beginning, let
		 * the normal send code handle initialization.
		 */
		qp->s_cur = qp->s_last;
		wqe = get_swqe_ptr(qp, qp->s_cur);
		if (ipath_cmp24(psn, wqe->psn) <= 0) {
			qp->s_state = OP(SEND_LAST);
			qp->s_psn = wqe->psn;
		} else
			reset_psn(qp, psn);

		/* Delay resending for the interval encoded in the AETH
		 * credit field, then requeue via the RNR queue. */
		qp->s_rnr_timeout =
			ib_ipath_rnr_table[(aeth >> IPS_AETH_CREDIT_SHIFT) &
					   IPS_AETH_CREDIT_MASK];
		ipath_insert_rnr_queue(qp);
		goto bail;

	case 3:		/* NAK */
		/* The last valid PSN seen is the previous request's. */
		if (qp->s_last != qp->s_tail)
			qp->s_last_psn = wqe->psn - 1;
		switch ((aeth >> IPS_AETH_CREDIT_SHIFT) &
			IPS_AETH_CREDIT_MASK) {
		case 0:	/* PSN sequence error */
			dev->n_seq_naks++;
			/*
			 * Back up to the responder's expected PSN.  XXX
			 * Note that we might get a NAK in the middle of an
			 * RDMA READ response which terminates the RDMA
			 * READ.
			 */
			if (qp->s_last == qp->s_tail)
				break;
			if (ipath_cmp24(psn, wqe->psn) < 0)
				break;
			/* Retry the request. */
			ipath_restart_rc(qp, psn, &wc);
			break;

		case 1:	/* Invalid Request */
			wc.status = IB_WC_REM_INV_REQ_ERR;
			dev->n_other_naks++;
			goto class_b;

		case 2:	/* Remote Access Error */
			wc.status = IB_WC_REM_ACCESS_ERR;
			dev->n_other_naks++;
			goto class_b;

		case 3:	/* Remote Operation Error */
			wc.status = IB_WC_REM_OP_ERR;
			dev->n_other_naks++;
		class_b:
			/* Class B errors: build an error completion and put
			 * the QP into the send-queue-error state. */
			wc.wr_id = wqe->wr.wr_id;
			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
			wc.vendor_err = 0;
			wc.byte_len = 0;
			wc.qp_num = qp->ibqp.qp_num;
			wc.src_qp = qp->remote_qpn;
			wc.pkey_index = 0;
			wc.slid = qp->remote_ah_attr.dlid;
			wc.sl = qp->remote_ah_attr.sl;
			wc.dlid_path_bits = 0;
			wc.port_num = 0;
			ipath_sqerror_qp(qp, &wc);
			break;

		default:
			/* Ignore other reserved NAK error codes */
			goto reserved;
		}
		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
		goto bail;

	default:		/* 2: reserved */
	reserved:
		/* Ignore reserved NAK codes. */
		goto bail;
	}

bail:
	return ret;
}

/**
 * ipath_rc_rcv_resp - process an incoming RC response packet
 * @dev: the device this packet came in on
 * @ohdr: the other headers for this packet
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP for this packet
 * @opcode: the opcode for this packet
 * @psn: the packet sequence number for this packet
 * @hdrsize: the header length
 * @pmtu: the path MTU
 * @header_in_data: true if part of the header data is in the data buffer
 *
 * This is called from ipath_rc_rcv() to process an incoming RC response
 * packet for the given QP.
 * Called at interrupt level.
 */
static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
				     struct ipath_other_headers *ohdr,
				     void *data, u32 tlen,
				     struct ipath_qp *qp,
				     u32 opcode,
				     u32 psn, u32 hdrsize, u32 pmtu,
				     int header_in_data)
{
	unsigned long flags;
	struct ib_wc wc;
	int diff;
	u32 pad;
	u32 aeth;

	spin_lock_irqsave(&qp->s_lock, flags);

	/* Ignore invalid responses (PSN at or beyond anything we sent). */
	if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
		goto ack_done;

	/* Ignore duplicate responses. */
	diff = ipath_cmp24(psn, qp->s_last_psn);
	if (unlikely(diff <= 0)) {
		/* Update credits for "ghost" ACKs */
		if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
			if (!header_in_data)
				aeth = be32_to_cpu(ohdr->u.aeth);
			else {
				aeth = be32_to_cpu(((__be32 *) data)[0]);
				data += sizeof(__be32);
			}
			/* Only an ACK (type 0) carries usable credits. */
			if ((aeth >> 29) == 0)
				ipath_get_credit(qp, aeth);
		}
		goto ack_done;
	}

	switch (opcode) {
	case OP(ACKNOWLEDGE):
	case OP(ATOMIC_ACKNOWLEDGE):
	case OP(RDMA_READ_RESPONSE_FIRST):
		if (!header_in_data)
			aeth = be32_to_cpu(ohdr->u.aeth);
		else {
			aeth = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		}
		if (opcode == OP(ATOMIC_ACKNOWLEDGE))
			*(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data;
		if (!do_rc_ack(qp, aeth, psn, opcode) ||
		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
			goto ack_done;
		hdrsize += 4;
		/*
		 * do_rc_ack() has already checked the PSN so skip
		 * the sequence check.
		 */
		goto rdma_read;

	case OP(RDMA_READ_RESPONSE_MIDDLE):
		/* no AETH, no ACK */
		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
			dev->n_rdma_seq++;
			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
			goto ack_done;
		}
	rdma_read:
		/* Sanity-check that we are actually mid-RDMA-READ and the
		 * packet is a full pMTU worth of data (+ ICRC). */
		if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
			goto ack_done;
		if (unlikely(tlen != (hdrsize + pmtu + 4)))
			goto ack_done;
		if (unlikely(pmtu >= qp->s_len))
			goto ack_done;
		/* We got a response so update the timeout. */
		if (unlikely(qp->s_last == qp->s_tail ||
			     get_swqe_ptr(qp, qp->s_last)->wr.opcode !=
			     IB_WR_RDMA_READ))
			goto ack_done;
		spin_lock(&dev->pending_lock);
		if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
			list_move_tail(&qp->timerwait,
				       &dev->pending[dev->pending_index]);
		spin_unlock(&dev->pending_lock);
		/*
		 * Update the RDMA receive state but do the copy w/o
		 * holding the locks and blocking interrupts.  XXX Yet
		 * another place that affects relaxed RDMA order since
		 * we don't want s_sge modified.
		 */
		qp->s_len -= pmtu;
		qp->s_last_psn = psn;
		spin_unlock_irqrestore(&qp->s_lock, flags);
		ipath_copy_sge(&qp->s_sge, data, pmtu);
		goto bail;

	case OP(RDMA_READ_RESPONSE_LAST):
		/* ACKs READ req. */
		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
			dev->n_rdma_seq++;
			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
			goto ack_done;
		}
		/* FALLTHROUGH */
	case OP(RDMA_READ_RESPONSE_ONLY):
		if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
			goto ack_done;
		/*
		 * Get the number of bytes the message was padded by.
		 */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/*
		 * Check that the data size is >= 1 && <= pmtu.
		 * Remember to account for the AETH header (4) and
		 * ICRC (4).
		 */
		if (unlikely(tlen <= (hdrsize + pad + 8))) {
			/*
			 * XXX Need to generate an error CQ
			 * entry.
			 */
			goto ack_done;
		}
		tlen -= hdrsize + pad + 8;
		if (unlikely(tlen != qp->s_len)) {
			/*
			 * XXX Need to generate an error CQ
			 * entry.
			 */
			goto ack_done;
		}
		if (!header_in_data)
			aeth = be32_to_cpu(ohdr->u.aeth);
		else {
			aeth = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		}
		ipath_copy_sge(&qp->s_sge, data, tlen);
		if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) {
			/*
			 * Change the state so we continue
			 * processing new requests.
			 */
			qp->s_state = OP(SEND_LAST);
		}
		goto ack_done;
	}

ack_done:
	spin_unlock_irqrestore(&qp->s_lock, flags);
bail:
	return;
}

/**
 * ipath_rc_rcv_error - process an incoming duplicate or error RC packet
 * @dev: the device this packet came in on
 * @ohdr: the other headers for this packet
 * @data: the packet data
 * @qp: the QP for this packet
 * @opcode: the opcode for this packet
 * @psn: the packet sequence number for this packet
 * @diff: the difference between the PSN and the expected PSN
 * @header_in_data: true if part of the header data is in the data buffer
 *
 * This is called from ipath_rc_rcv() to process an unexpected
 * incoming RC packet for the given QP.
 * Called at interrupt level.
 * Return 1 if no more processing is needed; otherwise return 0 to
 * schedule a response to be sent and the s_lock unlocked.
 */
static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
				     struct ipath_other_headers *ohdr,
				     void *data,
				     struct ipath_qp *qp,
				     u32 opcode,
				     u32 psn,
				     int diff,
				     int header_in_data)
{
	struct ib_reth *reth;

	if (diff > 0) {
		/*
		 * Packet sequence error.
		 * A NAK will ACK earlier sends and RDMA writes.
		 * Don't queue the NAK if a RDMA read, atomic, or
		 * NAK is pending though.
		 */
		spin_lock(&qp->s_lock);
		if ((qp->s_ack_state >= OP(RDMA_READ_REQUEST) &&
		     qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) ||
		    qp->s_nak_state != 0) {
			spin_unlock(&qp->s_lock);
			goto done;
		}
		qp->s_ack_state = OP(SEND_ONLY);
		qp->s_nak_state = IB_NAK_PSN_ERROR;
		/* Use the expected PSN. */
		qp->s_ack_psn = qp->r_psn;
		goto resched;
	}

	/*
	 * Handle a duplicate request.  Don't re-execute SEND, RDMA
	 * write or atomic op.  Don't NAK errors, just silently drop
	 * the duplicate request.  Note that r_sge, r_len, and
	 * r_rcv_len may be in use so don't modify them.
	 *
	 * We are supposed to ACK the earliest duplicate PSN but we
	 * can coalesce an outstanding duplicate ACK.  We have to
	 * send the earliest so that RDMA reads can be restarted at
	 * the requester's expected PSN.
	 */
	spin_lock(&qp->s_lock);
	if (qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE &&
	    ipath_cmp24(psn, qp->s_ack_psn) >= 0) {
		if (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST)
			qp->s_ack_psn = psn;
		spin_unlock(&qp->s_lock);
		goto done;
	}

	switch (opcode) {
	case OP(RDMA_READ_REQUEST):
		/*
		 * We have to be careful to not change s_rdma_sge
		 * while ipath_do_rc_send() is using it and not
		 * holding the s_lock.
		 */
		if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
		    qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) {
			spin_unlock(&qp->s_lock);
			dev->n_rdma_dup_busy++;
			goto done;
		}
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		qp->s_rdma_len = be32_to_cpu(reth->length);
		if (qp->s_rdma_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/*
			 * Address range must be a subset of the original
			 * request and start on pmtu boundaries.
			 */
			ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
					   qp->s_rdma_len, vaddr, rkey,
					   IB_ACCESS_REMOTE_READ);
			if (unlikely(!ok))
				goto done;
		} else {
			/* Zero-length read: clear the scatter/gather
			 * state so nothing stale is reused. */
			qp->s_rdma_sge.sg_list = NULL;
			qp->s_rdma_sge.num_sge = 0;
			qp->s_rdma_sge.sge.mr = NULL;
			qp->s_rdma_sge.sge.vaddr = NULL;
			qp->s_rdma_sge.sge.length = 0;
			qp->s_rdma_sge.sge.sge_length = 0;
		}
		break;

	case OP(COMPARE_SWAP):
	case OP(FETCH_ADD):
		/*
		 * Check for the PSN of the last atomic operations
		 * performed and resend the result if found.
		 */
		if ((psn & IPS_PSN_MASK) != qp->r_atomic_psn) {
			spin_unlock(&qp->s_lock);
			goto done;
		}
		qp->s_ack_atomic = qp->r_atomic_data;
		break;
	}

⌨️ Keyboard shortcuts

Copy code: Ctrl + C
Search code: Ctrl + F
Full-screen mode: F11
Increase font size: Ctrl + =
Decrease font size: Ctrl + -
Show shortcuts: ?