ipath_rc.c

From the Linux kernel source · C code · 1,940 lines total · page 1/4
		if (wqe->wr.opcode == IB_WR_SEND)			qp->s_state = OP(SEND_LAST);		else {			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);			/* Immediate data comes after the BTH */			ohdr->u.imm_data = wqe->wr.imm_data;			hwords += 1;		}		if (wqe->wr.send_flags & IB_SEND_SOLICITED)			bth0 |= 1 << 23;		bth2 |= 1 << 31;	/* Request ACK. */		qp->s_cur++;		if (qp->s_cur >= qp->s_size)			qp->s_cur = 0;		break;	case OP(RDMA_READ_RESPONSE_LAST):		/*		 * This case can only happen if a RDMA write is restarted.		 * See ipath_restart_rc().		 */		ipath_init_restart(qp, wqe);		/* FALLTHROUGH */	case OP(RDMA_WRITE_FIRST):		qp->s_state = OP(RDMA_WRITE_MIDDLE);		/* FALLTHROUGH */	case OP(RDMA_WRITE_MIDDLE):		bth2 = qp->s_psn++ & IPATH_PSN_MASK;		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)			qp->s_next_psn = qp->s_psn;		ss = &qp->s_sge;		len = qp->s_len;		if (len > pmtu) {			len = pmtu;			break;		}		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)			qp->s_state = OP(RDMA_WRITE_LAST);		else {			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);			/* Immediate data comes after the BTH */			ohdr->u.imm_data = wqe->wr.imm_data;			hwords += 1;			if (wqe->wr.send_flags & IB_SEND_SOLICITED)				bth0 |= 1 << 23;		}		bth2 |= 1 << 31;	/* Request ACK. */		qp->s_cur++;		if (qp->s_cur >= qp->s_size)			qp->s_cur = 0;		break;	case OP(RDMA_READ_RESPONSE_MIDDLE):		/*		 * This case can only happen if a RDMA read is restarted.		 * See ipath_restart_rc().		 
		 */
		ipath_init_restart(qp, wqe);
		/*
		 * Byte offset into the original request: one pmtu per PSN
		 * already consumed before the restart point.
		 */
		len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
		ohdr->u.rc.reth.vaddr =
			cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
		ohdr->u.rc.reth.rkey =
			cpu_to_be32(wqe->wr.wr.rdma.rkey);
		ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
		qp->s_state = OP(RDMA_READ_REQUEST);
		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
			qp->s_next_psn = qp->s_psn;
		/* An RDMA read request carries no payload from this side. */
		ss = NULL;
		len = 0;
		qp->s_cur++;
		if (qp->s_cur == qp->s_size)
			qp->s_cur = 0;
		break;
	}
	/* Request an ACK when nearing the end of the PSN credit window. */
	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
		bth2 |= 1 << 31;	/* Request ACK. */
	qp->s_len -= len;
	qp->s_hdrwords = hwords;
	qp->s_cur_sge = ss;
	qp->s_cur_size = len;
	ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
done:
	ret = 1;
bail:
	spin_unlock_irqrestore(&qp->s_lock, flags);
	return ret;
}

/**
 * send_rc_ack - Construct an ACK packet and send it
 * @qp: a pointer to the QP
 *
 * This is called from ipath_rc_rcv() and only uses the receive
 * side QP state.
 * Note that RDMA reads and atomics are handled in the
 * send side QP state and tasklet.
 */
static void send_rc_ack(struct ipath_qp *qp)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	u16 lrh0;
	u32 bth0;
	u32 hwords;
	struct ipath_ib_header hdr;
	struct ipath_other_headers *ohdr;
	unsigned long flags;

	/*
	 * Don't send ACK or NAK if a RDMA read or atomic is pending;
	 * those responses are generated by the send-side state machine,
	 * so hand the ACK off to it instead.
	 */
	if (qp->r_head_ack_queue != qp->s_tail_ack_queue ||
	    (qp->s_flags & IPATH_S_ACK_PENDING) ||
	    qp->s_ack_state != OP(ACKNOWLEDGE))
		goto queue_ack;

	/* Construct the header. */
	ohdr = &hdr.u.oth;
	lrh0 = IPATH_LRH_BTH;
	/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4.
	 */
	hwords = 6;
	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
		/* GRH present: use the long-header layout instead. */
		hwords += ipath_make_grh(dev, &hdr.u.l.grh,
					 &qp->remote_ah_attr.grh,
					 hwords, 0);
		ohdr = &hdr.u.l.oth;
		lrh0 = IPATH_LRH_GRH;
	}
	/* read pkey_index w/o lock (its atomic) */
	bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) |
		(OP(ACKNOWLEDGE) << 24) | (1 << 22);
	/* AETH encodes either the NAK code or the current credit state. */
	if (qp->r_nak_state)
		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
					    (qp->r_nak_state <<
					     IPATH_AETH_CREDIT_SHIFT));
	else
		ohdr->u.aeth = ipath_compute_aeth(qp);
	lrh0 |= qp->remote_ah_attr.sl << 4;
	hdr.lrh[0] = cpu_to_be16(lrh0);
	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
	hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
	ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);

	/*
	 * If we can send the ACK, clear the ACK state.
	 */
	if (ipath_verbs_send(qp, &hdr, hwords, NULL, 0) == 0) {
		dev->n_unicast_xmit++;
		goto done;
	}

	/*
	 * We are out of PIO buffers at the moment.
	 * Pass responsibility for sending the ACK to the
	 * send tasklet so that when a PIO buffer becomes
	 * available, the ACK is sent ahead of other outgoing
	 * packets.
	 */
	dev->n_rc_qacks++;

queue_ack:
	/* Latch the receive-side ACK state under the send lock. */
	spin_lock_irqsave(&qp->s_lock, flags);
	qp->s_flags |= IPATH_S_ACK_PENDING;
	qp->s_nak_state = qp->r_nak_state;
	qp->s_ack_psn = qp->r_ack_psn;
	spin_unlock_irqrestore(&qp->s_lock, flags);

	/* Call ipath_do_rc_send() in another thread. */
	tasklet_hi_schedule(&qp->s_task);

done:
	return;
}

/**
 * reset_psn - reset the QP state to send starting from PSN
 * @qp: the QP
 * @psn: the packet sequence number to restart at
 *
 * This is called from ipath_rc_rcv() to process an incoming RC ACK
 * for the given QP.
 * Called at interrupt level with the QP s_lock held.
 */
static void reset_psn(struct ipath_qp *qp, u32 psn)
{
	u32 n = qp->s_last;
	struct ipath_swqe *wqe = get_swqe_ptr(qp, n);
	u32 opcode;

	qp->s_cur = n;

	/*
	 * If we are starting the request from the beginning,
	 * let the normal send code handle initialization.
	 */
	if (ipath_cmp24(psn, wqe->psn) <= 0) {
		qp->s_state = OP(SEND_LAST);
		goto done;
	}

	/* Find the work request opcode corresponding to the given PSN. */
	opcode = wqe->wr.opcode;
	for (;;) {
		int diff;

		/* Walk the send queue circularly from s_last toward s_tail. */
		if (++n == qp->s_size)
			n = 0;
		if (n == qp->s_tail)
			break;
		wqe = get_swqe_ptr(qp, n);
		diff = ipath_cmp24(psn, wqe->psn);
		if (diff < 0)
			break;
		qp->s_cur = n;
		/*
		 * If we are starting the request from the beginning,
		 * let the normal send code handle initialization.
		 */
		if (diff == 0) {
			qp->s_state = OP(SEND_LAST);
			goto done;
		}
		opcode = wqe->wr.opcode;
	}

	/*
	 * Set the state to restart in the middle of a request.
	 * Don't change the s_sge, s_cur_sge, or s_cur_size.
	 * See ipath_do_rc_send().
	 *
	 * NOTE: the RDMA_READ_RESPONSE_* opcodes are repurposed here as
	 * internal "restart" markers; the request builder recognizes them
	 * as restarted send/write/read requests (see the corresponding
	 * "This case can only happen if ... is restarted" cases above).
	 */
	switch (opcode) {
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
		qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
		break;

	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
		break;

	case IB_WR_RDMA_READ:
		qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		break;

	default:
		/*
		 * This case shouldn't happen since its only
		 * one PSN per req.
		 */
		qp->s_state = OP(SEND_LAST);
	}
done:
	qp->s_psn = psn;
}

/**
 * ipath_restart_rc - back up requester to resend the last un-ACKed request
 * @qp: the QP to restart
 * @psn: packet sequence number for the request
 * @wc: the work completion request
 *
 * The QP s_lock should be held and interrupts disabled.
 */
void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
{
	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
	struct ipath_ibdev *dev;

	/* Out of retries: complete the WQE in error and fail the QP. */
	if (qp->s_retry == 0) {
		wc->wr_id = wqe->wr.wr_id;
		wc->status = IB_WC_RETRY_EXC_ERR;
		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
		wc->vendor_err = 0;
		wc->byte_len = 0;
		wc->qp = &qp->ibqp;
		wc->imm_data = 0;
		wc->src_qp = qp->remote_qpn;
		wc->wc_flags = 0;
		wc->pkey_index = 0;
		wc->slid = qp->remote_ah_attr.dlid;
		wc->sl = qp->remote_ah_attr.sl;
		wc->dlid_path_bits = 0;
		wc->port_num = 0;
		ipath_sqerror_qp(qp, wc);
		goto bail;
	}
	qp->s_retry--;

	/*
	 * Remove the QP from the timeout queue.
	 * Note: it may already have been removed by ipath_ib_timer().
	 */
	dev = to_idev(qp->ibqp.device);
	spin_lock(&dev->pending_lock);
	if (!list_empty(&qp->timerwait))
		list_del_init(&qp->timerwait);
	spin_unlock(&dev->pending_lock);

	/*
	 * RDMA reads are counted as a single resend; everything else is
	 * counted per packet (one PSN per pmtu of payload).
	 */
	if (wqe->wr.opcode == IB_WR_RDMA_READ)
		dev->n_rc_resends++;
	else
		dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;

	reset_psn(qp, psn);
	tasklet_hi_schedule(&qp->s_task);

bail:
	return;
}

/*
 * Record the last ACKed PSN and, if the sender was stalled waiting for
 * credits, kick the send tasklet so it can make progress again.
 */
static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
{
	if (qp->s_wait_credit) {
		qp->s_wait_credit = 0;
		tasklet_hi_schedule(&qp->s_task);
	}
	qp->s_last_psn = psn;
}

/**
 * do_rc_ack - process an incoming RC ACK
 * @qp: the QP the ACK came in on
 * @aeth: the AETH word from the ACK packet (top 3 bits select ACK/NAK)
 * @psn: the packet sequence number of the ACK
 * @opcode: the opcode of the request that resulted in the ACK
 * @val: returned atomic result, stored into the WQE's first SGE for
 *	 atomic compare-swap / fetch-add requests
 *
 * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
 * for the given QP.
 * Called at interrupt level with the QP s_lock held and interrupts disabled.
 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
 */
static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
		     u64 val)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ib_wc wc;
	struct ipath_swqe *wqe;
	int ret = 0;
	u32 ack_psn;
	int diff;

	/*
	 * Remove the QP from the timeout queue (or RNR timeout queue).
* If ipath_ib_timer() has already removed it,	 * it's OK since we hold the QP s_lock and ipath_restart_rc()	 * just won't find anything to restart if we ACK everything.	 */	spin_lock(&dev->pending_lock);	if (!list_empty(&qp->timerwait))		list_del_init(&qp->timerwait);	spin_unlock(&dev->pending_lock);	/*	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write	 * requests and implicitly NAK RDMA read and atomic requests issued	 * before the NAK'ed request.  The MSN won't include the NAK'ed	 * request but will include an ACK'ed request(s).	 */	ack_psn = psn;	if (aeth >> 29)		ack_psn--;	wqe = get_swqe_ptr(qp, qp->s_last);	/*	 * The MSN might be for a later WQE than the PSN indicates so	 * only complete WQEs that the PSN finishes.	 */	while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {		/*		 * RDMA_READ_RESPONSE_ONLY is a special case since		 * we want to generate completion events for everything		 * before the RDMA read, copy the data, then generate		 * the completion for the read.		 */		if (wqe->wr.opcode == IB_WR_RDMA_READ &&		    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&		    diff == 0) {			ret = 1;			goto bail;		}		/*		 * If this request is a RDMA read or atomic, and the ACK is		 * for a later operation, this ACK NAKs the RDMA read or		 * atomic.  In other words, only a RDMA_READ_LAST or ONLY		 * can ACK a RDMA read and likewise for atomic ops.  Note		 * that the NAK case can only happen if relaxed ordering is		 * used and requests are sent after an RDMA read or atomic		 * is sent but before the response is received.		 */		if ((wqe->wr.opcode == IB_WR_RDMA_READ &&		     (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {			/*			 * The last valid PSN seen is the previous			 * request's.			 */			update_last_psn(qp, wqe->psn - 1);			/* Retry this request. 
*/			ipath_restart_rc(qp, wqe->psn, &wc);			/*			 * No need to process the ACK/NAK since we are			 * restarting an earlier request.			 */			goto bail;		}		if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)			*(u64 *) wqe->sg_list[0].vaddr = val;		if (qp->s_num_rd_atomic &&		    (wqe->wr.opcode == IB_WR_RDMA_READ ||		     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {			qp->s_num_rd_atomic--;			/* Restart sending task if fence is complete */			if ((qp->s_flags & IPATH_S_FENCE_PENDING) &&			    !qp->s_num_rd_atomic) {				qp->s_flags &= ~IPATH_S_FENCE_PENDING;				tasklet_hi_schedule(&qp->s_task);			} else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {				qp->s_flags &= ~IPATH_S_RDMAR_PENDING;				tasklet_hi_schedule(&qp->s_task);			}		}		/* Post a send completion queue entry if requested. */		if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {			wc.wr_id = wqe->wr.wr_id;			wc.status = IB_WC_SUCCESS;			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];			wc.vendor_err = 0;			wc.byte_len = wqe->length;			wc.imm_data = 0;			wc.qp = &qp->ibqp;			wc.src_qp = qp->remote_qpn;			wc.wc_flags = 0;			wc.pkey_index = 0;			wc.slid = qp->remote_ah_attr.dlid;			wc.sl = qp->remote_ah_attr.sl;			wc.dlid_path_bits = 0;			wc.port_num = 0;			ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);		}		qp->s_retry = qp->s_retry_cnt;		/*		 * If we are completing a request which is in the process of		 * being resent, we can stop resending it since we know the		 * responder has already seen it.		 
*/		if (qp->s_last == qp->s_cur) {			if (++qp->s_cur >= qp->s_size)				qp->s_cur = 0;			qp->s_last = qp->s_cur;			if (qp->s_last == qp->s_tail)				break;			wqe = get_swqe_ptr(qp, qp->s_cur);			qp->s_state = OP(SEND_LAST);			qp->s_psn = wqe->psn;		} else {			if (++qp->s_last >= qp->s_size)				qp->s_last = 0;			if (qp->s_last == qp->s_tail)				break;			wqe = get_swqe_ptr(qp, qp->s_last);		}	}	switch (aeth >> 29) {	case 0:		/* ACK */		dev->n_rc_acks++;		/* If this is a partial ACK, reset the retransmit timer. */		if (qp->s_last != qp->s_tail) {			spin_lock(&dev->pending_lock);			if (list_empty(&qp->timerwait))				list_add_tail(&qp->timerwait,					&dev->pending[dev->pending_index]);			spin_unlock(&dev->pending_lock);			/*			 * If we get a partial ACK for a resent operation,			 * we can stop resending the earlier packets and			 * continue with the next packet the receiver wants.			 */

⌨️ Keyboard shortcuts

Copy code: Ctrl + C
Search code: Ctrl + F
Full-screen mode: F11
Increase font size: Ctrl + =
Decrease font size: Ctrl + -
Show shortcuts: ?