ipath_rc.c

来自「LINUX 2.6.17.4的源码」· C语言 代码 · 共 1,857 行 · 第 1/4 页

C
1,857
字号
		/*
		 * NOTE(review): this is the tail of ipath_make_rc_req();
		 * the start of that function is on an earlier page of this
		 * capture and is not visible here.
		 */
		ss = NULL;
		len = 0;
		qp->s_cur++;
		if (qp->s_cur == qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_READ_REQUEST):
	case OP(COMPARE_SWAP):
	case OP(FETCH_ADD):
		/*
		 * We shouldn't start anything new until this request is
		 * finished.  The ACK will handle rescheduling us.  XXX The
		 * number of outstanding ones is negotiated at connection
		 * setup time (see pg. 258,289)?  XXX Also, if we support
		 * multiple outstanding requests, we need to check the WQE
		 * IB_SEND_FENCE flag and not send a new request if a RDMA
		 * read or atomic is pending.
		 */
		goto done;
	}
	/* Publish the constructed request: sizes, SGE list, and BTH words. */
	qp->s_len -= len;
	qp->s_hdrwords = hwords;
	qp->s_cur_sge = ss;
	qp->s_cur_size = len;
	*bth0p = bth0 | (qp->s_state << 24);
	*bth2p = bth2;
	return 1;

done:
	return 0;
}

/*
 * ipath_make_rc_grh - build the GRH (global route header) in qp->s_hdr
 * @qp: the QP whose send header is being constructed
 * @grh: the destination's global route attributes
 * @nwords: number of 32-bit payload words (excluding header and CRC)
 *
 * Adds the 10-word GRH to qp->s_hdrwords and fills in all GRH fields.
 */
static inline void ipath_make_rc_grh(struct ipath_qp *qp,
				     struct ib_global_route *grh,
				     u32 nwords)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);

	/* GRH header size in 32-bit words. */
	qp->s_hdrwords += 10;
	/* IP version (6), traffic class, and flow label share one word. */
	qp->s_hdr.u.l.grh.version_tclass_flow =
		cpu_to_be32((6 << 28) |
			    (grh->traffic_class << 20) |
			    grh->flow_label);
	/*
	 * "- 12" drops the LRH (2 words) + GRH (10 words) preceding the
	 * payload, so paylen covers BTH onward plus the CRC, in bytes.
	 */
	qp->s_hdr.u.l.grh.paylen =
		cpu_to_be16(((qp->s_hdrwords - 12) + nwords +
			     SIZE_OF_CRC) << 2);
	/* next_hdr is defined by C8-7 in ch. 8.4.1 */
	qp->s_hdr.u.l.grh.next_hdr = 0x1B;
	qp->s_hdr.u.l.grh.hop_limit = grh->hop_limit;
	/* The SGID is 32-bit aligned. */
	qp->s_hdr.u.l.grh.sgid.global.subnet_prefix = dev->gid_prefix;
	qp->s_hdr.u.l.grh.sgid.global.interface_id =
		ipath_layer_get_guid(dev->dd);
	qp->s_hdr.u.l.grh.dgid = grh->dgid;
}

/**
 * ipath_do_rc_send - perform a send on an RC QP
 * @data: contains a pointer to the QP
 *
 * Process entries in the send work queue until credit or queue is
 * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
 * Otherwise, after we drop the QP s_lock, two threads could send
 * packets out of order.
*/
void ipath_do_rc_send(unsigned long data)
{
	struct ipath_qp *qp = (struct ipath_qp *)data;
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	unsigned long flags;
	u16 lrh0;
	u32 nwords;
	u32 extra_bytes;
	u32 bth0;
	u32 bth2;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	struct ipath_other_headers *ohdr;

	/* Only one instance of this tasklet may run per QP at a time. */
	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
		goto bail;

	/* A destination LID equal to our own LID means a loopback send. */
	if (unlikely(qp->remote_ah_attr.dlid ==
		     ipath_layer_get_lid(dev->dd))) {
		struct ib_wc wc;

		/*
		 * Pass in an uninitialized ib_wc to be consistent with
		 * other places where ipath_ruc_loopback() is called.
		 */
		ipath_ruc_loopback(qp, &wc);
		goto clear;
	}

	/* Point at the BTH area, which follows the GRH when one is used. */
	ohdr = &qp->s_hdr.u.oth;
	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
		ohdr = &qp->s_hdr.u.l.oth;

again:
	/* Check for a constructed packet to be sent. */
	if (qp->s_hdrwords != 0) {
		/*
		 * If no PIO bufs are available, return.  An interrupt will
		 * call ipath_ib_piobufavail() when one is available.
		 */
		_VERBS_INFO("h %u %p\n", qp->s_hdrwords, &qp->s_hdr);
		_VERBS_INFO("d %u %p %u %p %u %u %u %u\n", qp->s_cur_size,
			    qp->s_cur_sge->sg_list,
			    qp->s_cur_sge->num_sge,
			    qp->s_cur_sge->sge.vaddr,
			    qp->s_cur_sge->sge.sge_length,
			    qp->s_cur_sge->sge.length,
			    qp->s_cur_sge->sge.m,
			    qp->s_cur_sge->sge.n);
		if (ipath_verbs_send(dev->dd, qp->s_hdrwords,
				     (u32 *) &qp->s_hdr, qp->s_cur_size,
				     qp->s_cur_sge)) {
			ipath_no_bufs_available(qp, dev);
			goto bail;
		}
		dev->n_unicast_xmit++;
		/* Record that we sent the packet and s_hdr is empty. */
		qp->s_hdrwords = 0;
	}

	/*
	 * The lock is needed to synchronize between setting
	 * qp->s_ack_state, resend timer, and post_send().
	 */
	spin_lock_irqsave(&qp->s_lock, flags);

	/* Sending responses has higher priority over sending requests.
*/
	if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
	    (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0)
		bth2 = qp->s_ack_psn++ & IPS_PSN_MASK;
	else if (!ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2))
		goto done;

	spin_unlock_irqrestore(&qp->s_lock, flags);

	/* Construct the header. */
	extra_bytes = (4 - qp->s_cur_size) & 3;	/* pad payload to a word */
	nwords = (qp->s_cur_size + extra_bytes) >> 2;
	lrh0 = IPS_LRH_BTH;
	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
		ipath_make_rc_grh(qp, &qp->remote_ah_attr.grh, nwords);
		lrh0 = IPS_LRH_GRH;
	}
	lrh0 |= qp->remote_ah_attr.sl << 4;
	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
	qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
				       SIZE_OF_CRC);
	qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
	bth0 |= ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
	bth0 |= extra_bytes << 20;	/* pad count field of the BTH */
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
	ohdr->bth[2] = cpu_to_be32(bth2);

	/* Check for more work to do. */
	goto again;

done:
	spin_unlock_irqrestore(&qp->s_lock, flags);

clear:
	clear_bit(IPATH_S_BUSY, &qp->s_flags);

bail:
	return;
}

/*
 * send_rc_ack - construct and transmit an ACK packet for this QP
 * @qp: the QP the ACK is sent on
 *
 * On successful transmission the pending ACK state is cleared
 * (see the tail of this function).
 */
static void send_rc_ack(struct ipath_qp *qp)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	u16 lrh0;
	u32 bth0;
	struct ipath_other_headers *ohdr;

	/* Construct the header. */
	ohdr = &qp->s_hdr.u.oth;
	lrh0 = IPS_LRH_BTH;
	/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4.
*/
	qp->s_hdrwords = 6;
	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
		ipath_make_rc_grh(qp, &qp->remote_ah_attr.grh, 0);
		ohdr = &qp->s_hdr.u.l.oth;
		lrh0 = IPS_LRH_GRH;
	}
	bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
	ohdr->u.aeth = ipath_compute_aeth(qp);
	if (qp->s_ack_state >= OP(COMPARE_SWAP)) {
		/* Atomic ACKs carry an extra 64-bit atomic_ack_eth payload. */
		bth0 |= IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24;
		ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic);
		qp->s_hdrwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4;
	}
	else
		bth0 |= OP(ACKNOWLEDGE) << 24;
	lrh0 |= qp->remote_ah_attr.sl << 4;
	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
	qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + SIZE_OF_CRC);
	qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
	ohdr->bth[2] = cpu_to_be32(qp->s_ack_psn & IPS_PSN_MASK);

	/*
	 * If we can send the ACK, clear the ACK state.
	 */
	if (ipath_verbs_send(dev->dd, qp->s_hdrwords, (u32 *) &qp->s_hdr,
			     0, NULL) == 0) {
		qp->s_ack_state = OP(ACKNOWLEDGE);
		dev->n_rc_qacks++;
		dev->n_unicast_xmit++;
	}
}

/**
 * ipath_restart_rc - back up requester to resend the last un-ACKed request
 * @qp: the QP to restart
 * @psn: packet sequence number for the request
 * @wc: the work completion request
 *
 * The QP s_lock should be held.
 */
void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
{
	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
	struct ipath_ibdev *dev;
	u32 n;

	/*
	 * If there are no requests pending, we are done.
*/
	if (ipath_cmp24(psn, qp->s_next_psn) >= 0 ||
	    qp->s_last == qp->s_tail)
		goto done;

	/*
	 * Out of retries: complete the WQE with a retry-exceeded error
	 * and let ipath_sqerror_qp() handle the QP error transition.
	 */
	if (qp->s_retry == 0) {
		wc->wr_id = wqe->wr.wr_id;
		wc->status = IB_WC_RETRY_EXC_ERR;
		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
		wc->vendor_err = 0;
		wc->byte_len = 0;
		wc->qp_num = qp->ibqp.qp_num;
		wc->src_qp = qp->remote_qpn;
		wc->pkey_index = 0;
		wc->slid = qp->remote_ah_attr.dlid;
		wc->sl = qp->remote_ah_attr.sl;
		wc->dlid_path_bits = 0;
		wc->port_num = 0;
		ipath_sqerror_qp(qp, wc);
		goto bail;
	}
	qp->s_retry--;

	/*
	 * Remove the QP from the timeout queue.
	 * Note: it may already have been removed by ipath_ib_timer().
	 */
	dev = to_idev(qp->ibqp.device);
	spin_lock(&dev->pending_lock);
	if (!list_empty(&qp->timerwait))
		list_del_init(&qp->timerwait);
	spin_unlock(&dev->pending_lock);

	if (wqe->wr.opcode == IB_WR_RDMA_READ)
		dev->n_rc_resends++;
	else
		dev->n_rc_resends += (int)qp->s_psn - (int)psn;

	/*
	 * If we are starting the request from the beginning, let the normal
	 * send code handle initialization.
	 */
	qp->s_cur = qp->s_last;
	if (ipath_cmp24(psn, wqe->psn) <= 0) {
		qp->s_state = OP(SEND_LAST);
		qp->s_psn = wqe->psn;
	} else {
		/* Scan the send queue for the WQE containing this PSN. */
		n = qp->s_cur;
		for (;;) {
			if (++n == qp->s_size)
				n = 0;
			if (n == qp->s_tail) {
				if (ipath_cmp24(psn, qp->s_next_psn) >= 0) {
					qp->s_cur = n;
					wqe = get_swqe_ptr(qp, n);
				}
				break;
			}
			wqe = get_swqe_ptr(qp, n);
			if (ipath_cmp24(psn, wqe->psn) < 0)
				break;
			qp->s_cur = n;
		}
		qp->s_psn = psn;

		/*
		 * Reset the state to restart in the middle of a request.
		 * Don't change the s_sge, s_cur_sge, or s_cur_size.
		 * See ipath_do_rc_send().
*/
		/*
		 * NOTE(review): the RDMA_READ_RESPONSE_* states set here
		 * appear to be reused as internal "restart mid-request"
		 * markers — this switch mirrors the one in reset_psn().
		 */
		switch (wqe->wr.opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
			break;

		case IB_WR_RDMA_WRITE:
		case IB_WR_RDMA_WRITE_WITH_IMM:
			qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
			break;

		case IB_WR_RDMA_READ:
			qp->s_state =
				OP(RDMA_READ_RESPONSE_MIDDLE);
			break;

		default:
			/*
			 * This case shouldn't happen since its only
			 * one PSN per req.
			 */
			qp->s_state = OP(SEND_LAST);
		}
	}

done:
	tasklet_hi_schedule(&qp->s_task);

bail:
	return;
}

/**
 * reset_psn - reset the QP state to send starting from PSN
 * @qp: the QP
 * @psn: the packet sequence number to restart at
 *
 * This is called from ipath_rc_rcv() to process an incoming RC ACK
 * for the given QP.
 * Called at interrupt level with the QP s_lock held.
 */
static void reset_psn(struct ipath_qp *qp, u32 psn)
{
	struct ipath_swqe *wqe;
	u32 n;

	/* Find the WQE (at or after s_cur) that contains the given PSN. */
	n = qp->s_cur;
	wqe = get_swqe_ptr(qp, n);
	for (;;) {
		if (++n == qp->s_size)
			n = 0;
		if (n == qp->s_tail) {
			if (ipath_cmp24(psn, qp->s_next_psn) >= 0) {
				qp->s_cur = n;
				wqe = get_swqe_ptr(qp, n);
			}
			break;
		}
		wqe = get_swqe_ptr(qp, n);
		if (ipath_cmp24(psn, wqe->psn) < 0)
			break;
		qp->s_cur = n;
	}
	qp->s_psn = psn;

	/*
	 * Set the state to restart in the middle of a
	 * request.  Don't change the s_sge, s_cur_sge, or
	 * s_cur_size.  See ipath_do_rc_send().
	 */
	switch (wqe->wr.opcode) {
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
		qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
		break;

	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
		break;

	case IB_WR_RDMA_READ:
		qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		break;

	default:
		/*
		 * This case shouldn't happen since its only
		 * one PSN per req.
*/
		qp->s_state = OP(SEND_LAST);
	}
}

/**
 * do_rc_ack - process an incoming RC ACK
 * @qp: the QP the ACK came in on
 * @aeth: the AETH value from the ACK packet (compared against WQE SSNs)
 * @psn: the packet sequence number of the ACK
 * @opcode: the opcode of the request that resulted in the ACK
 *
 * This is called from ipath_rc_rcv() to process an incoming RC ACK
 * for the given QP.
 * Called at interrupt level with the QP s_lock held.
 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
 */
static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ib_wc wc;
	struct ipath_swqe *wqe;
	int ret = 0;

	/*
	 * Remove the QP from the timeout queue (or RNR timeout queue).
	 * If ipath_ib_timer() has already removed it,
	 * it's OK since we hold the QP s_lock and ipath_restart_rc()
	 * just won't find anything to restart if we ACK everything.
	 */
	spin_lock(&dev->pending_lock);
	if (!list_empty(&qp->timerwait))
		list_del_init(&qp->timerwait);
	spin_unlock(&dev->pending_lock);

	/*
	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
	 * requests and implicitly NAK RDMA read and atomic requests issued
	 * before the NAK'ed request.  The MSN won't include the NAK'ed
	 * request but will include an ACK'ed request(s).
	 */
	wqe = get_swqe_ptr(qp, qp->s_last);

	/* Nothing is pending to ACK/NAK. */
	if (qp->s_last == qp->s_tail)
		goto bail;

	/*
	 * The MSN might be for a later WQE than the PSN indicates so
	 * only complete WQEs that the PSN finishes.
	 */
	while (ipath_cmp24(psn, wqe->lpsn) >= 0) {
		/* If we are ACKing a WQE, the MSN should be >= the SSN. */
		if (ipath_cmp24(aeth, wqe->ssn) < 0)
			break;
		/*
		 * If this request is a RDMA read or atomic, and the ACK is
		 * for a later operation, this ACK NAKs the RDMA read or
		 * atomic.  In other words, only a RDMA_READ_LAST or ONLY
		 * can ACK a RDMA read and likewise for atomic ops.
Note
		 * that the NAK case can only happen if relaxed ordering is
		 * used and requests are sent after an RDMA read or atomic
		 * is sent but before the response is received.
		 */
		if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
		     opcode != OP(RDMA_READ_RESPONSE_LAST)) ||
		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
		     (opcode != OP(ATOMIC_ACKNOWLEDGE) ||
		      ipath_cmp24(wqe->psn, psn) != 0))) {
			/*
			 * The last valid PSN seen is the previous
			 * request's.
			 *
			 * NOTE(review): the capture is truncated here
			 * (page 1 of 4); the remainder of do_rc_ack()
			 * continues on the next page and is not visible
			 * in this document.

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?