/*
 * ipath_rc.c -- excerpt from the Linux 2.6.17.4 kernel sources
 * (InfiniBand ipath driver).  This is page 1 of 4 of a 1,857-line
 * file; the surrounding text was viewer chrome from the code-hosting
 * page and has been folded into this comment.
 */
/*
 * NOTE(review): tail of the previous function -- its head lies outside
 * this chunk, so only the final ACK-state bookkeeping and the
 * resched/done return paths are visible here.
 */
	qp->s_ack_state = opcode;
	qp->s_nak_state = 0;
	qp->s_ack_psn = psn;
resched:
	return 0;
done:
	return 1;
}

/**
 * ipath_rc_rcv - process an incoming RC packet
 * @dev: the device this packet came in on
 * @hdr: the header of this packet
 * @has_grh: true if the header has a GRH
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP for this packet
 *
 * This is called from ipath_qp_rcv() to process an incoming RC packet
 * for the given QP.
 * Called at interrupt level.
 *
 * Locking (from the code below): qp->r_rq.lock is held from just after
 * the response dispatch until one of the exit labels; qp->s_lock is
 * additionally taken on the NAK/ACK-queueing paths and released at
 * rdmadone.  The "done" label drops only r_rq.lock; "resched" and
 * "rdmadone" are reached with both locks held.
 */
void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
{
	struct ipath_other_headers *ohdr;
	u32 opcode;
	u32 hdrsize;
	u32 psn;
	u32 pad;
	unsigned long flags;
	struct ib_wc wc;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	int diff;
	struct ib_reth *reth;
	int header_in_data;

	/* Check for GRH */
	if (!has_grh) {
		ohdr = &hdr->u.oth;
		hdrsize = 8 + 12;	/* LRH + BTH */
		psn = be32_to_cpu(ohdr->bth[2]);
		header_in_data = 0;
	} else {
		ohdr = &hdr->u.l.oth;
		hdrsize = 8 + 40 + 12;	/* LRH + GRH + BTH */
		/*
		 * The header with GRH is 60 bytes and the core driver sets
		 * the eager header buffer size to 56 bytes so the last 4
		 * bytes of the BTH header (PSN) is in the data buffer.
		 */
		header_in_data =
			ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
		if (header_in_data) {
			psn = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		} else
			psn = be32_to_cpu(ohdr->bth[2]);
	}
	/*
	 * The opcode is in the low byte when its in network order
	 * (top byte when in host order).
	 */
	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;

	/*
	 * Process responses (ACKs) before anything else.  Note that the
	 * packet sequence number will be for something in the send work
	 * queue rather than the expected receive packet sequence number.
	 * In other words, this QP is the requester.
	 */
	if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
	    opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
		ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
				  hdrsize, pmtu, header_in_data);
		goto bail;
	}

	/* From here on this QP is the responder; protect the receive queue. */
	spin_lock_irqsave(&qp->r_rq.lock, flags);

	/* Compute 24 bits worth of difference. */
	diff = ipath_cmp24(psn, qp->r_psn);
	if (unlikely(diff)) {
		if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
				       psn, diff, header_in_data))
			goto done;
		goto resched;
	}

	/* Check for opcode sequence errors. */
	switch (qp->r_state) {
	case OP(SEND_FIRST):
	case OP(SEND_MIDDLE):
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
			break;
	/*
	 * nack_inv: queue a NAK (invalid request) and move the QP to the
	 * error state.  Entered with r_rq.lock held; takes s_lock, which
	 * stays held on the "goto resched" path.
	 */
	nack_inv:
	/*
	 * A NAK will ACK earlier sends and RDMA writes.  Don't queue the
	 * NAK if a RDMA read, atomic, or NAK is pending though.
	 */
	spin_lock(&qp->s_lock);
	if (qp->s_ack_state >= OP(RDMA_READ_REQUEST) &&
	    qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) {
		spin_unlock(&qp->s_lock);
		goto done;
	}
	/* XXX Flush WQEs */
	qp->state = IB_QPS_ERR;
	qp->s_ack_state = OP(SEND_ONLY);
	qp->s_nak_state = IB_NAK_INVALID_REQUEST;
	qp->s_ack_psn = qp->r_psn;
	goto resched;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_MIDDLE):
		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			break;
		goto nack_inv;

	case OP(RDMA_READ_REQUEST):
	case OP(COMPARE_SWAP):
	case OP(FETCH_ADD):
		/*
		 * Drop all new requests until a response has been sent.  A
		 * new request then ACKs the RDMA response we sent.  Relaxed
		 * ordering would allow new requests to be processed but we
		 * would need to keep a queue of rwqe's for all that are in
		 * progress.  Note that we can't RNR NAK this request since
		 * the RDMA READ or atomic response is already queued to be
		 * sent (unless we implement a response send queue).
		 */
		goto done;

	default:
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
		    opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			goto nack_inv;
		break;
	}

	wc.imm_data = 0;
	wc.wc_flags = 0;

	/* OK, process the packet. */
	switch (opcode) {
	case OP(SEND_FIRST):
		if (!ipath_get_rwqe(qp, 0)) {
		/* No receive WQE available: queue an RNR NAK. */
		rnr_nak:
			/*
			 * A RNR NAK will ACK earlier sends and RDMA writes.
			 * Don't queue the NAK if a RDMA read or atomic
			 * is pending though.
			 */
			spin_lock(&qp->s_lock);
			if (qp->s_ack_state >=
			    OP(RDMA_READ_REQUEST) &&
			    qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) {
				spin_unlock(&qp->s_lock);
				goto done;
			}
			qp->s_ack_state = OP(SEND_ONLY);
			qp->s_nak_state = IB_RNR_NAK | qp->s_min_rnr_timer;
			qp->s_ack_psn = qp->r_psn;
			goto resched;
		}
		qp->r_rcv_len = 0;
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
	case OP(RDMA_WRITE_MIDDLE):
	send_middle:
		/* Check for invalid length PMTU or posted rwqe len. */
		if (unlikely(tlen != (hdrsize + pmtu + 4)))
			goto nack_inv;
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len))
			goto nack_inv;
		ipath_copy_sge(&qp->r_sge, data, pmtu);
		break;

	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
		/* consume RWQE */
		if (!ipath_get_rwqe(qp, 1))
			goto rnr_nak;
		goto send_last_imm;

	case OP(SEND_ONLY):
	case OP(SEND_ONLY_WITH_IMMEDIATE):
		if (!ipath_get_rwqe(qp, 0))
			goto rnr_nak;
		qp->r_rcv_len = 0;
		if (opcode == OP(SEND_ONLY))
			goto send_last;
		/* FALLTHROUGH */
	case OP(SEND_LAST_WITH_IMMEDIATE):
	send_last_imm:
		if (header_in_data) {
			wc.imm_data = *(__be32 *) data;
			data += sizeof(__be32);
		} else {
			/* Immediate data comes after BTH */
			wc.imm_data = ohdr->u.imm_data;
		}
		hdrsize += 4;
		wc.wc_flags = IB_WC_WITH_IMM;
		/* FALLTHROUGH */
	case OP(SEND_LAST):
	case OP(RDMA_WRITE_LAST):
	send_last:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4)))
			goto nack_inv;
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		wc.byte_len = tlen + qp->r_rcv_len;
		if (unlikely(wc.byte_len > qp->r_len))
			goto nack_inv;
		ipath_copy_sge(&qp->r_sge, data, tlen);
		atomic_inc(&qp->msn);
		/* RDMA writes complete no receive WQE, so skip the CQ entry. */
		if (opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_ONLY))
			break;
		wc.wr_id = qp->r_wr_id;
		wc.status = IB_WC_SUCCESS;
		wc.opcode = IB_WC_RECV;
		wc.vendor_err = 0;
		wc.qp_num = qp->ibqp.qp_num;
		wc.src_qp = qp->remote_qpn;
		wc.pkey_index = 0;
		wc.slid = qp->remote_ah_attr.dlid;
		wc.sl = qp->remote_ah_attr.sl;
		wc.dlid_path_bits = 0;
		wc.port_num = 0;
		/* Signal completion event if the solicited bit is set. */
		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
			       (ohdr->bth[0] &
				__constant_cpu_to_be32(1 << 23)) != 0);
		break;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_ONLY):
	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
		/* consume RWQE */
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		hdrsize += sizeof(*reth);
		qp->r_len = be32_to_cpu(reth->length);
		qp->r_rcv_len = 0;
		if (qp->r_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey & NAK */
			ok = ipath_rkey_ok(dev, &qp->r_sge,
					   qp->r_len, vaddr, rkey,
					   IB_ACCESS_REMOTE_WRITE);
			if (unlikely(!ok)) {
			/*
			 * nack_acc: queue a remote-access-error NAK.
			 * Like nack_inv, takes s_lock and leaves via
			 * resched with both locks held.
			 */
			nack_acc:
				/*
				 * A NAK will ACK earlier sends and RDMA
				 * writes.  Don't queue the NAK if a RDMA
				 * read, atomic, or NAK is pending though.
				 */
				spin_lock(&qp->s_lock);
				if (qp->s_ack_state >=
				    OP(RDMA_READ_REQUEST) &&
				    qp->s_ack_state !=
				    IB_OPCODE_ACKNOWLEDGE) {
					spin_unlock(&qp->s_lock);
					goto done;
				}
				/* XXX Flush WQEs */
				qp->state = IB_QPS_ERR;
				qp->s_ack_state = OP(RDMA_WRITE_ONLY);
				qp->s_nak_state =
					IB_NAK_REMOTE_ACCESS_ERROR;
				qp->s_ack_psn = qp->r_psn;
				goto resched;
			}
		} else {
			/* Zero-length write: clear the SGE state. */
			qp->r_sge.sg_list = NULL;
			qp->r_sge.sge.mr = NULL;
			qp->r_sge.sge.vaddr = NULL;
			qp->r_sge.sge.length = 0;
			qp->r_sge.sge.sge_length = 0;
		}
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_WRITE)))
			goto nack_acc;
		if (opcode == OP(RDMA_WRITE_FIRST))
			goto send_middle;
		else if (opcode == OP(RDMA_WRITE_ONLY))
			goto send_last;
		/* RDMA_WRITE_ONLY_WITH_IMMEDIATE: consume a WQE first. */
		if (!ipath_get_rwqe(qp, 1))
			goto rnr_nak;
		goto send_last_imm;

	case OP(RDMA_READ_REQUEST):
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		spin_lock(&qp->s_lock);
		if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
		    qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) {
			spin_unlock(&qp->s_lock);
			goto done;
		}
		qp->s_rdma_len = be32_to_cpu(reth->length);
		if (qp->s_rdma_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey & NAK */
			ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
					   qp->s_rdma_len, vaddr, rkey,
					   IB_ACCESS_REMOTE_READ);
			if (unlikely(!ok)) {
				spin_unlock(&qp->s_lock);
				goto nack_acc;
			}
			/*
			 * Update the next expected PSN.  We add 1 later
			 * below, so only add the remainder here.
			 */
			if (qp->s_rdma_len > pmtu)
				qp->r_psn += (qp->s_rdma_len - 1) / pmtu;
		} else {
			/* Zero-length read: clear the response SGE state. */
			qp->s_rdma_sge.sg_list = NULL;
			qp->s_rdma_sge.num_sge = 0;
			qp->s_rdma_sge.sge.mr = NULL;
			qp->s_rdma_sge.sge.vaddr = NULL;
			qp->s_rdma_sge.sge.length = 0;
			qp->s_rdma_sge.sge.sge_length = 0;
		}
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_READ)))
			goto nack_acc;
		/*
		 * We need to increment the MSN here instead of when we
		 * finish sending the result since a duplicate request would
		 * increment it more than once.
		 */
		atomic_inc(&qp->msn);
		qp->s_ack_state = opcode;
		qp->s_nak_state = 0;
		qp->s_ack_psn = psn;
		qp->r_psn++;
		qp->r_state = opcode;
		/* Skip resched's send_rc_ack(); still holding s_lock. */
		goto rdmadone;

	case OP(COMPARE_SWAP):
	case OP(FETCH_ADD): {
		struct ib_atomic_eth *ateth;
		u64 vaddr;
		u64 sdata;
		u32 rkey;

		if (!header_in_data)
			ateth = &ohdr->u.atomic_eth;
		else {
			ateth = (struct ib_atomic_eth *)data;
			data += sizeof(*ateth);
		}
		vaddr = be64_to_cpu(ateth->vaddr);
		/* Atomic target must be 8-byte aligned. */
		if (unlikely(vaddr & (sizeof(u64) - 1)))
			goto nack_inv;
		rkey = be32_to_cpu(ateth->rkey);
		/* Check rkey & NAK */
		if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge,
					    sizeof(u64), vaddr, rkey,
					    IB_ACCESS_REMOTE_ATOMIC)))
			goto nack_acc;
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_ATOMIC)))
			goto nack_acc;
		/* Perform atomic OP and save result. */
		sdata = be64_to_cpu(ateth->swap_data);
		spin_lock(&dev->pending_lock);
		qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
		if (opcode == OP(FETCH_ADD))
			*(u64 *) qp->r_sge.sge.vaddr =
				qp->r_atomic_data + sdata;
		else if (qp->r_atomic_data ==
			 be64_to_cpu(ateth->compare_data))
			*(u64 *) qp->r_sge.sge.vaddr = sdata;
		spin_unlock(&dev->pending_lock);
		atomic_inc(&qp->msn);
		qp->r_atomic_psn = psn & IPS_PSN_MASK;
		/* Force the ACK path below so the atomic result is returned. */
		psn |= 1 << 31;
		break;
	}

	default:
		/* Drop packet for unknown opcodes. */
		goto done;
	}
	qp->r_psn++;
	qp->r_state = opcode;
	/* Send an ACK if requested or required. */
	if (psn & (1 << 31)) {
		/*
		 * Coalesce ACKs unless there is a RDMA READ or
		 * ATOMIC pending.
		 */
		spin_lock(&qp->s_lock);
		if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
		    qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST) {
			qp->s_ack_state = opcode;
			qp->s_nak_state = 0;
			qp->s_ack_psn = psn;
			qp->s_ack_atomic = qp->r_atomic_data;
			goto resched;
		}
		spin_unlock(&qp->s_lock);
	}
/* done: exit holding only r_rq.lock. */
done:
	spin_unlock_irqrestore(&qp->r_rq.lock, flags);
	goto bail;

/* resched: entered holding both r_rq.lock and s_lock. */
resched:
	/*
	 * Try to send ACK right away but not if ipath_do_rc_send() is
	 * active.
	 */
	if (qp->s_hdrwords == 0 &&
	    (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST ||
	     qp->s_ack_state >= IB_OPCODE_COMPARE_SWAP))
		send_rc_ack(qp);

/* rdmadone: drop both locks and defer the rest to the send tasklet. */
rdmadone:
	spin_unlock(&qp->s_lock);
	spin_unlock_irqrestore(&qp->r_rq.lock, flags);

	/* Call ipath_do_rc_send() in another thread. */
	tasklet_hi_schedule(&qp->s_task);

bail:
	return;
}

/*
 * (Viewer chrome removed: keyboard-shortcut help from the code-hosting
 * page -- copy Ctrl+C, search Ctrl+F, fullscreen F11, larger font
 * Ctrl+=, smaller font Ctrl+-, show shortcuts "?".  Not part of the
 * kernel source file.)
 */