/*
 * ipath_rc.c -- InfiniPath RC (reliable connection) transport.
 * Captured from the Linux 2.6.17.4 kernel sources; C code, 1,857 lines
 * in total, of which this is page 1 of 4.
 */
ss = NULL; len = 0; qp->s_cur++; if (qp->s_cur == qp->s_size) qp->s_cur = 0; break; case OP(RDMA_READ_REQUEST): case OP(COMPARE_SWAP): case OP(FETCH_ADD): /* * We shouldn't start anything new until this request is * finished. The ACK will handle rescheduling us. XXX The * number of outstanding ones is negotiated at connection * setup time (see pg. 258,289)? XXX Also, if we support * multiple outstanding requests, we need to check the WQE * IB_SEND_FENCE flag and not send a new request if a RDMA * read or atomic is pending. */ goto done; } qp->s_len -= len; qp->s_hdrwords = hwords; qp->s_cur_sge = ss; qp->s_cur_size = len; *bth0p = bth0 | (qp->s_state << 24); *bth2p = bth2; return 1;done: return 0;}static inline void ipath_make_rc_grh(struct ipath_qp *qp, struct ib_global_route *grh, u32 nwords){ struct ipath_ibdev *dev = to_idev(qp->ibqp.device); /* GRH header size in 32-bit words. */ qp->s_hdrwords += 10; qp->s_hdr.u.l.grh.version_tclass_flow = cpu_to_be32((6 << 28) | (grh->traffic_class << 20) | grh->flow_label); qp->s_hdr.u.l.grh.paylen = cpu_to_be16(((qp->s_hdrwords - 12) + nwords + SIZE_OF_CRC) << 2); /* next_hdr is defined by C8-7 in ch. 8.4.1 */ qp->s_hdr.u.l.grh.next_hdr = 0x1B; qp->s_hdr.u.l.grh.hop_limit = grh->hop_limit; /* The SGID is 32-bit aligned. */ qp->s_hdr.u.l.grh.sgid.global.subnet_prefix = dev->gid_prefix; qp->s_hdr.u.l.grh.sgid.global.interface_id = ipath_layer_get_guid(dev->dd); qp->s_hdr.u.l.grh.dgid = grh->dgid;}/** * ipath_do_rc_send - perform a send on an RC QP * @data: contains a pointer to the QP * * Process entries in the send work queue until credit or queue is * exhausted. Only allow one CPU to send a packet per QP (tasklet). * Otherwise, after we drop the QP s_lock, two threads could send * packets out of order. 
*/void ipath_do_rc_send(unsigned long data){ struct ipath_qp *qp = (struct ipath_qp *)data; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); unsigned long flags; u16 lrh0; u32 nwords; u32 extra_bytes; u32 bth0; u32 bth2; u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); struct ipath_other_headers *ohdr; if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags)) goto bail; if (unlikely(qp->remote_ah_attr.dlid == ipath_layer_get_lid(dev->dd))) { struct ib_wc wc; /* * Pass in an uninitialized ib_wc to be consistent with * other places where ipath_ruc_loopback() is called. */ ipath_ruc_loopback(qp, &wc); goto clear; } ohdr = &qp->s_hdr.u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) ohdr = &qp->s_hdr.u.l.oth;again: /* Check for a constructed packet to be sent. */ if (qp->s_hdrwords != 0) { /* * If no PIO bufs are available, return. An interrupt will * call ipath_ib_piobufavail() when one is available. */ _VERBS_INFO("h %u %p\n", qp->s_hdrwords, &qp->s_hdr); _VERBS_INFO("d %u %p %u %p %u %u %u %u\n", qp->s_cur_size, qp->s_cur_sge->sg_list, qp->s_cur_sge->num_sge, qp->s_cur_sge->sge.vaddr, qp->s_cur_sge->sge.sge_length, qp->s_cur_sge->sge.length, qp->s_cur_sge->sge.m, qp->s_cur_sge->sge.n); if (ipath_verbs_send(dev->dd, qp->s_hdrwords, (u32 *) &qp->s_hdr, qp->s_cur_size, qp->s_cur_sge)) { ipath_no_bufs_available(qp, dev); goto bail; } dev->n_unicast_xmit++; /* Record that we sent the packet and s_hdr is empty. */ qp->s_hdrwords = 0; } /* * The lock is needed to synchronize between setting * qp->s_ack_state, resend timer, and post_send(). */ spin_lock_irqsave(&qp->s_lock, flags); /* Sending responses has higher priority over sending requests. */ if (qp->s_ack_state != OP(ACKNOWLEDGE) && (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0) bth2 = qp->s_ack_psn++ & IPS_PSN_MASK; else if (!ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2)) goto done; spin_unlock_irqrestore(&qp->s_lock, flags); /* Construct the header. 
*/ extra_bytes = (4 - qp->s_cur_size) & 3; nwords = (qp->s_cur_size + extra_bytes) >> 2; lrh0 = IPS_LRH_BTH; if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { ipath_make_rc_grh(qp, &qp->remote_ah_attr.grh, nwords); lrh0 = IPS_LRH_GRH; } lrh0 |= qp->remote_ah_attr.sl << 4; qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); bth0 |= ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); bth0 |= extra_bytes << 20; ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[2] = cpu_to_be32(bth2); /* Check for more work to do. */ goto again;done: spin_unlock_irqrestore(&qp->s_lock, flags);clear: clear_bit(IPATH_S_BUSY, &qp->s_flags);bail: return;}static void send_rc_ack(struct ipath_qp *qp){ struct ipath_ibdev *dev = to_idev(qp->ibqp.device); u16 lrh0; u32 bth0; struct ipath_other_headers *ohdr; /* Construct the header. */ ohdr = &qp->s_hdr.u.oth; lrh0 = IPS_LRH_BTH; /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. 
*/ qp->s_hdrwords = 6; if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { ipath_make_rc_grh(qp, &qp->remote_ah_attr.grh, 0); ohdr = &qp->s_hdr.u.l.oth; lrh0 = IPS_LRH_GRH; } bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); ohdr->u.aeth = ipath_compute_aeth(qp); if (qp->s_ack_state >= OP(COMPARE_SWAP)) { bth0 |= IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24; ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic); qp->s_hdrwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4; } else bth0 |= OP(ACKNOWLEDGE) << 24; lrh0 |= qp->remote_ah_attr.sl << 4; qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + SIZE_OF_CRC); qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[2] = cpu_to_be32(qp->s_ack_psn & IPS_PSN_MASK); /* * If we can send the ACK, clear the ACK state. */ if (ipath_verbs_send(dev->dd, qp->s_hdrwords, (u32 *) &qp->s_hdr, 0, NULL) == 0) { qp->s_ack_state = OP(ACKNOWLEDGE); dev->n_rc_qacks++; dev->n_unicast_xmit++; }}/** * ipath_restart_rc - back up requester to resend the last un-ACKed request * @qp: the QP to restart * @psn: packet sequence number for the request * @wc: the work completion request * * The QP s_lock should be held. */void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc){ struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); struct ipath_ibdev *dev; u32 n; /* * If there are no requests pending, we are done. 
*/ if (ipath_cmp24(psn, qp->s_next_psn) >= 0 || qp->s_last == qp->s_tail) goto done; if (qp->s_retry == 0) { wc->wr_id = wqe->wr.wr_id; wc->status = IB_WC_RETRY_EXC_ERR; wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; wc->vendor_err = 0; wc->byte_len = 0; wc->qp_num = qp->ibqp.qp_num; wc->src_qp = qp->remote_qpn; wc->pkey_index = 0; wc->slid = qp->remote_ah_attr.dlid; wc->sl = qp->remote_ah_attr.sl; wc->dlid_path_bits = 0; wc->port_num = 0; ipath_sqerror_qp(qp, wc); goto bail; } qp->s_retry--; /* * Remove the QP from the timeout queue. * Note: it may already have been removed by ipath_ib_timer(). */ dev = to_idev(qp->ibqp.device); spin_lock(&dev->pending_lock); if (!list_empty(&qp->timerwait)) list_del_init(&qp->timerwait); spin_unlock(&dev->pending_lock); if (wqe->wr.opcode == IB_WR_RDMA_READ) dev->n_rc_resends++; else dev->n_rc_resends += (int)qp->s_psn - (int)psn; /* * If we are starting the request from the beginning, let the normal * send code handle initialization. */ qp->s_cur = qp->s_last; if (ipath_cmp24(psn, wqe->psn) <= 0) { qp->s_state = OP(SEND_LAST); qp->s_psn = wqe->psn; } else { n = qp->s_cur; for (;;) { if (++n == qp->s_size) n = 0; if (n == qp->s_tail) { if (ipath_cmp24(psn, qp->s_next_psn) >= 0) { qp->s_cur = n; wqe = get_swqe_ptr(qp, n); } break; } wqe = get_swqe_ptr(qp, n); if (ipath_cmp24(psn, wqe->psn) < 0) break; qp->s_cur = n; } qp->s_psn = psn; /* * Reset the state to restart in the middle of a request. * Don't change the s_sge, s_cur_sge, or s_cur_size. * See ipath_do_rc_send(). */ switch (wqe->wr.opcode) { case IB_WR_SEND: case IB_WR_SEND_WITH_IMM: qp->s_state = OP(RDMA_READ_RESPONSE_FIRST); break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: qp->s_state = OP(RDMA_READ_RESPONSE_LAST); break; case IB_WR_RDMA_READ: qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE); break; default: /* * This case shouldn't happen since its only * one PSN per req. 
*/ qp->s_state = OP(SEND_LAST); } }done: tasklet_hi_schedule(&qp->s_task);bail: return;}/** * reset_psn - reset the QP state to send starting from PSN * @qp: the QP * @psn: the packet sequence number to restart at * * This is called from ipath_rc_rcv() to process an incoming RC ACK * for the given QP. * Called at interrupt level with the QP s_lock held. */static void reset_psn(struct ipath_qp *qp, u32 psn){ struct ipath_swqe *wqe; u32 n; n = qp->s_cur; wqe = get_swqe_ptr(qp, n); for (;;) { if (++n == qp->s_size) n = 0; if (n == qp->s_tail) { if (ipath_cmp24(psn, qp->s_next_psn) >= 0) { qp->s_cur = n; wqe = get_swqe_ptr(qp, n); } break; } wqe = get_swqe_ptr(qp, n); if (ipath_cmp24(psn, wqe->psn) < 0) break; qp->s_cur = n; } qp->s_psn = psn; /* * Set the state to restart in the middle of a * request. Don't change the s_sge, s_cur_sge, or * s_cur_size. See ipath_do_rc_send(). */ switch (wqe->wr.opcode) { case IB_WR_SEND: case IB_WR_SEND_WITH_IMM: qp->s_state = OP(RDMA_READ_RESPONSE_FIRST); break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: qp->s_state = OP(RDMA_READ_RESPONSE_LAST); break; case IB_WR_RDMA_READ: qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE); break; default: /* * This case shouldn't happen since its only * one PSN per req. */ qp->s_state = OP(SEND_LAST); }}/** * do_rc_ack - process an incoming RC ACK * @qp: the QP the ACK came in on * @psn: the packet sequence number of the ACK * @opcode: the opcode of the request that resulted in the ACK * * This is called from ipath_rc_rcv() to process an incoming RC ACK * for the given QP. * Called at interrupt level with the QP s_lock held. * Returns 1 if OK, 0 if current operation should be aborted (NAK). */static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode){ struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; struct ipath_swqe *wqe; int ret = 0; /* * Remove the QP from the timeout queue (or RNR timeout queue). 
* If ipath_ib_timer() has already removed it, * it's OK since we hold the QP s_lock and ipath_restart_rc() * just won't find anything to restart if we ACK everything. */ spin_lock(&dev->pending_lock); if (!list_empty(&qp->timerwait)) list_del_init(&qp->timerwait); spin_unlock(&dev->pending_lock); /* * Note that NAKs implicitly ACK outstanding SEND and RDMA write * requests and implicitly NAK RDMA read and atomic requests issued * before the NAK'ed request. The MSN won't include the NAK'ed * request but will include an ACK'ed request(s). */ wqe = get_swqe_ptr(qp, qp->s_last); /* Nothing is pending to ACK/NAK. */ if (qp->s_last == qp->s_tail) goto bail; /* * The MSN might be for a later WQE than the PSN indicates so * only complete WQEs that the PSN finishes. */ while (ipath_cmp24(psn, wqe->lpsn) >= 0) { /* If we are ACKing a WQE, the MSN should be >= the SSN. */ if (ipath_cmp24(aeth, wqe->ssn) < 0) break; /* * If this request is a RDMA read or atomic, and the ACK is * for a later operation, this ACK NAKs the RDMA read or * atomic. In other words, only a RDMA_READ_LAST or ONLY * can ACK a RDMA read and likewise for atomic ops. Note * that the NAK case can only happen if relaxed ordering is * used and requests are sent after an RDMA read or atomic * is sent but before the response is received. */ if ((wqe->wr.opcode == IB_WR_RDMA_READ && opcode != OP(RDMA_READ_RESPONSE_LAST)) || ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && (opcode != OP(ATOMIC_ACKNOWLEDGE) || ipath_cmp24(wqe->psn, psn) != 0))) { /* * The last valid PSN seen is the previous * request's.
 * [capture truncated here: end of page 1 of 4; source-viewer
 *  keyboard-shortcut chrome removed — do_rc_ack() continues on page 2]