ipath_rc.c
来自「linux 内核源代码」· C语言 代码 · 共 1,940 行 · 第 1/4 页
C
1,940 行
if (wqe->wr.opcode == IB_WR_SEND)
			qp->s_state = OP(SEND_LAST);
		else {
			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.imm_data;
			hwords += 1;
		}
		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
			bth0 |= 1 << 23;
		bth2 |= 1 << 31;	/* Request ACK. */
		qp->s_cur++;
		if (qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_READ_RESPONSE_LAST):
		/*
		 * This case can only happen if a RDMA write is restarted.
		 * See ipath_restart_rc().
		 */
		ipath_init_restart(qp, wqe);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_FIRST):
		qp->s_state = OP(RDMA_WRITE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
			qp->s_next_psn = qp->s_psn;
		ss = &qp->s_sge;
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
			qp->s_state = OP(RDMA_WRITE_LAST);
		else {
			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.imm_data;
			hwords += 1;
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;
		}
		bth2 |= 1 << 31;	/* Request ACK. */
		qp->s_cur++;
		if (qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_READ_RESPONSE_MIDDLE):
		/*
		 * This case can only happen if a RDMA read is restarted.
		 * See ipath_restart_rc().
		 */
		ipath_init_restart(qp, wqe);
		len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
		ohdr->u.rc.reth.vaddr =
			cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
		ohdr->u.rc.reth.rkey =
			cpu_to_be32(wqe->wr.wr.rdma.rkey);
		ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
		qp->s_state = OP(RDMA_READ_REQUEST);
		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
			qp->s_next_psn = qp->s_psn;
		ss = NULL;
		len = 0;
		qp->s_cur++;
		if (qp->s_cur == qp->s_size)
			qp->s_cur = 0;
		break;
	}
	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
		bth2 |= 1 << 31;	/* Request ACK. */
	qp->s_len -= len;
	qp->s_hdrwords = hwords;
	qp->s_cur_sge = ss;
	qp->s_cur_size = len;
	ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
done:
	ret = 1;
bail:
	spin_unlock_irqrestore(&qp->s_lock, flags);
	return ret;
}

/**
 * send_rc_ack - Construct an ACK packet and send it
 * @qp: a pointer to the QP
 *
 * This is called from ipath_rc_rcv() and only uses the receive
 * side QP state.
 * Note that RDMA reads and atomics are handled in the
 * send side QP state and tasklet.
 *
 * The ACK/NAK header is built in a local ipath_ib_header and handed
 * directly to ipath_verbs_send().  If that send does not succeed, or if
 * the checks at the top of the function find acknowledgement work already
 * outstanding on the send side, the receive-side NAK state and ACK PSN are
 * copied to the send side under s_lock, IPATH_S_ACK_PENDING is set, and
 * the send tasklet is scheduled to transmit the ACK instead.
 */
static void send_rc_ack(struct ipath_qp *qp)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	u16 lrh0;
	u32 bth0;
	u32 hwords;
	struct ipath_ib_header hdr;
	struct ipath_other_headers *ohdr;
	unsigned long flags;

	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
	if (qp->r_head_ack_queue != qp->s_tail_ack_queue ||
	    (qp->s_flags & IPATH_S_ACK_PENDING) ||
	    qp->s_ack_state != OP(ACKNOWLEDGE))
		goto queue_ack;

	/* Construct the header. */
	ohdr = &hdr.u.oth;
	lrh0 = IPATH_LRH_BTH;
	/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
	hwords = 6;
	/*
	 * When the remote address handle asks for a GRH, build it, grow the
	 * header by the number of words ipath_make_grh() reports, and switch
	 * to the header layout that places the other headers after the GRH.
	 */
	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
		hwords += ipath_make_grh(dev, &hdr.u.l.grh,
					 &qp->remote_ah_attr.grh,
					 hwords, 0);
		ohdr = &hdr.u.l.oth;
		lrh0 = IPATH_LRH_GRH;
	}
	/* read pkey_index w/o lock (it's atomic) */
	bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) |
		(OP(ACKNOWLEDGE) << 24) | (1 << 22);
	/*
	 * A non-zero receive-side NAK state is encoded into the AETH next to
	 * the masked MSN; otherwise ipath_compute_aeth() supplies the AETH.
	 */
	if (qp->r_nak_state)
		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
					    (qp->r_nak_state <<
					     IPATH_AETH_CREDIT_SHIFT));
	else
		ohdr->u.aeth = ipath_compute_aeth(qp);
	lrh0 |= qp->remote_ah_attr.sl << 4;
	hdr.lrh[0] = cpu_to_be16(lrh0);
	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
	hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
	ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);

	/*
	 * If the ACK can be sent now (ipath_verbs_send() returns 0), count
	 * the transmission and we are done; nothing is queued.
	 */
	if (ipath_verbs_send(qp, &hdr, hwords, NULL, 0) == 0) {
		dev->n_unicast_xmit++;
		goto done;
	}

	/*
	 * We are out of PIO buffers at the moment.
	 * Pass responsibility for sending the ACK to the
	 * send tasklet so that when a PIO buffer becomes
	 * available, the ACK is sent ahead of other outgoing
	 * packets.
	 */
	dev->n_rc_qacks++;

queue_ack:
	/* Hand the receive-side ACK/NAK state to the send side under s_lock. */
	spin_lock_irqsave(&qp->s_lock, flags);
	qp->s_flags |= IPATH_S_ACK_PENDING;
	qp->s_nak_state = qp->r_nak_state;
	qp->s_ack_psn = qp->r_ack_psn;
	spin_unlock_irqrestore(&qp->s_lock, flags);

	/* Call ipath_do_rc_send() in another thread. */
	tasklet_hi_schedule(&qp->s_task);

done:
	return;
}

/**
 * reset_psn - reset the QP state to send starting from PSN
 * @qp: the QP
 * @psn: the packet sequence number to restart at
 *
 * Starting at s_last, walk the send queue to find the work request that
 * contains @psn, then set s_cur, s_state and s_psn so that the send code
 * resumes from that point.  Called (at least) from ipath_restart_rc().
 * Called at interrupt level with the QP s_lock held.
 */
static void reset_psn(struct ipath_qp *qp, u32 psn)
{
	u32 n = qp->s_last;
	struct ipath_swqe *wqe = get_swqe_ptr(qp, n);
	u32 opcode;

	qp->s_cur = n;

	/*
	 * If we are starting the request from the beginning,
	 * let the normal send code handle initialization.
	 */
	if (ipath_cmp24(psn, wqe->psn) <= 0) {
		qp->s_state = OP(SEND_LAST);
		goto done;
	}

	/* Find the work request opcode corresponding to the given PSN. */
	opcode = wqe->wr.opcode;
	for (;;) {
		int diff;

		/* Advance circularly; stop at s_tail, the end of the queue. */
		if (++n == qp->s_size)
			n = 0;
		if (n == qp->s_tail)
			break;
		wqe = get_swqe_ptr(qp, n);
		diff = ipath_cmp24(psn, wqe->psn);
		/* This WQE starts after psn, so the previous one contains it. */
		if (diff < 0)
			break;
		qp->s_cur = n;
		/*
		 * If we are starting the request from the beginning,
		 * let the normal send code handle initialization.
		 */
		if (diff == 0) {
			qp->s_state = OP(SEND_LAST);
			goto done;
		}
		opcode = wqe->wr.opcode;
	}

	/*
	 * Set the state to restart in the middle of a request.
	 * Don't change the s_sge, s_cur_sge, or s_cur_size.
	 * See ipath_do_rc_send().
	 *
	 * The RDMA_READ_RESPONSE_* opcodes are used here as s_state markers
	 * meaning "restart mid-request": the send-side switch handles the
	 * LAST (RDMA write) and MIDDLE (RDMA read) values by calling
	 * ipath_init_restart() before continuing.
	 */
	switch (opcode) {
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
		qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
		break;

	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
		break;

	case IB_WR_RDMA_READ:
		qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		break;

	default:
		/*
		 * This case shouldn't happen since it's only
		 * one PSN per req.
		 */
		qp->s_state = OP(SEND_LAST);
	}
done:
	qp->s_psn = psn;
}

/**
 * ipath_restart_rc - back up requester to resend the last un-ACKed request
 * @qp: the QP to restart
 * @psn: packet sequence number for the request
 * @wc: caller-provided work completion; filled in only when the retry
 *	count is exhausted
 *
 * If s_retry has reached zero, @wc is filled in as an IB_WC_RETRY_EXC_ERR
 * completion for the WQE at s_last and passed to ipath_sqerror_qp().
 * Otherwise s_retry is decremented, the QP is taken off the timeout list,
 * the send state is rewound to @psn via reset_psn(), and the send tasklet
 * is scheduled.
 *
 * The QP s_lock should be held and interrupts disabled.
 */
void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
{
	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
	struct ipath_ibdev *dev;

	/* No retries left: report the failure instead of resending. */
	if (qp->s_retry == 0) {
		wc->wr_id = wqe->wr.wr_id;
		wc->status = IB_WC_RETRY_EXC_ERR;
		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
		wc->vendor_err = 0;
		wc->byte_len = 0;
		wc->qp = &qp->ibqp;
		wc->imm_data = 0;
		wc->src_qp = qp->remote_qpn;
		wc->wc_flags = 0;
		wc->pkey_index = 0;
		wc->slid = qp->remote_ah_attr.dlid;
		wc->sl = qp->remote_ah_attr.sl;
		wc->dlid_path_bits = 0;
		wc->port_num = 0;
		ipath_sqerror_qp(qp, wc);
		goto bail;
	}
	qp->s_retry--;

	/*
	 * Remove the QP from the timeout queue.
	 * Note: it may already have been removed by ipath_ib_timer().
	 */
	dev = to_idev(qp->ibqp.device);
	spin_lock(&dev->pending_lock);
	if (!list_empty(&qp->timerwait))
		list_del_init(&qp->timerwait);
	spin_unlock(&dev->pending_lock);

	/*
	 * Resend statistics: a restarted RDMA read counts as one resend;
	 * anything else counts the PSN distance from @psn to the current
	 * s_psn, masked with IPATH_PSN_MASK.
	 */
	if (wqe->wr.opcode == IB_WR_RDMA_READ)
		dev->n_rc_resends++;
	else
		dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;

	reset_psn(qp, psn);
	tasklet_hi_schedule(&qp->s_task);

bail:
	return;
}

/**
 * update_last_psn - record the last valid PSN seen for the QP
 * @qp: the QP
 * @psn: the value to store in s_last_psn
 *
 * If s_wait_credit is set, it is cleared and the send tasklet is scheduled
 * before s_last_psn is updated.
 * NOTE(review): presumably s_wait_credit marks a sender stalled waiting
 * for credit -- confirm against the code that sets it.
 */
static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
{
	if (qp->s_wait_credit) {
		qp->s_wait_credit = 0;
		tasklet_hi_schedule(&qp->s_task);
	}
	qp->s_last_psn = psn;
}

/**
 * do_rc_ack - process an incoming RC ACK
 * @qp: the QP the ACK came in on
 * @psn: the packet sequence number of the ACK
 * @opcode: the opcode of the request that resulted in the ACK
 *
 * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
 * for the given QP.
 * Called at interrupt level with the QP s_lock held and interrupts disabled.
 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
 */
static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
		     u64 val)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ib_wc wc;
	struct ipath_swqe *wqe;
	int ret = 0;
	u32 ack_psn;
	int diff;

	/*
	 * Remove the QP from the timeout queue (or RNR timeout queue).
	 * If ipath_ib_timer() has already removed it,
	 * it's OK since we hold the QP s_lock and ipath_restart_rc()
	 * just won't find anything to restart if we ACK everything.
	 */
	spin_lock(&dev->pending_lock);
	if (!list_empty(&qp->timerwait))
		list_del_init(&qp->timerwait);
	spin_unlock(&dev->pending_lock);

	/*
	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
	 * requests and implicitly NAK RDMA read and atomic requests issued
	 * before the NAK'ed request.  The MSN won't include the NAK'ed
	 * request but will include an ACK'ed request(s).
	 */
	ack_psn = psn;
	if (aeth >> 29)
		ack_psn--;
	wqe = get_swqe_ptr(qp, qp->s_last);

	/*
	 * The MSN might be for a later WQE than the PSN indicates so
	 * only complete WQEs that the PSN finishes.
	 */
	while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {
		/*
		 * RDMA_READ_RESPONSE_ONLY is a special case since
		 * we want to generate completion events for everything
		 * before the RDMA read, copy the data, then generate
		 * the completion for the read.
		 */
		if (wqe->wr.opcode == IB_WR_RDMA_READ &&
		    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
		    diff == 0) {
			ret = 1;
			goto bail;
		}
		/*
		 * If this request is a RDMA read or atomic, and the ACK is
		 * for a later operation, this ACK NAKs the RDMA read or
		 * atomic.  In other words, only a RDMA_READ_LAST or ONLY
		 * can ACK a RDMA read and likewise for atomic ops.  Note
		 * that the NAK case can only happen if relaxed ordering is
		 * used and requests are sent after an RDMA read or atomic
		 * is sent but before the response is received.
		 */
		if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
		     (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
			/*
			 * The last valid PSN seen is the previous
			 * request's.
			 */
			update_last_psn(qp, wqe->psn - 1);
			/* Retry this request. */
			ipath_restart_rc(qp, wqe->psn, &wc);
			/*
			 * No need to process the ACK/NAK since we are
			 * restarting an earlier request.
			 */
			goto bail;
		}
		if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
			*(u64 *) wqe->sg_list[0].vaddr = val;
		if (qp->s_num_rd_atomic &&
		    (wqe->wr.opcode == IB_WR_RDMA_READ ||
		     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
			qp->s_num_rd_atomic--;
			/* Restart sending task if fence is complete */
			if ((qp->s_flags & IPATH_S_FENCE_PENDING) &&
			    !qp->s_num_rd_atomic) {
				qp->s_flags &= ~IPATH_S_FENCE_PENDING;
				tasklet_hi_schedule(&qp->s_task);
			} else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
				qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
				tasklet_hi_schedule(&qp->s_task);
			}
		}
		/* Post a send completion queue entry if requested. */
		if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
			wc.wr_id = wqe->wr.wr_id;
			wc.status = IB_WC_SUCCESS;
			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
			wc.vendor_err = 0;
			wc.byte_len = wqe->length;
			wc.imm_data = 0;
			wc.qp = &qp->ibqp;
			wc.src_qp = qp->remote_qpn;
			wc.wc_flags = 0;
			wc.pkey_index = 0;
			wc.slid = qp->remote_ah_attr.dlid;
			wc.sl = qp->remote_ah_attr.sl;
			wc.dlid_path_bits = 0;
			wc.port_num = 0;
			ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
		}
		qp->s_retry = qp->s_retry_cnt;
		/*
		 * If we are completing a request which is in the process of
		 * being resent, we can stop resending it since we know the
		 * responder has already seen it.
		 */
		if (qp->s_last == qp->s_cur) {
			if (++qp->s_cur >= qp->s_size)
				qp->s_cur = 0;
			qp->s_last = qp->s_cur;
			if (qp->s_last == qp->s_tail)
				break;
			wqe = get_swqe_ptr(qp, qp->s_cur);
			qp->s_state = OP(SEND_LAST);
			qp->s_psn = wqe->psn;
		} else {
			if (++qp->s_last >= qp->s_size)
				qp->s_last = 0;
			if (qp->s_last == qp->s_tail)
				break;
			wqe = get_swqe_ptr(qp, qp->s_last);
		}
	}

	switch (aeth >> 29) {
	case 0:		/* ACK */
		dev->n_rc_acks++;
		/* If this is a partial ACK, reset the retransmit timer. */
		if (qp->s_last != qp->s_tail) {
			spin_lock(&dev->pending_lock);
			if (list_empty(&qp->timerwait))
				list_add_tail(&qp->timerwait,
					&dev->pending[dev->pending_index]);
			spin_unlock(&dev->pending_lock);
			/*
			 * If we get a partial ACK for a resent operation,
			 * we can stop resending the earlier packets and
			 * continue with the next packet the receiver wants.
			 */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?