📄 mthca_qp.c
字号:
qp->atomic_rd_en = 0; qp->resp_depth = 0; qp->sq_policy = send_policy; mthca_wq_init(&qp->sq); mthca_wq_init(&qp->rq); ret = mthca_map_memfree(dev, qp); if (ret) return ret; ret = mthca_alloc_wqe_buf(dev, pd, qp); if (ret) { mthca_unmap_memfree(dev, qp); return ret; } mthca_adjust_qp_caps(dev, pd, qp); /* * If this is a userspace QP, we're done now. The doorbells * will be allocated and buffers will be initialized in * userspace. */ if (pd->ibpd.uobject) return 0; ret = mthca_alloc_memfree(dev, qp); if (ret) { mthca_free_wqe_buf(dev, qp); mthca_unmap_memfree(dev, qp); return ret; } if (mthca_is_memfree(dev)) { struct mthca_next_seg *next; struct mthca_data_seg *scatter; int size = (sizeof (struct mthca_next_seg) + qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16; for (i = 0; i < qp->rq.max; ++i) { next = get_recv_wqe(qp, i); next->nda_op = cpu_to_be32(((i + 1) & (qp->rq.max - 1)) << qp->rq.wqe_shift); next->ee_nds = cpu_to_be32(size); for (scatter = (void *) (next + 1); (void *) scatter < (void *) next + (1 << qp->rq.wqe_shift); ++scatter) scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); } for (i = 0; i < qp->sq.max; ++i) { next = get_send_wqe(qp, i); next->nda_op = cpu_to_be32((((i + 1) & (qp->sq.max - 1)) << qp->sq.wqe_shift) + qp->send_wqe_offset); } } qp->sq.last = get_send_wqe(qp, qp->sq.max - 1); qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1); return 0;}static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, struct mthca_pd *pd, struct mthca_qp *qp){ int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz); /* Sanity check QP size before proceeding */ if (cap->max_send_wr > dev->limits.max_wqes || cap->max_recv_wr > dev->limits.max_wqes || cap->max_send_sge > dev->limits.max_sg || cap->max_recv_sge > dev->limits.max_sg || cap->max_inline_data > mthca_max_inline_data(pd, max_data_size)) return -EINVAL; /* * For MLX transport we need 2 extra S/G entries: * one for the header and one for the checksum at the end */ if (qp->transport == MLX && cap->max_recv_sge + 2 > dev->limits.max_sg) return -EINVAL; if (mthca_is_memfree(dev)) { qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0; qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 0; } else { qp->rq.max = cap->max_recv_wr; qp->sq.max = cap->max_send_wr; } qp->rq.max_gs = cap->max_recv_sge; qp->sq.max_gs = max_t(int, cap->max_send_sge, ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE, MTHCA_INLINE_CHUNK_SIZE) / sizeof (struct mthca_data_seg)); return 0;}int mthca_alloc_qp(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_qp_type type, enum ib_sig_type send_policy, struct ib_qp_cap *cap, struct mthca_qp *qp){ int err; err = mthca_set_qp_size(dev, cap, pd, qp); if (err) return err; switch (type) { case IB_QPT_RC: qp->transport = RC; break; case IB_QPT_UC: qp->transport = UC; break; case IB_QPT_UD: qp->transport = UD; break; default: return -EINVAL; } qp->qpn = mthca_alloc(&dev->qp_table.alloc); if (qp->qpn == -1) return -ENOMEM; err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, send_policy, qp); if (err) { mthca_free(&dev->qp_table.alloc, qp->qpn); return err; } spin_lock_irq(&dev->qp_table.lock); mthca_array_set(&dev->qp_table.qp, qp->qpn & (dev->limits.num_qps - 1), qp); spin_unlock_irq(&dev->qp_table.lock); return 0;}int mthca_alloc_sqp(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_sig_type send_policy, struct ib_qp_cap *cap, int qpn, int port, struct mthca_sqp *sqp){ u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; int err; err = mthca_set_qp_size(dev, cap, pd, &sqp->qp); if (err) return err; sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE; sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size, &sqp->header_dma, GFP_KERNEL); if (!sqp->header_buf) return -ENOMEM; spin_lock_irq(&dev->qp_table.lock); if (mthca_array_get(&dev->qp_table.qp, mqpn)) err = -EBUSY; else mthca_array_set(&dev->qp_table.qp, mqpn, sqp); spin_unlock_irq(&dev->qp_table.lock); if (err) goto err_out; sqp->port = port; sqp->qp.qpn = mqpn; sqp->qp.transport = MLX; err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, send_policy, &sqp->qp); if (err) goto err_out_free; atomic_inc(&pd->sqp_count); return 0; err_out_free: /* * Lock CQs here, so that CQ polling code can do QP lookup * without taking a lock. */ spin_lock_irq(&send_cq->lock); if (send_cq != recv_cq) spin_lock(&recv_cq->lock); spin_lock(&dev->qp_table.lock); mthca_array_clear(&dev->qp_table.qp, mqpn); spin_unlock(&dev->qp_table.lock); if (send_cq != recv_cq) spin_unlock(&recv_cq->lock); spin_unlock_irq(&send_cq->lock); err_out: dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size, sqp->header_buf, sqp->header_dma); return err;}void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp){ u8 status; struct mthca_cq *send_cq; struct mthca_cq *recv_cq; send_cq = to_mcq(qp->ibqp.send_cq); recv_cq = to_mcq(qp->ibqp.recv_cq); /* * Lock CQs here, so that CQ polling code can do QP lookup * without taking a lock. */ spin_lock_irq(&send_cq->lock); if (send_cq != recv_cq) spin_lock(&recv_cq->lock); spin_lock(&dev->qp_table.lock); mthca_array_clear(&dev->qp_table.qp, qp->qpn & (dev->limits.num_qps - 1)); spin_unlock(&dev->qp_table.lock); if (send_cq != recv_cq) spin_unlock(&recv_cq->lock); spin_unlock_irq(&send_cq->lock); atomic_dec(&qp->refcount); wait_event(qp->wait, !atomic_read(&qp->refcount)); if (qp->state != IB_QPS_RESET) mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status); /* * If this is a userspace QP, the buffers, MR, CQs and so on * will be cleaned up in userspace, so all we have to do is * unref the mem-free tables and free the QPN in our table. */ if (!qp->ibqp.uobject) { mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); if (qp->ibqp.send_cq != qp->ibqp.recv_cq) mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); mthca_free_memfree(dev, qp); mthca_free_wqe_buf(dev, qp); } mthca_unmap_memfree(dev, qp); if (is_sqp(dev, qp)) { atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count)); dma_free_coherent(&dev->pdev->dev, to_msqp(qp)->header_buf_size, to_msqp(qp)->header_buf, to_msqp(qp)->header_dma); } else mthca_free(&dev->qp_table.alloc, qp->qpn);}/* Create UD header for an MLX send and build a data segment for it */static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, int ind, struct ib_send_wr *wr, struct mthca_mlx_seg *mlx, struct mthca_data_seg *data){ int header_size; int err; u16 pkey; ib_ud_header_init(256, /* assume a MAD */ sqp->ud_header.grh_present, &sqp->ud_header); err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header); if (err) return err; mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1); mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) | (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) | (sqp->ud_header.lrh.service_level << 8)); mlx->rlid = sqp->ud_header.lrh.destination_lid; mlx->vcrc = 0; switch (wr->opcode) { case IB_WR_SEND: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; sqp->ud_header.immediate_present = 0; break; case IB_WR_SEND_WITH_IMM: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; sqp->ud_header.immediate_present = 1; sqp->ud_header.immediate_data = wr->imm_data; break; default: return -EINVAL; } sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(&dev->ib_dev, sqp->port, sqp->pkey_index, &pkey); else ib_get_cached_pkey(&dev->ib_dev, sqp->port, wr->wr.ud.pkey_index, &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? sqp->qkey : wr->wr.ud.remote_qkey); sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf + ind * MTHCA_UD_HEADER_SIZE); data->byte_count = cpu_to_be32(header_size); data->lkey = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey); data->addr = cpu_to_be64(sqp->header_dma + ind * MTHCA_UD_HEADER_SIZE); return 0;}static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, struct ib_cq *ib_cq){ unsigned cur; struct mthca_cq *cq; cur = wq->head - wq->tail; if (likely(cur + nreq < wq->max)) return 0; cq = to_mcq(ib_cq); spin_lock(&cq->lock); cur = wq->head - wq->tail; spin_unlock(&cq->lock); return cur + nreq >= wq->max;}int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr){ struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); void *wqe; void *prev_wqe; unsigned long flags; int err = 0; int nreq; int i; int size; int size0 = 0; u32 f0 = 0; int ind; u8 op0 = 0; spin_lock_irqsave(&qp->sq.lock, flags); /* XXX check that state is OK to post send */ ind = qp->sq.next_ind; for (nreq = 0; wr; ++nreq, wr = wr->next) { if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { mthca_err(dev, "SQ %06x full (%u head, %u tail," " %d max, %d nreq)\n", qp->qpn, qp->sq.head, qp->sq.tail, qp->sq.max, nreq); err = -ENOMEM; *bad_wr = wr; goto out; } wqe = get_send_wqe(qp, ind); prev_wqe = qp->sq.last; qp->sq.last = wqe; ((struct mthca_next_seg *) wqe)->nda_op = 0; ((struct mthca_next_seg *) wqe)->ee_nds = 0; ((struct mthca_next_seg *) wqe)->flags = ((wr->send_flags & IB_SEND_SIGNALED) ? cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) | ((wr->send_flags & IB_SEND_SOLICITED) ? cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0) | cpu_to_be32(1); if (wr->opcode == IB_WR_SEND_WITH_IMM || wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) ((struct mthca_next_seg *) wqe)->imm = wr->imm_data; wqe += sizeof (struct mthca_next_seg); size = sizeof (struct mthca_next_seg) / 16; switch (qp->transport) { case RC: switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: ((struct mthca_raddr_seg *) wqe)->raddr = cpu_to_be64(wr->wr.atomic.remote_addr); ((struct mthca_raddr_seg *) wqe)->rkey = cpu_to_be32(wr->wr.atomic.rkey); ((struct mthca_raddr_seg *) wqe)->reserved = 0; wqe += sizeof (struct mthca_raddr_seg); if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { ((struct mthca_atomic_seg *) wqe)->swap_add = cpu_to_be64(wr->wr.atomic.swap); ((struct mthca_atomic_seg *) wqe)->compare = cpu_to_be64(wr->wr.atomic.compare_add); } else { ((struct mthca_atomic_seg *) wqe)->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); ((struct mthca_atomic_seg *) wqe)->compare = 0; } wqe += sizeof (struct mthca_atomic_seg); size += (sizeof (struct mthca_raddr_seg) + sizeof (struct mthca_atomic_seg)) / 16; break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: case IB_WR_RDMA_READ: ((struct mthca_raddr_seg *) wqe)->raddr = cpu_to_be64(wr->wr.rdma.remote_addr); ((struct mthca_raddr_seg *) wqe)->rkey = cpu_to_be32(wr->wr.rdma.rkey); ((struct mthca_raddr_seg *) wqe)->reserved = 0; wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; default: /* No extra segments required for sends */ break; } break; case UC: switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: ((struct mthca_raddr_seg *) wqe)->raddr = cpu_to_be64(wr->wr.rdma.remote_addr); ((struct mthca_raddr_seg *) wqe)->rkey = cpu_to_be32(wr->wr.rdma.rkey); ((struct mthca_raddr_seg *) wqe)->reserved = 0; wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; default: /* No extra segments required for sends */ break; } break; case UD: ((struct mthca_tavor_ud_seg *) wqe)->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key); ((struct mthca_tavor_ud_seg *) wqe)->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma); ((struct mthca_tavor_ud_seg *) wqe)->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); ((struct mthca_tavor_ud_seg *) wqe)->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); wqe += sizeof (struct mthca_tavor_ud_seg); size += sizeof (struct mthca_tavor_ud_seg) / 16; break; case MLX: err = build_mlx_header(dev, to_msqp(qp), ind, wr, wqe - sizeof (struct mthca_next_seg), wqe); if (err) { *bad_wr = wr; goto out; } wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; break; } if (wr->num_sge > qp->sq.max_gs) { mthca_err(dev, "too many gathers\n"); err = -EINVAL; *bad_wr = wr; goto out; } for (i = 0; i < wr->num_sge; ++i) { ((struct mthca_data_seg *) wqe)->byte_count = cpu_to_be32(wr->sg_list[i].length); ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(wr->sg_list[i].lkey); ((struct mthca_data_seg *) wqe)->addr = cpu_to_be64(wr->sg_list[i].addr); wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; } /* Add one more inline data segment for ICRC */ if (qp->transport == MLX) { ((struct mthca_data_seg *) wqe)->byte_count = cpu_to_be32((1 << 31) | 4); ((u32 *) wqe)[1] = 0; wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; } qp->wrid[ind + qp->rq.max] = wr->wr_id; if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) { mthca_err(dev, "opcode invalid\n"); err = -EINVAL; *bad_wr = wr; goto out; } ((struct mthca_next_seg *) prev_wqe)->nda_op = cpu_to_be32(((ind << qp->sq.wqe_shift) + qp->send_wqe_offset) | mthca_opcode[wr->opcode]); wmb(); ((struct mthca_next_seg *) prev_wqe)->ee_nds = cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size); if (!size0) { size0 = size; op0 = mthca_opcode[wr->opcode]; } ++ind; if (unlikely(ind >= qp->sq.max)) ind -= qp->sq.max; }out:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -