📄 qp.c
字号:
if (attr_mask & IB_QP_RETRY_CNT) { context->params1 |= cpu_to_be32(attr->retry_cnt << 16); optpar |= MLX4_QP_OPTPAR_RETRY_COUNT; } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { if (attr->max_rd_atomic) context->params1 |= cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21); optpar |= MLX4_QP_OPTPAR_SRA_MAX; } if (attr_mask & IB_QP_SQ_PSN) context->next_send_psn = cpu_to_be32(attr->sq_psn); context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn); if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { if (attr->max_dest_rd_atomic) context->params2 |= cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); optpar |= MLX4_QP_OPTPAR_RRA_MAX; } if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) { context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask); optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE; } if (ibqp->srq) context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC); if (attr_mask & IB_QP_MIN_RNR_TIMER) { context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT; } if (attr_mask & IB_QP_RQ_PSN) context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn); if (attr_mask & IB_QP_QKEY) { context->qkey = cpu_to_be32(attr->qkey); optpar |= MLX4_QP_OPTPAR_Q_KEY; } if (ibqp->srq) context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); if (!ibqp->srq && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->db_rec_addr = cpu_to_be64(qp->db.dma); if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR && (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || ibqp->qp_type == IB_QPT_UD)) { context->pri_path.sched_queue = (qp->port - 1) << 6; if (is_qp0(dev, qp)) context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; else context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; } if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) sqd_event = 1; else sqd_event = 0; /* * Before passing a kernel QP to the HW, make sure that the * ownership bits of the send queue are set and the SQ * headroom is stamped so that the hardware doesn't start * processing stale work requests. */ if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { struct mlx4_wqe_ctrl_seg *ctrl; int i; for (i = 0; i < qp->sq.wqe_cnt; ++i) { ctrl = get_send_wqe(qp, i); ctrl->owner_opcode = cpu_to_be32(1 << 31); stamp_send_wqe(qp, i); } } err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state), to_mlx4_state(new_state), context, optpar, sqd_event, &qp->mqp); if (err) goto out; qp->state = new_state; if (attr_mask & IB_QP_ACCESS_FLAGS) qp->atomic_rd_en = attr->qp_access_flags; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) qp->resp_depth = attr->max_dest_rd_atomic; if (attr_mask & IB_QP_PORT) qp->port = attr->port_num; if (attr_mask & IB_QP_ALT_PATH) qp->alt_port = attr->alt_port_num; if (is_sqp(dev, qp)) store_sqp_attrs(to_msqp(qp), attr, attr_mask); /* * If we moved QP0 to RTR, bring the IB link up; if we moved * QP0 to RESET or ERROR, bring the link back down. */ if (is_qp0(dev, qp)) { if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR) if (mlx4_INIT_PORT(dev->dev, qp->port)) printk(KERN_WARNING "INIT_PORT failed for port %d\n", qp->port); if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR && (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) mlx4_CLOSE_PORT(dev->dev, qp->port); } /* * If we moved a kernel QP to RESET, clean up all old CQ * entries and reinitialize the QP. */ if (new_state == IB_QPS_RESET && !ibqp->uobject) { mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn, ibqp->srq ? to_msrq(ibqp->srq): NULL); if (ibqp->send_cq != ibqp->recv_cq) mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL); qp->rq.head = 0; qp->rq.tail = 0; qp->sq.head = 0; qp->sq.tail = 0; if (!ibqp->srq) *qp->db.db = 0; }out: kfree(context); return err;}static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 };static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),};int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata){ struct mlx4_ib_dev *dev = to_mdev(ibqp->device); struct mlx4_ib_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; int err = -EINVAL; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) goto out; if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) { goto out; } if (attr_mask & IB_QP_PKEY_INDEX) { int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) goto out; } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) { goto out; } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) { goto out; } if (cur_state == new_state && cur_state == IB_QPS_RESET) { err = 0; goto out; } if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) { err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr, mlx4_ib_qp_attr_mask_table[ibqp->qp_type], IB_QPS_RESET, IB_QPS_INIT); if (err) goto out; cur_state = IB_QPS_INIT; } err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);out: mutex_unlock(&qp->mutex); return err;}static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, void *wqe){ struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev; struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); u16 pkey; int send_size; int header_size; int spc; int i; send_size = 0; for (i = 0; i < wr->num_sge; ++i) send_size += wr->sg_list[i].length; ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header); sqp->ud_header.lrh.service_level = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28; sqp->ud_header.lrh.destination_lid = ah->av.dlid; sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f); if (mlx4_ib_ah_grh_present(ah)) { sqp->ud_header.grh.traffic_class = (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff; sqp->ud_header.grh.flow_label = ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff); sqp->ud_header.grh.hop_limit = ah->av.hop_limit; ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24, ah->av.gid_index, &sqp->ud_header.grh.source_gid); memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.dgid, 16); } mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | (sqp->ud_header.lrh.service_level << 8)); mlx->rlid = sqp->ud_header.lrh.destination_lid; switch (wr->opcode) { case IB_WR_SEND: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; sqp->ud_header.immediate_present = 0; break; case IB_WR_SEND_WITH_IMM: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; sqp->ud_header.immediate_present = 1; sqp->ud_header.immediate_data = wr->imm_data; break; default: return -EINVAL; } sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); else ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? sqp->qkey : wr->wr.ud.remote_qkey); sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); if (0) { printk(KERN_ERR "built UD header of size %d:\n", header_size); for (i = 0; i < header_size / 4; ++i) { if (i % 8 == 0) printk(" [%02x] ", i * 4); printk(" %08x", be32_to_cpu(((__be32 *) sqp->header_buf)[i])); if ((i + 1) % 8 == 0) printk("\n"); } printk("\n"); } /* * Inline data segments may not cross a 64 byte boundary. If * our UD header is bigger than the space available up to the * next 64 byte boundary in the WQE, use two inline data * segments to hold the UD header. */ spc = MLX4_INLINE_ALIGN - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); if (header_size <= spc) { inl->byte_count = cpu_to_be32(1 << 31 | header_size); memcpy(inl + 1, sqp->header_buf, header_size); i = 1; } else { inl->byte_count = cpu_to_be32(1 << 31 | spc); memcpy(inl + 1, sqp->header_buf, spc); inl = (void *) (inl + 1) + spc; memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); /* * Need a barrier here to make sure all the data is * visible before the byte_count field is set. * Otherwise the HCA prefetcher could grab the 64-byte * chunk with this inline segment and get a valid (!= * 0xffffffff) byte count but stale data, and end up * generating a packet with bad headers. * * The first inline segment's byte_count field doesn't * need a barrier, because it comes after a * control/MLX segment and therefore is at an offset * of 16 mod 64. */ wmb(); inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc)); i = 2; } return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);}static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq){ unsigned cur; struct mlx4_ib_cq *cq; cur = wq->head - wq->tail; if (likely(cur + nreq < wq->max_post)) return 0; cq = to_mcq(ib_cq); spin_lock(&cq->lock); cur = wq->head - wq->tail; spin_unlock(&cq->lock); return cur + nreq >= wq->max_post;}static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, u64 remote_addr, u32 rkey){ rseg->raddr = cpu_to_be64(remote_addr); rseg->rkey = cpu_to_be32(rkey); rseg->reserved = 0;}static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr){ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); } else { aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); aseg->compare = 0; }}static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, struct ib_send_wr *wr){ memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);}static void set_mlx_icrc_seg(void *dseg){ u32 *t = dseg; struct mlx4_wqe_inline_seg *iseg = dseg; t[1] = 0; /* * Need a barrier here before writing the byte_count field to * make sure that all the data is visible before the * byte_count field is set. Otherwise, if the segment begins * a new cacheline, the HCA prefetcher could grab the 64-byte * chunk and get a valid (!= * 0xffffffff) byte count but * stale data, and end up sending the wrong data. */ wmb(); iseg->byte_count = cpu_to_be32((1 << 31) | 4);}static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg){ dseg->lkey = cpu_to_be32(sg->lkey); dseg->addr = cpu_to_be64(sg->addr); /*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -