qp.c
/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>

#include <linux/mlx4/qp.h>

#include "mlx4_ib.h"
#include "user.h"

enum {
	MLX4_IB_ACK_REQ_FREQ	= 8,
};

enum {
	MLX4_IB_DEFAULT_SCHED_QUEUE	= 0x83,
	MLX4_IB_DEFAULT_QP0_SCHED_QUEUE	= 0x3f
};

enum {
	/*
	 * Largest possible UD header: send with GRH and immediate data.
	 */
	MLX4_IB_UD_HEADER_SIZE	= 72
};

struct mlx4_ib_sqp {
	struct mlx4_ib_qp	qp;
	int			pkey_index;
	u32			qkey;
	u32			send_psn;
	struct ib_ud_header	ud_header;
	u8			header_buf[MLX4_IB_UD_HEADER_SIZE];
};

enum {
	MLX4_IB_MIN_SQ_STRIDE = 6
};

static const __be32 mlx4_ib_opcode[] = {
	[IB_WR_SEND]			= __constant_cpu_to_be32(MLX4_OPCODE_SEND),
	[IB_WR_SEND_WITH_IMM]		= __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
	[IB_WR_RDMA_WRITE]		= __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
	[IB_WR_RDMA_WRITE_WITH_IMM]	= __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
	[IB_WR_RDMA_READ]		= __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
	[IB_WR_ATOMIC_CMP_AND_SWP]	= __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
	[IB_WR_ATOMIC_FETCH_AND_ADD]	= __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
};

static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
{
	return container_of(mqp, struct mlx4_ib_sqp, qp);
}

static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
	return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
		qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
}

static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
	return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
		qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
}

static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
{
	if (qp->buf.nbufs == 1)
		return qp->buf.u.direct.buf + offset;
	else
		return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +
			(offset & (PAGE_SIZE - 1));
}

static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
{
	return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
}

static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
{
	return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
}

/*
 * Stamp a SQ WQE so that it is invalid if prefetched by marking the
 * first four bytes of every 64 byte chunk with 0xffffffff, except for
 * the very first chunk of the WQE.
 */
static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
{
	u32 *wqe = get_send_wqe(qp, n);
	int i;

	for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16)
		wqe[i] = 0xffffffff;
}
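/*
 * Loop arithmetic in stamp_send_wqe(): wqe is a u32 pointer, so
 * 1 << (sq.wqe_shift - 2) is the WQE size in 32-bit words, and
 * stepping i by 16 words advances exactly 64 bytes.  Starting at
 * i = 16 leaves the first 64-byte chunk untouched; its leading dword
 * is the ctrl segment's owner/opcode field, which must stay valid.
 */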
static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
{
	struct ib_event event;
	struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;

	if (type == MLX4_EVENT_TYPE_PATH_MIG)
		to_mibqp(qp)->port = to_mibqp(qp)->alt_port;

	if (ibqp->event_handler) {
		event.device     = ibqp->device;
		event.element.qp = ibqp;
		switch (type) {
		case MLX4_EVENT_TYPE_PATH_MIG:
			event.event = IB_EVENT_PATH_MIG;
			break;
		case MLX4_EVENT_TYPE_COMM_EST:
			event.event = IB_EVENT_COMM_EST;
			break;
		case MLX4_EVENT_TYPE_SQ_DRAINED:
			event.event = IB_EVENT_SQ_DRAINED;
			break;
		case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
			event.event = IB_EVENT_QP_LAST_WQE_REACHED;
			break;
		case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
			event.event = IB_EVENT_QP_FATAL;
			break;
		case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
			event.event = IB_EVENT_PATH_MIG_ERR;
			break;
		case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
			event.event = IB_EVENT_QP_REQ_ERR;
			break;
		case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
			event.event = IB_EVENT_QP_ACCESS_ERR;
			break;
		default:
			printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
			       "on QP %06x\n", type, qp->qpn);
			return;
		}

		ibqp->event_handler(&event, ibqp->qp_context);
	}
}

static int send_wqe_overhead(enum ib_qp_type type)
{
	/*
	 * UD WQEs must have a datagram segment.
	 * RC and UC WQEs might have a remote address segment.
	 * MLX WQEs need two extra inline data segments (for the UD
	 * header and space for the ICRC).
	 */
	switch (type) {
	case IB_QPT_UD:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			sizeof (struct mlx4_wqe_datagram_seg);
	case IB_QPT_UC:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			sizeof (struct mlx4_wqe_raddr_seg);
	case IB_QPT_RC:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			sizeof (struct mlx4_wqe_atomic_seg) +
			sizeof (struct mlx4_wqe_raddr_seg);
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			ALIGN(MLX4_IB_UD_HEADER_SIZE +
			      DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
					   MLX4_INLINE_ALIGN) *
			      sizeof (struct mlx4_wqe_inline_seg),
			      sizeof (struct mlx4_wqe_data_seg)) +
			ALIGN(4 +
			      sizeof (struct mlx4_wqe_inline_seg),
			      sizeof (struct mlx4_wqe_data_seg));
	default:
		return sizeof (struct mlx4_wqe_ctrl_seg);
	}
}
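/*
 * Rough worked example (assuming the linux/mlx4/qp.h layouts of this
 * era, where the ctrl, raddr, atomic and data segments are 16 bytes
 * each): an RC WQE has a fixed overhead of 16 + 16 + 16 = 48 bytes,
 * so a request for max_send_sge = 4 needs 48 + 4 * 16 = 112 bytes,
 * which the sizing code below rounds up to a 128-byte stride
 * (wqe_shift = 7) and then reports back max_gs = (128 - 48) / 16 = 5.
 */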
static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
		       int is_user, int has_srq, struct mlx4_ib_qp *qp)
{
	/* Sanity check RQ size before proceeding */
	if (cap->max_recv_wr  > dev->dev->caps.max_wqes  ||
	    cap->max_recv_sge > dev->dev->caps.max_rq_sg)
		return -EINVAL;

	if (has_srq) {
		/* QPs attached to an SRQ should have no RQ */
		if (cap->max_recv_wr)
			return -EINVAL;

		qp->rq.wqe_cnt = qp->rq.max_gs = 0;
	} else {
		/* HW requires >= 1 RQ entry with >= 1 gather entry */
		if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
			return -EINVAL;

		qp->rq.wqe_cnt	 = roundup_pow_of_two(max(1U, cap->max_recv_wr));
		qp->rq.max_gs	 = roundup_pow_of_two(max(1U, cap->max_recv_sge));
		qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
	}

	cap->max_recv_wr  = qp->rq.max_post = qp->rq.wqe_cnt;
	cap->max_recv_sge = qp->rq.max_gs;

	return 0;
}

static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
			      enum ib_qp_type type, struct mlx4_ib_qp *qp)
{
	/* Sanity check SQ size before proceeding */
	if (cap->max_send_wr  > dev->dev->caps.max_wqes  ||
	    cap->max_send_sge > dev->dev->caps.max_sq_sg ||
	    cap->max_inline_data + send_wqe_overhead(type) +
	    sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
		return -EINVAL;

	/*
	 * For MLX transport we need 2 extra S/G entries:
	 * one for the header and one for the checksum at the end
	 */
	if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
	    cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
		return -EINVAL;

	qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
							sizeof (struct mlx4_wqe_data_seg),
							cap->max_inline_data +
							sizeof (struct mlx4_wqe_inline_seg)) +
						    send_wqe_overhead(type)));
	qp->sq.max_gs = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) /
		sizeof (struct mlx4_wqe_data_seg);

	/*
	 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
	 * allow HW to prefetch.
	 */
	qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
	qp->sq.wqe_cnt	  = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes);

	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
	if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
		qp->rq.offset = 0;
		qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
	} else {
		qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
		qp->sq.offset = 0;
	}

	cap->max_send_wr  = qp->sq.max_post = qp->sq.wqe_cnt - qp->sq_spare_wqes;
	cap->max_send_sge = qp->sq.max_gs;

	/* We don't support inline sends for kernel QPs (yet) */
	cap->max_inline_data = 0;

	return 0;
}

static int set_user_sq_size(struct mlx4_ib_dev *dev,
			    struct mlx4_ib_qp *qp,
			    struct mlx4_ib_create_qp *ucmd)
{
	/* Sanity check SQ size before proceeding */
	if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes	 ||
	    ucmd->log_sq_stride >
		ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
	    ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
		return -EINVAL;

	qp->sq.wqe_cnt	 = 1 << ucmd->log_sq_bb_count;
	qp->sq.wqe_shift = ucmd->log_sq_stride;

	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
		(qp->sq.wqe_cnt << qp->sq.wqe_shift);

	return 0;
}
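/*
 * Buffer layout note: each queue's size (wqe_cnt << wqe_shift) is a
 * power of two, and set_kernel_sq_size() places whichever queue has
 * the larger stride at offset 0.  The other queue then starts at a
 * multiple of its own (smaller) stride, so WQEs in both queues stay
 * naturally aligned inside the single buffer allocated below.
 */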
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
			    struct ib_qp_init_attr *init_attr,
			    struct ib_udata *udata, int sqpn,
			    struct mlx4_ib_qp *qp)
{
	int err;

	mutex_init(&qp->mutex);
	spin_lock_init(&qp->sq.lock);
	spin_lock_init(&qp->rq.lock);

	qp->state	 = IB_QPS_RESET;
	qp->atomic_rd_en = 0;
	qp->resp_depth	 = 0;

	qp->rq.head = 0;
	qp->rq.tail = 0;
	qp->sq.head = 0;
	qp->sq.tail = 0;

	err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp);
	if (err)
		goto err;

	if (pd->uobject) {
		struct mlx4_ib_create_qp ucmd;

		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
			err = -EFAULT;
			goto err;
		}

		qp->sq_no_prefetch = ucmd.sq_no_prefetch;

		err = set_user_sq_size(dev, qp, &ucmd);
		if (err)
			goto err;

		qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
				       qp->buf_size, 0);
		if (IS_ERR(qp->umem)) {
			err = PTR_ERR(qp->umem);
			goto err;
		}

		err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
				    ilog2(qp->umem->page_size), &qp->mtt);
		if (err)
			goto err_buf;

		err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
		if (err)
			goto err_mtt;

		if (!init_attr->srq) {
			err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
						  ucmd.db_addr, &qp->db);
			if (err)
				goto err_mtt;
		}
	} else {
		qp->sq_no_prefetch = 0;

		err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
		if (err)
			goto err;

		if (!init_attr->srq) {
			err = mlx4_ib_db_alloc(dev, &qp->db, 0);
			if (err)
				goto err;

			*qp->db.db = 0;
		}

		if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
			err = -ENOMEM;
			goto err_db;
		}

		err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
				    &qp->mtt);
		if (err)
			goto err_buf;

		err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
		if (err)
			goto err_mtt;

		qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
		qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);

		if (!qp->sq.wrid || !qp->rq.wrid) {
			err = -ENOMEM;
			goto err_wrid;
		}
	}

	err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
	if (err)
		goto err_wrid;

	/*
	 * Hardware wants QPN written in big-endian order (after
	 * shifting) for send doorbell.  Precompute this value to save
	 * a little bit when posting sends.
	 */
	qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
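	/*
	 * On a little-endian host, swab32(qpn << 8) is equivalent to
	 * cpu_to_be32(qpn << 8): the 24-bit QPN ends up big-endian in
	 * the first three bytes of the doorbell word, as the hardware
	 * expects.
	 */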