qp.c
/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>

#include <linux/mlx4/qp.h>

#include "mlx4_ib.h"
#include "user.h"

enum {
	MLX4_IB_ACK_REQ_FREQ	= 8,
};

enum {
	MLX4_IB_DEFAULT_SCHED_QUEUE	= 0x83,
	MLX4_IB_DEFAULT_QP0_SCHED_QUEUE	= 0x3f
};

enum {
	/*
	 * Largest possible UD header: send with GRH and immediate data.
	 */
	MLX4_IB_UD_HEADER_SIZE	= 72
};

struct mlx4_ib_sqp {
	struct mlx4_ib_qp	qp;
	int			pkey_index;
	u32			qkey;
	u32			send_psn;
	struct ib_ud_header	ud_header;
	u8			header_buf[MLX4_IB_UD_HEADER_SIZE];
};

enum {
	MLX4_IB_MIN_SQ_STRIDE = 6
};

static const __be32 mlx4_ib_opcode[] = {
	[IB_WR_SEND]			= __constant_cpu_to_be32(MLX4_OPCODE_SEND),
	[IB_WR_SEND_WITH_IMM]		= __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
	[IB_WR_RDMA_WRITE]		= __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
	[IB_WR_RDMA_WRITE_WITH_IMM]	= __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
	[IB_WR_RDMA_READ]		= __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
	[IB_WR_ATOMIC_CMP_AND_SWP]	= __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
	[IB_WR_ATOMIC_FETCH_AND_ADD]	= __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
};

static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
{
	return container_of(mqp, struct mlx4_ib_sqp, qp);
}

static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
	return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
		qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
}

static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
	return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
		qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
}

static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
{
	if (qp->buf.nbufs == 1)
		return qp->buf.u.direct.buf + offset;
	else
		return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +
			(offset & (PAGE_SIZE - 1));
}

static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
{
	return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
}

static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
{
	return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
}

/*
 * Stamp a SQ WQE so that it is invalid if prefetched by marking the
 * first four bytes of every 64 byte chunk with 0xffffffff, except for
 * the very first chunk of the WQE.
 */
static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
{
	u32 *wqe = get_send_wqe(qp, n);
	int i;

	for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16)
		wqe[i] = 0xffffffff;
}
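/*
 * Loop arithmetic in stamp_send_wqe(): wqe is a u32 pointer, so
 * 1 << (sq.wqe_shift - 2) is the WQE size in 32-bit words, and
 * stepping i by 16 words advances exactly 64 bytes.  Starting at
 * i = 16 leaves the first 64-byte chunk untouched; its leading dword
 * is the ctrl segment's owner/opcode field, which must stay valid.
 */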
static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
{
	struct ib_event event;
	struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;

	if (type == MLX4_EVENT_TYPE_PATH_MIG)
		to_mibqp(qp)->port = to_mibqp(qp)->alt_port;

	if (ibqp->event_handler) {
		event.device     = ibqp->device;
		event.element.qp = ibqp;
		switch (type) {
		case MLX4_EVENT_TYPE_PATH_MIG:
			event.event = IB_EVENT_PATH_MIG;
			break;
		case MLX4_EVENT_TYPE_COMM_EST:
			event.event = IB_EVENT_COMM_EST;
			break;
		case MLX4_EVENT_TYPE_SQ_DRAINED:
			event.event = IB_EVENT_SQ_DRAINED;
			break;
		case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
			event.event = IB_EVENT_QP_LAST_WQE_REACHED;
			break;
		case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
			event.event = IB_EVENT_QP_FATAL;
			break;
		case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
			event.event = IB_EVENT_PATH_MIG_ERR;
			break;
		case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
			event.event = IB_EVENT_QP_REQ_ERR;
			break;
		case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
			event.event = IB_EVENT_QP_ACCESS_ERR;
			break;
		default:
			printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
			       "on QP %06x\n", type, qp->qpn);
			return;
		}

		ibqp->event_handler(&event, ibqp->qp_context);
	}
}

static int send_wqe_overhead(enum ib_qp_type type)
{
	/*
	 * UD WQEs must have a datagram segment.
	 * RC and UC WQEs might have a remote address segment.
	 * MLX WQEs need two extra inline data segments (for the UD
	 * header and space for the ICRC).
	 */
	switch (type) {
	case IB_QPT_UD:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			sizeof (struct mlx4_wqe_datagram_seg);
	case IB_QPT_UC:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			sizeof (struct mlx4_wqe_raddr_seg);
	case IB_QPT_RC:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			sizeof (struct mlx4_wqe_atomic_seg) +
			sizeof (struct mlx4_wqe_raddr_seg);
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			ALIGN(MLX4_IB_UD_HEADER_SIZE +
			      DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
					   MLX4_INLINE_ALIGN) *
			      sizeof (struct mlx4_wqe_inline_seg),
			      sizeof (struct mlx4_wqe_data_seg)) +
			ALIGN(4 +
			      sizeof (struct mlx4_wqe_inline_seg),
			      sizeof (struct mlx4_wqe_data_seg));
	default:
		return sizeof (struct mlx4_wqe_ctrl_seg);
	}
}
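/*
 * Rough worked example (assuming the linux/mlx4/qp.h layouts of this
 * era, where the ctrl, raddr, atomic and data segments are 16 bytes
 * each): an RC WQE has a fixed overhead of 16 + 16 + 16 = 48 bytes,
 * so a request for max_send_sge = 4 needs 48 + 4 * 16 = 112 bytes,
 * which the sizing code below rounds up to a 128-byte stride
 * (wqe_shift = 7) and then reports back max_gs = (128 - 48) / 16 = 5.
 */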
static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
		       int is_user, int has_srq, struct mlx4_ib_qp *qp)
{
	/* Sanity check RQ size before proceeding */
	if (cap->max_recv_wr  > dev->dev->caps.max_wqes  ||
	    cap->max_recv_sge > dev->dev->caps.max_rq_sg)
		return -EINVAL;

	if (has_srq) {
		/* QPs attached to an SRQ should have no RQ */
		if (cap->max_recv_wr)
			return -EINVAL;

		qp->rq.wqe_cnt = qp->rq.max_gs = 0;
	} else {
		/* HW requires >= 1 RQ entry with >= 1 gather entry */
		if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
			return -EINVAL;

		qp->rq.wqe_cnt	 = roundup_pow_of_two(max(1U, cap->max_recv_wr));
		qp->rq.max_gs	 = roundup_pow_of_two(max(1U, cap->max_recv_sge));
		qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
	}

	cap->max_recv_wr  = qp->rq.max_post = qp->rq.wqe_cnt;
	cap->max_recv_sge = qp->rq.max_gs;

	return 0;
}

static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
			      enum ib_qp_type type, struct mlx4_ib_qp *qp)
{
	/* Sanity check SQ size before proceeding */
	if (cap->max_send_wr  > dev->dev->caps.max_wqes  ||
	    cap->max_send_sge > dev->dev->caps.max_sq_sg ||
	    cap->max_inline_data + send_wqe_overhead(type) +
	    sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
		return -EINVAL;

	/*
	 * For MLX transport we need 2 extra S/G entries:
	 * one for the header and one for the checksum at the end
	 */
	if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
	    cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
		return -EINVAL;

	qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
							sizeof (struct mlx4_wqe_data_seg),
							cap->max_inline_data +
							sizeof (struct mlx4_wqe_inline_seg)) +
						    send_wqe_overhead(type)));
	qp->sq.max_gs = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) /
		sizeof (struct mlx4_wqe_data_seg);

	/*
	 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
	 * allow HW to prefetch.
	 */
	qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
	qp->sq.wqe_cnt	  = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes);

	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
	if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
		qp->rq.offset = 0;
		qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
	} else {
		qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
		qp->sq.offset = 0;
	}

	cap->max_send_wr  = qp->sq.max_post = qp->sq.wqe_cnt - qp->sq_spare_wqes;
	cap->max_send_sge = qp->sq.max_gs;

	/* We don't support inline sends for kernel QPs (yet) */
	cap->max_inline_data = 0;

	return 0;
}

static int set_user_sq_size(struct mlx4_ib_dev *dev,
			    struct mlx4_ib_qp *qp,
			    struct mlx4_ib_create_qp *ucmd)
{
	/* Sanity check SQ size before proceeding */
	if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes	 ||
	    ucmd->log_sq_stride >
		ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
	    ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
		return -EINVAL;

	qp->sq.wqe_cnt	 = 1 << ucmd->log_sq_bb_count;
	qp->sq.wqe_shift = ucmd->log_sq_stride;

	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
		(qp->sq.wqe_cnt << qp->sq.wqe_shift);

	return 0;
}
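/*
 * Buffer layout note: each queue's size (wqe_cnt << wqe_shift) is a
 * power of two, and set_kernel_sq_size() places whichever queue has
 * the larger stride at offset 0.  The other queue then starts at a
 * multiple of its own (smaller) stride, so WQEs in both queues stay
 * naturally aligned inside the single buffer allocated below.
 */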
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
			    struct ib_qp_init_attr *init_attr,
			    struct ib_udata *udata, int sqpn,
			    struct mlx4_ib_qp *qp)
{
	int err;

	mutex_init(&qp->mutex);
	spin_lock_init(&qp->sq.lock);
	spin_lock_init(&qp->rq.lock);

	qp->state	 = IB_QPS_RESET;
	qp->atomic_rd_en = 0;
	qp->resp_depth	 = 0;

	qp->rq.head = 0;
	qp->rq.tail = 0;
	qp->sq.head = 0;
	qp->sq.tail = 0;

	err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp);
	if (err)
		goto err;

	if (pd->uobject) {
		struct mlx4_ib_create_qp ucmd;

		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
			err = -EFAULT;
			goto err;
		}

		qp->sq_no_prefetch = ucmd.sq_no_prefetch;

		err = set_user_sq_size(dev, qp, &ucmd);
		if (err)
			goto err;

		qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
				       qp->buf_size, 0);
		if (IS_ERR(qp->umem)) {
			err = PTR_ERR(qp->umem);
			goto err;
		}

		err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
				    ilog2(qp->umem->page_size), &qp->mtt);
		if (err)
			goto err_buf;

		err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
		if (err)
			goto err_mtt;

		if (!init_attr->srq) {
			err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
						  ucmd.db_addr, &qp->db);
			if (err)
				goto err_mtt;
		}
	} else {
		qp->sq_no_prefetch = 0;

		err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
		if (err)
			goto err;

		if (!init_attr->srq) {
			err = mlx4_ib_db_alloc(dev, &qp->db, 0);
			if (err)
				goto err;

			*qp->db.db = 0;
		}

		if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
			err = -ENOMEM;
			goto err_db;
		}

		err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
				    &qp->mtt);
		if (err)
			goto err_buf;

		err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
		if (err)
			goto err_mtt;

		qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
		qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);

		if (!qp->sq.wrid || !qp->rq.wrid) {
			err = -ENOMEM;
			goto err_wrid;
		}
	}

	err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
	if (err)
		goto err_wrid;

	/*
	 * Hardware wants QPN written in big-endian order (after
	 * shifting) for send doorbell.  Precompute this value to save
	 * a little bit when posting sends.
	 */
	qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
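	/*
	 * On a little-endian host, swab32(qpn << 8) is equivalent to
	 * cpu_to_be32(qpn << 8): the 24-bit QPN ends up big-endian in
	 * the first three bytes of the doorbell word, as the hardware
	 * expects.
	 */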