📄 ibwrapper.c
字号:
/* * Unix SMB/CIFS implementation. * Wrap Infiniband calls. * * Copyright (C) Sven Oehme <oehmes@de.ibm.com> 2006 * * Major code contributions by Peter Somogyi <psomogyi@gamax.hu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */#include <stdlib.h>#include <string.h>#include <stdio.h>#include <errno.h>#include <sys/types.h>#include <netinet/in.h>#include <sys/socket.h>#include <netdb.h>#include <arpa/inet.h>#include <malloc.h>#include <assert.h>#include <unistd.h>#include "includes.h"#include "lib/events/events.h"#include "ibwrapper.h"#include <infiniband/kern-abi.h>#include <rdma/rdma_cma_abi.h>#include <rdma/rdma_cma.h>#include "ibwrapper_internal.h"#include "lib/util/dlinklist.h"#define IBW_LASTERR_BUFSIZE 512static char ibw_lasterr[IBW_LASTERR_BUFSIZE];#define IBW_MAX_SEND_WR 256#define IBW_MAX_RECV_WR 1024#define IBW_RECV_BUFSIZE 256#define IBW_RECV_THRESHOLD (1 * 1024 * 1024)static void ibw_event_handler_verbs(struct event_context *ev, struct fd_event *fde, uint16_t flags, void *private_data);static int ibw_fill_cq(struct ibw_conn *conn);static int ibw_wc_recv(struct ibw_conn *conn, struct ibv_wc *wc);static int ibw_wc_send(struct ibw_conn *conn, struct ibv_wc *wc);static int ibw_send_packet(struct ibw_conn *conn, void *buf, struct ibw_wr *p, uint32_t len);static void *ibw_alloc_mr(struct ibw_ctx_priv *pctx, struct ibw_conn_priv *pconn, uint32_t n, struct ibv_mr **ppmr){ void *buf; DEBUG(10, ("ibw_alloc_mr(cmid=%p, n=%u)\n", pconn->cm_id, n)); buf = memalign(pctx->pagesize, n); if (!buf) { sprintf(ibw_lasterr, "couldn't allocate memory\n"); return NULL; } *ppmr = ibv_reg_mr(pconn->pd, buf, n, IBV_ACCESS_LOCAL_WRITE); if (!*ppmr) { sprintf(ibw_lasterr, "couldn't allocate mr\n"); free(buf); return NULL; } return buf;}static void ibw_free_mr(char **ppbuf, struct ibv_mr **ppmr){ DEBUG(10, ("ibw_free_mr(%p %p)\n", *ppbuf, *ppmr)); if (*ppmr!=NULL) { ibv_dereg_mr(*ppmr); *ppmr = NULL; } if (*ppbuf) { free(*ppbuf); *ppbuf = NULL; }}static int ibw_init_memory(struct ibw_conn *conn){ struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv); struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv); struct ibw_opts *opts = &pctx->opts; int i; struct ibw_wr *p; DEBUG(10, ("ibw_init_memory(cmid: %p)\n", pconn->cm_id)); pconn->buf_send = ibw_alloc_mr(pctx, pconn, opts->max_send_wr * opts->recv_bufsize, &pconn->mr_send); if (!pconn->buf_send) { sprintf(ibw_lasterr, "couldn't allocate work send buf\n"); return -1; } pconn->buf_recv = ibw_alloc_mr(pctx, pconn, opts->max_recv_wr * opts->recv_bufsize, &pconn->mr_recv); if (!pconn->buf_recv) { sprintf(ibw_lasterr, "couldn't allocate work recv buf\n"); return -1; } pconn->wr_index = talloc_size(pconn, opts->max_send_wr * sizeof(struct ibw_wr *)); assert(pconn->wr_index!=NULL); for(i=0; i<opts->max_send_wr; i++) { p = pconn->wr_index[i] = talloc_zero(pconn, struct ibw_wr); p->buf = pconn->buf_send + (i * opts->recv_bufsize); p->wr_id = i; DLIST_ADD(pconn->wr_list_avail, p); } return 0;}static int ibw_ctx_priv_destruct(struct ibw_ctx_priv *pctx){ DEBUG(10, ("ibw_ctx_priv_destruct(%p)\n", pctx)); /* destroy cm */ if (pctx->cm_channel) { rdma_destroy_event_channel(pctx->cm_channel); pctx->cm_channel = NULL; } if (pctx->cm_channel_event) { /* TODO: do we have to do this here? */ talloc_free(pctx->cm_channel_event); pctx->cm_channel_event = NULL; } if (pctx->cm_id) { rdma_destroy_id(pctx->cm_id); pctx->cm_id = NULL; } return 0;}static int ibw_ctx_destruct(struct ibw_ctx *ctx){ DEBUG(10, ("ibw_ctx_destruct(%p)\n", ctx)); return 0;}static int ibw_conn_priv_destruct(struct ibw_conn_priv *pconn){ DEBUG(10, ("ibw_conn_priv_destruct(%p, cmid: %p)\n", pconn, pconn->cm_id)); /* pconn->wr_index is freed by talloc */ /* pconn->wr_index[i] are freed by talloc */ /* destroy verbs */ if (pconn->cm_id!=NULL && pconn->cm_id->qp!=NULL) { rdma_destroy_qp(pconn->cm_id); pconn->cm_id->qp = NULL; } if (pconn->cq!=NULL) { ibv_destroy_cq(pconn->cq); pconn->cq = NULL; } if (pconn->verbs_channel!=NULL) { ibv_destroy_comp_channel(pconn->verbs_channel); pconn->verbs_channel = NULL; } /* must be freed here because its order is important */ if (pconn->verbs_channel_event) { talloc_free(pconn->verbs_channel_event); pconn->verbs_channel_event = NULL; } /* free memory regions */ ibw_free_mr(&pconn->buf_send, &pconn->mr_send); ibw_free_mr(&pconn->buf_recv, &pconn->mr_recv); if (pconn->pd) { ibv_dealloc_pd(pconn->pd); pconn->pd = NULL; DEBUG(10, ("pconn=%p pd deallocated\n", pconn)); } if (pconn->cm_id) { rdma_destroy_id(pconn->cm_id); pconn->cm_id = NULL; DEBUG(10, ("pconn=%p cm_id destroyed\n", pconn)); } return 0;}static int ibw_wr_destruct(struct ibw_wr *wr){ if (wr->buf_large!=NULL) ibw_free_mr(&wr->buf_large, &wr->mr_large); return 0;}static int ibw_conn_destruct(struct ibw_conn *conn){ DEBUG(10, ("ibw_conn_destruct(%p)\n", conn)); /* important here: ctx is a talloc _parent_ */ DLIST_REMOVE(conn->ctx->conn_list, conn); return 0;}struct ibw_conn *ibw_conn_new(struct ibw_ctx *ctx, TALLOC_CTX *mem_ctx){ struct ibw_conn *conn; struct ibw_conn_priv *pconn; assert(ctx!=NULL); conn = talloc_zero(mem_ctx, struct ibw_conn); assert(conn!=NULL); talloc_set_destructor(conn, ibw_conn_destruct); pconn = talloc_zero(conn, struct ibw_conn_priv); assert(pconn!=NULL); talloc_set_destructor(pconn, ibw_conn_priv_destruct); conn->ctx = ctx; conn->internal = (void *)pconn; DLIST_ADD(ctx->conn_list, conn); return conn;}static int ibw_setup_cq_qp(struct ibw_conn *conn){ struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv); struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv); struct ibv_qp_init_attr init_attr; struct ibv_qp_attr attr; int rc; DEBUG(10, ("ibw_setup_cq_qp(cmid: %p)\n", pconn->cm_id)); /* init verbs */ pconn->verbs_channel = ibv_create_comp_channel(pconn->cm_id->verbs); if (!pconn->verbs_channel) { sprintf(ibw_lasterr, "ibv_create_comp_channel failed %d\n", errno); return -1; } DEBUG(10, ("created channel %p\n", pconn->verbs_channel)); pconn->verbs_channel_event = event_add_fd(pctx->ectx, NULL, /* not pconn or conn */ pconn->verbs_channel->fd, EVENT_FD_READ, ibw_event_handler_verbs, conn); pconn->pd = ibv_alloc_pd(pconn->cm_id->verbs); if (!pconn->pd) { sprintf(ibw_lasterr, "ibv_alloc_pd failed %d\n", errno); return -1; } DEBUG(10, ("created pd %p\n", pconn->pd)); /* init mr */ if (ibw_init_memory(conn)) return -1; /* init cq */ pconn->cq = ibv_create_cq(pconn->cm_id->verbs, pctx->opts.max_recv_wr + pctx->opts.max_send_wr, conn, pconn->verbs_channel, 0); if (pconn->cq==NULL) { sprintf(ibw_lasterr, "ibv_create_cq failed\n"); return -1; } rc = ibv_req_notify_cq(pconn->cq, 0); if (rc) { sprintf(ibw_lasterr, "ibv_req_notify_cq failed with %d\n", rc); return rc; } /* init qp */ memset(&init_attr, 0, sizeof(init_attr)); init_attr.cap.max_send_wr = pctx->opts.max_send_wr; init_attr.cap.max_recv_wr = pctx->opts.max_recv_wr; init_attr.cap.max_recv_sge = 1; init_attr.cap.max_send_sge = 1; init_attr.qp_type = IBV_QPT_RC; init_attr.send_cq = pconn->cq; init_attr.recv_cq = pconn->cq; rc = rdma_create_qp(pconn->cm_id, pconn->pd, &init_attr); if (rc) { sprintf(ibw_lasterr, "rdma_create_qp failed with %d\n", rc); return rc; } /* elase result is in pconn->cm_id->qp */ rc = ibv_query_qp(pconn->cm_id->qp, &attr, IBV_QP_PATH_MTU, &init_attr); if (rc) { sprintf(ibw_lasterr, "ibv_query_qp failed with %d\n", rc); return rc; } return ibw_fill_cq(conn);}static int ibw_refill_cq_recv(struct ibw_conn *conn){ struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv); struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv); int rc; struct ibv_sge list = { .addr = (uintptr_t) NULL, /* filled below */ .length = pctx->opts.recv_bufsize, .lkey = pconn->mr_recv->lkey /* always the same */ }; struct ibv_recv_wr wr = { .wr_id = 0, /* filled below */ .sg_list = &list, .num_sge = 1, }; struct ibv_recv_wr *bad_wr; DEBUG(10, ("ibw_refill_cq_recv(cmid: %p)\n", pconn->cm_id)); list.addr = (uintptr_t) pconn->buf_recv + pctx->opts.recv_bufsize * pconn->recv_index; wr.wr_id = pconn->recv_index; pconn->recv_index = (pconn->recv_index + 1) % pctx->opts.max_recv_wr; rc = ibv_post_recv(pconn->cm_id->qp, &wr, &bad_wr); if (rc) { sprintf(ibw_lasterr, "refill/ibv_post_recv failed with %d\n", rc); DEBUG(0, (ibw_lasterr)); return -2; } return 0;}static int ibw_fill_cq(struct ibw_conn *conn){ struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv); struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv); int i, rc; struct ibv_sge list = { .addr = (uintptr_t) NULL, /* filled below */ .length = pctx->opts.recv_bufsize, .lkey = pconn->mr_recv->lkey /* always the same */ }; struct ibv_recv_wr wr = { .wr_id = 0, /* filled below */ .sg_list = &list, .num_sge = 1, }; struct ibv_recv_wr *bad_wr; DEBUG(10, ("ibw_fill_cq(cmid: %p)\n", pconn->cm_id)); for(i = pctx->opts.max_recv_wr; i!=0; i--) { list.addr = (uintptr_t) pconn->buf_recv + pctx->opts.recv_bufsize * pconn->recv_index; wr.wr_id = pconn->recv_index; pconn->recv_index = (pconn->recv_index + 1) % pctx->opts.max_recv_wr; rc = ibv_post_recv(pconn->cm_id->qp, &wr, &bad_wr); if (rc) { sprintf(ibw_lasterr, "fill/ibv_post_recv failed with %d\n", rc); DEBUG(0, (ibw_lasterr)); return -2; } } return 0;}static int ibw_manage_connect(struct ibw_conn *conn){ struct rdma_conn_param conn_param; struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv); int rc; DEBUG(10, ("ibw_manage_connect(cmid: %p)\n", pconn->cm_id)); if (ibw_setup_cq_qp(conn)) return -1; /* cm connect */ memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 1; conn_param.initiator_depth = 1; conn_param.retry_count = 10; rc = rdma_connect(pconn->cm_id, &conn_param); if (rc) sprintf(ibw_lasterr, "rdma_connect error %d\n", rc); return rc;}static void ibw_event_handler_cm(struct event_context *ev, struct fd_event *fde, uint16_t flags, void *private_data){ int rc; struct ibw_ctx *ctx = talloc_get_type(private_data, struct ibw_ctx); struct ibw_ctx_priv *pctx = talloc_get_type(ctx->internal, struct ibw_ctx_priv); struct ibw_conn *conn = NULL; struct ibw_conn_priv *pconn = NULL; struct rdma_cm_id *cma_id = NULL; struct rdma_cm_event *event = NULL; assert(ctx!=NULL); rc = rdma_get_cm_event(pctx->cm_channel, &event); if (rc) { ctx->state = IBWS_ERROR; event = NULL; sprintf(ibw_lasterr, "rdma_get_cm_event error %d\n", rc); goto error; } cma_id = event->id; DEBUG(10, ("cma_event type %d cma_id %p (%s)\n", event->event, cma_id, (cma_id == pctx->cm_id) ? "parent" : "child")); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: DEBUG(11, ("RDMA_CM_EVENT_ADDR_RESOLVED\n")); /* continuing from ibw_connect ... */ rc = rdma_resolve_route(cma_id, 2000); if (rc) { sprintf(ibw_lasterr, "rdma_resolve_route error %d\n", rc); goto error; } /* continued at RDMA_CM_EVENT_ROUTE_RESOLVED */ break; case RDMA_CM_EVENT_ROUTE_RESOLVED: DEBUG(11, ("RDMA_CM_EVENT_ROUTE_RESOLVED\n")); /* after RDMA_CM_EVENT_ADDR_RESOLVED: */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -