📄 iiblnd.c
字号:
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * Copyright (C) 2004 Cluster File Systems, Inc.
 *   Author: Eric Barton <eric@bartonsoftware.com>
 *
 *   This file is part of Lustre, http://www.lustre.org.
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

#include "iiblnd.h"

/* Operations table of type IIBLND: wires the kibnal_* entry points. */
lnd_t the_kiblnd = {
        .lnd_type       = IIBLND,
        .lnd_startup    = kibnal_startup,
        .lnd_shutdown   = kibnal_shutdown,
        .lnd_ctl        = kibnal_ctl,
        .lnd_send       = kibnal_send,
        .lnd_recv       = kibnal_recv,
        .lnd_eager_recv = kibnal_eager_recv,
};

/* Module-wide state (NI, incarnation stamp, ... are read from here below). */
kib_data_t              kibnal_data;

/*
 * Compute a rotate-and-add checksum over the first 'nob' bytes at 'ptr'.
 *
 * Each step rotates the running 32-bit sum left by one bit and adds the
 * next byte.  The result is never 0, since 0 is reserved to mean
 * "no checksum"; a computed sum of 0 is reported as 1 instead.
 *
 * NB bytes are read through plain 'char', so where 'char' is signed, bytes
 * >= 0x80 are added as negative values.  Changing the pointer type would
 * change the values produced (presumably breaking agreement with peers that
 * compute the checksum this way -- confirm before touching).
 */
__u32
kibnal_cksum (void *ptr, int nob)
{
        char  *c  = ptr;
        __u32  sum = 0;

        while (nob-- > 0)
                sum = ((sum << 1) | (sum >> 31)) + *c++;

        /* ensure I don't return 0 (== no checksum) */
        return (sum == 0) ? 1 : sum;
}

/*
 * Set the type and total size of 'msg'.
 *
 * ibm_nob becomes the header size (everything before ibm_u) plus
 * 'body_nob' bytes of type-specific body.
 */
void
kibnal_init_msg(kib_msg_t *msg, int type, int body_nob)
{
        msg->ibm_type = type;
        msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob;
}

/*
 * Fill in the header fields of 'msg' that are only known at send time.
 *
 * ibm_type and ibm_nob are NOT touched here; they must already have been set
 * (see kibnal_init_msg()).  If the kib_cksum tunable is non-zero the checksum
 * is computed over ibm_nob bytes, with ibm_cksum itself zeroed.
 */
void
kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits,
                lnet_nid_t dstnid, __u64 dststamp, __u64 seq)
{
        /* CAVEAT EMPTOR! all message fields not set here should have been
         * initialised previously. */
        msg->ibm_magic    = IBNAL_MSG_MAGIC;
        msg->ibm_version  = version;
        /*   ibm_type */
        msg->ibm_credits  = credits;
        /*   ibm_nob */
        msg->ibm_cksum    = 0;
        msg->ibm_srcnid   = lnet_ptlcompat_srcnid(kibnal_data.kib_ni->ni_nid,
                                                  dstnid);
        msg->ibm_srcstamp = kibnal_data.kib_incarnation;
        msg->ibm_dstnid   = dstnid;
        msg->ibm_dststamp = dststamp;
        msg->ibm_seq      = seq;

        if (*kibnal_tunables.kib_cksum) {
                /* NB ibm_cksum zero while computing cksum */
                msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob);
        }
}

/*
 * Build a complete connection-parameters message of the given 'type' in the
 * 'nob'-byte buffer at 'msg'.
 *
 * The whole buffer is zeroed first, then the local queue depth, maximum
 * message size and maximum RDMA fragment count are advertised and the header
 * is packed with zero credits and sequence number 0.  'nob' must be large
 * enough for the header plus a kib_connparams_t (asserted).
 */
void
kibnal_pack_connmsg(kib_msg_t *msg, __u32 version, int nob,
                    int type, lnet_nid_t dstnid, __u64 dststamp)
{
        LASSERT (nob >= offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t));

        memset(msg, 0, nob);
        kibnal_init_msg(msg, type, sizeof(kib_connparams_t));

        msg->ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE;
        msg->ibm_u.connparams.ibcp_max_msg_size = IBNAL_MSG_SIZE;
        msg->ibm_u.connparams.ibcp_max_frags = IBNAL_MAX_RDMA_FRAGS;

        kibnal_pack_msg(msg, version, 0, dstnid, dststamp, 0);
}

/*
 * Validate a received message and convert it to host byte order in place.
 *
 * msg              - the received message; modified in place when the peer
 *                    has the opposite endianness.
 * expected_version - protocol version the message must carry, or 0 to accept
 *                    either IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD or
 *                    IBNAL_MSG_VERSION.
 * nob              - number of bytes actually received.
 *
 * Returns 0 if the message is well formed, -EPROTO otherwise.
 *
 * The size the peer claims (ibm_nob) is required to lie within
 * [header size, nob] before it is used for anything else, so that all the
 * per-type size comparisons below operate on a non-negative value.
 */
int
kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob)
{
        const int hdr_size = offsetof(kib_msg_t, ibm_u);
        __u32     msg_cksum;
        __u32     msg_version;
        int       flip;
        int       msg_nob;
#if !IBNAL_USE_FMR
        int       i;
        int       n;
#endif
        /* 6 bytes are enough to have received magic + version */
        if (nob < 6) {
                CERROR("Short message: %d\n", nob);
                return -EPROTO;
        }

        /* Future protocol version compatibility support!
         * If the iiblnd-specific protocol changes, or when LNET unifies
         * protocols over all LNDs, the initial connection will negotiate a
         * protocol version.  If I find this, I avoid any console errors.  If
         * my peer is doing connection establishment, the reject will tell the
         * peer which version I'm running. */

        if (msg->ibm_magic == IBNAL_MSG_MAGIC) {
                flip = 0;
        } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) {
                flip = 1;
        } else {
                /* LNET protocol magic: fail quietly (no console error) */
                if (msg->ibm_magic == LNET_PROTO_MAGIC ||
                    msg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
                        return -EPROTO;

                /* Completely out to lunch */
                CERROR("Bad magic: %08x\n", msg->ibm_magic);
                return -EPROTO;
        }

        msg_version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
        if (expected_version == 0) {
                if (msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD &&
                    msg_version != IBNAL_MSG_VERSION)
                        return -EPROTO;
        } else if (msg_version != expected_version) {
                CERROR("Bad version: %x(%x expected)\n",
                       msg_version, expected_version);
                return -EPROTO;
        }

        if (nob < hdr_size) {
                CERROR("Short message: %d\n", nob);
                return -EPROTO;
        }

        msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;

        /* The peer-supplied size lands in a signed int.  A negative value (or
         * one smaller than the header) would sail through 'msg_nob > nob' and
         * then be converted to a huge unsigned number in the size_t
         * comparisons below, defeating every "Short ..." check. */
        if (msg_nob < hdr_size) {
                CERROR("Bad message size: %d, should be >= %d\n",
                       msg_nob, hdr_size);
                return -EPROTO;
        }

        if (msg_nob > nob) {
                CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
                return -EPROTO;
        }

        /* checksum must be computed with ibm_cksum zero and BEFORE anything
         * gets flipped */
        msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
        msg->ibm_cksum = 0;
        if (msg_cksum != 0 &&
            msg_cksum != kibnal_cksum(msg, msg_nob)) {
                CERROR("Bad checksum\n");
                return -EPROTO;
        }
        msg->ibm_cksum = msg_cksum;

        if (flip) {
                /* leave magic unflipped as a clue to peer endianness */
                msg->ibm_version = msg_version;
                /* single-byte fields need no swabbing */
                CLASSERT (sizeof(msg->ibm_type) == 1);
                CLASSERT (sizeof(msg->ibm_credits) == 1);
                msg->ibm_nob = msg_nob;
                __swab64s(&msg->ibm_srcnid);
                __swab64s(&msg->ibm_srcstamp);
                __swab64s(&msg->ibm_dstnid);
                __swab64s(&msg->ibm_dststamp);
                __swab64s(&msg->ibm_seq);
        }

        if (msg->ibm_srcnid == LNET_NID_ANY) {
                CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
                return -EPROTO;
        }

        /* Per-type checks: the body must be fully present, and any
         * multi-byte body fields are swabbed when the peer is flipped. */
        switch (msg->ibm_type) {
        default:
                CERROR("Unknown message type %x\n", msg->ibm_type);
                return -EPROTO;

        case IBNAL_MSG_NOOP:
                break;

        case IBNAL_MSG_IMMEDIATE:
                if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) {
                        CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob,
                               (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]));
                        return -EPROTO;
                }
                break;

        case IBNAL_MSG_PUT_REQ:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) {
                        CERROR("Short PUT_REQ: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.putreq)));
                        return -EPROTO;
                }
                break;

        case IBNAL_MSG_PUT_ACK:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) {
                        CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.putack)));
                        return -EPROTO;
                }
#if IBNAL_USE_FMR
                if (flip) {
                        __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr);
                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob);
                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
                }
#else
                if (flip) {
                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag);
                }

                /* the fragment count must be sane before it is used to
                 * size-check and walk the fragment array */
                n = msg->ibm_u.putack.ibpam_rd.rd_nfrag;
                if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
                        CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n",
                               n, IBNAL_MAX_RDMA_FRAGS);
                        return -EPROTO;
                }

                if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) {
                        CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
                               (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n]));
                        return -EPROTO;
                }

                if (flip) {
                        for (i = 0; i < n; i++) {
                                __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob);
                                __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr);
                        }
                }
#endif
                break;

        case IBNAL_MSG_GET_REQ:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) {
                        CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.get)));
                        return -EPROTO;
                }
#if IBNAL_USE_FMR
                if (flip) {
                        __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr);
                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob);
                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
                }
#else
                if (flip) {
                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag);
                }

                /* same fragment-count validation as for PUT_ACK */
                n = msg->ibm_u.get.ibgm_rd.rd_nfrag;
                if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
                        CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n",
                               n, IBNAL_MAX_RDMA_FRAGS);
                        return -EPROTO;
                }

                if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) {
                        CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
                               (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n]));
                        return -EPROTO;
                }

                if (flip) {
                        /* n == rd_nfrag, already validated above */
                        for (i = 0; i < n; i++) {
                                __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob);
                                __swab64s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr);
                        }
                }
#endif
                break;

        case IBNAL_MSG_PUT_NAK:
        case IBNAL_MSG_PUT_DONE:
        case IBNAL_MSG_GET_DONE:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) {
                        CERROR("Short RDMA completion: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.completion)));
                        return -EPROTO;
                }
                if (flip)
                        __swab32s(&msg->ibm_u.completion.ibcm_status);
                break;

        case IBNAL_MSG_CONNREQ:
        case IBNAL_MSG_CONNACK:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) {
                        CERROR("Short connreq/ack: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.connparams)));
                        return -EPROTO;
                }
                if (flip) {
                        __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth);
                        __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
                        __swab32s(&msg->ibm_u.connparams.ibcp_max_frags);
                }
                break;
        }
        return 0;
}

/*
 * Create and configure a CEP of type CM_RC_TYPE.
 *
 * nid - the peer the CEP is for, or LNET_NID_ANY for the listener; in the
 *       listener case async-accept is enabled and the listen backlog is set
 *       to 0 ("system max").  The timewait callback flag is set in all cases.
 *
 * Returns the new CEP handle, or NULL on failure.  A CEP that was created
 * but could not be configured is destroyed before returning.
 */
IB_HANDLE
kibnal_create_cep(lnet_nid_t nid)
{
        FSTATUS        frc;
        __u32          u32val;
        IB_HANDLE      cep;

        cep = iba_cm_create_cep(CM_RC_TYPE);
        if (cep == NULL) {
                CERROR ("Can't create CEP for %s\n",
                        (nid == LNET_NID_ANY) ? "listener" :
                        libcfs_nid2str(nid));
                return NULL;
        }

        if (nid == LNET_NID_ANY) {
                u32val = 1;
                frc = iba_cm_modify_cep(cep, CM_FLAG_ASYNC_ACCEPT,
                                        (char *)&u32val, sizeof(u32val), 0);
                if (frc != FSUCCESS) {
                        CERROR("Can't set async_accept: %d\n", frc);
                        goto failed;
                }

                u32val = 0;                     /* sets system max */
                frc = iba_cm_modify_cep(cep, CM_FLAG_LISTEN_BACKLOG,
                                        (char *)&u32val, sizeof(u32val), 0);
                if (frc != FSUCCESS) {
                        CERROR("Can't set listen backlog: %d\n", frc);
                        goto failed;
                }
        }

        u32val = 1;
        frc = iba_cm_modify_cep(cep, CM_FLAG_TIMEWAIT_CALLBACK,
                                (char *)&u32val, sizeof(u32val), 0);
        if (frc != FSUCCESS) {
                CERROR("Can't set timewait_callback for %s: %d\n",
                       (nid == LNET_NID_ANY) ? "listener" :
                       libcfs_nid2str(nid), frc);
                goto failed;
        }

        return cep;

 failed:
        iba_cm_destroy_cep(cep);
        return NULL;
}

#define IBNAL_CHECK_ADVERT 1
#if IBNAL_CHECK_ADVERT
void
kibnal_service_query_done (void *arg, QUERY *qry,
                           QUERY_RESULT_VALUES *qry_result)
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -