📄 openiblnd.c
字号:
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * Copyright (C) 2004 Cluster File Systems, Inc.
 *   Author: Eric Barton <eric@bartonsoftware.com>
 *
 *   This file is part of Lustre, http://www.lustre.org.
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

#include "openiblnd.h"

/* LND method table: the type is selected at build time by USING_TSAPI,
 * every other entry points at the kibnal_* implementation. */
lnd_t the_kiblnd = {
#ifdef USING_TSAPI
        .lnd_type       = CIBLND,
#else
        .lnd_type       = OPENIBLND,
#endif
        .lnd_startup    = kibnal_startup,
        .lnd_shutdown   = kibnal_shutdown,
        .lnd_ctl        = kibnal_ctl,
        .lnd_send       = kibnal_send,
        .lnd_recv       = kibnal_recv,
        .lnd_eager_recv = kibnal_eager_recv,
        .lnd_accept     = kibnal_accept,
};

kib_data_t              kibnal_data;

/*
 * Checksum 'nob' bytes starting at 'ptr'.
 *
 * For every byte the running 32-bit sum is rotated left by one bit and the
 * byte is added.  A result of 0 is mapped to 1, because 0 is reserved to
 * mean "no checksum".  A non-positive 'nob' checksums nothing and therefore
 * returns 1.
 *
 * NOTE(review): bytes are read through a plain 'char *', so whether bytes
 * >= 0x80 are added as negative values depends on the platform's char
 * signedness.  Left untouched here because changing it would change the
 * checksum values exchanged with peers.
 */
__u32
kibnal_cksum (void *ptr, int nob)
{
        char  *c  = ptr;
        __u32  sum = 0;

        while (nob-- > 0)
                sum = ((sum << 1) | (sum >> 31)) + *c++;

        /* ensure I don't return 0 (== no checksum) */
        return (sum == 0) ? 1 : sum;
}

/*
 * Set the type and total size of 'msg'.
 *
 * The size is the offset of the body union (ibm_u) within kib_msg_t plus
 * 'body_nob' bytes of body.
 */
void
kibnal_init_msg(kib_msg_t *msg, int type, int body_nob)
{
        msg->ibm_type = type;
        msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob;
}

/*
 * Fill in the header fields of 'msg' that are not set by kibnal_init_msg()
 * (magic, version, credits, source/destination NID and stamp) and, when the
 * kib_cksum tunable is non-zero, checksum the first ibm_nob bytes.
 */
void
kibnal_pack_msg(kib_msg_t *msg, int version, int credits,
                lnet_nid_t dstnid, __u64 dststamp)
{
        /* CAVEAT EMPTOR! all message fields not set here should have been
         * initialised previously. */
        msg->ibm_magic    = IBNAL_MSG_MAGIC;
        msg->ibm_version  = version;
        /*   ibm_type */
        msg->ibm_credits  = credits;
        /*   ibm_nob */
        msg->ibm_cksum    = 0;
        msg->ibm_srcnid   = lnet_ptlcompat_srcnid(kibnal_data.kib_ni->ni_nid,
                                                  dstnid);
        msg->ibm_srcstamp = kibnal_data.kib_incarnation;
        msg->ibm_dstnid   = dstnid;
        msg->ibm_dststamp = dststamp;

        if (*kibnal_tunables.kib_cksum) {
                /* NB ibm_cksum zero while computing cksum */
                msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob);
        }
}

/*
 * Validate a received message in place and convert it to host byte order.
 *
 * msg              - buffer holding the received message
 * expected_version - protocol version the message must carry; 0 accepts
 *                    either IBNAL_MSG_VERSION or
 *                    IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD
 * nob              - number of bytes actually received into 'msg'
 *
 * Returns 0 on success, or -EPROTO (after logging the reason) when the
 * message is too short, has bad magic/version/checksum, claims a size that
 * is inconsistent with 'nob' or with its type, has LNET_NID_ANY as source,
 * or has an unknown type.
 *
 * The magic is deliberately left unswabbed so callers can still tell the
 * peer's endianness.
 */
int
kibnal_unpack_msg(kib_msg_t *msg, int expected_version, int nob)
{
        const int hdr_size = offsetof(kib_msg_t, ibm_u);
        __u32     msg_cksum;
        int       msg_version;
        int       flip;
        int       msg_nob;

        /* 6 bytes == magic + version (see kibnal_make_svcqry), the minimum
         * needed before any field can be examined */
        if (nob < 6) {
                CERROR("Short message: %d\n", nob);
                return -EPROTO;
        }

        if (msg->ibm_magic == IBNAL_MSG_MAGIC) {
                flip = 0;
        } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) {
                flip = 1;
        } else {
                CERROR("Bad magic: %08x\n", msg->ibm_magic);
                return -EPROTO;
        }

        msg_version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
        if ((expected_version == 0) ?
            (msg_version != IBNAL_MSG_VERSION &&
             msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) :
            (msg_version != expected_version)) {
                CERROR("Bad version: %x\n", msg_version);
                return -EPROTO;
        }

        if (nob < hdr_size) {
                CERROR("Short message: %d\n", nob);
                return -EPROTO;
        }

        msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
        if (msg_nob < hdr_size) {
                /* The sender's claimed size must cover at least the header.
                 * This also rejects a wire value that became negative when
                 * stored in an int: such a value would otherwise slip
                 * through the per-type checks below, which compare against
                 * a size_t and so treat a negative msg_nob as huge. */
                CERROR("Bad message size: %d\n", msg_nob);
                return -EPROTO;
        }
        if (msg_nob > nob) {
                CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
                return -EPROTO;
        }

        /* checksum must be computed with ibm_cksum zero and BEFORE anything
         * gets flipped */
        msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
        msg->ibm_cksum = 0;
        if (msg_cksum != 0 &&
            msg_cksum != kibnal_cksum(msg, msg_nob)) {
                CERROR("Bad checksum\n");
                return -EPROTO;
        }
        msg->ibm_cksum = msg_cksum;

        if (flip) {
                /* leave magic unflipped as a clue to peer endianness */
                msg->ibm_version = msg_version;
                LASSERT (sizeof(msg->ibm_type) == 1);
                LASSERT (sizeof(msg->ibm_credits) == 1);
                msg->ibm_nob = msg_nob;
                __swab64s(&msg->ibm_srcnid);
                __swab64s(&msg->ibm_srcstamp);
                __swab64s(&msg->ibm_dstnid);
                __swab64s(&msg->ibm_dststamp);
        }

        if (msg->ibm_srcnid == LNET_NID_ANY) {
                CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
                return -EPROTO;
        }

        /* Per-type checks: the claimed size must cover the body for that
         * type, and multi-byte body fields are swabbed when needed */
        switch (msg->ibm_type) {
        default:
                CERROR("Unknown message type %x\n", msg->ibm_type);
                return -EPROTO;

        case IBNAL_MSG_SVCQRY:
        case IBNAL_MSG_NOOP:
                break;

        case IBNAL_MSG_SVCRSP:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.svcrsp)) {
                        CERROR("Short SVCRSP: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.svcrsp)));
                        return -EPROTO;
                }
                if (flip) {
                        __swab64s(&msg->ibm_u.svcrsp.ibsr_svc_id);
                        __swab16s(&msg->ibm_u.svcrsp.ibsr_svc_pkey);
                }
                break;

        case IBNAL_MSG_CONNREQ:
        case IBNAL_MSG_CONNACK:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) {
                        CERROR("Short CONNREQ: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.connparams)));
                        return -EPROTO;
                }
                if (flip)
                        __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth);
                break;

        case IBNAL_MSG_IMMEDIATE:
                if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) {
                        CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob,
                               (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]));
                        return -EPROTO;
                }
                break;

        case IBNAL_MSG_PUT_RDMA:
        case IBNAL_MSG_GET_RDMA:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.rdma)) {
                        CERROR("Short RDMA req: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.rdma)));
                        return -EPROTO;
                }
                if (flip) {
                        __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_key);
                        __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_nob);
                        __swab64s(&msg->ibm_u.rdma.ibrm_desc.rd_addr);
                }
                break;

        case IBNAL_MSG_PUT_DONE:
        case IBNAL_MSG_GET_DONE:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) {
                        CERROR("Short RDMA completion: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.completion)));
                        return -EPROTO;
                }
                if (flip)
                        __swab32s(&msg->ibm_u.completion.ibcm_status);
                break;
        }
        return 0;
}

/*
 * Send a service query (SVCQRY) to conn's peer over a socket and process
 * the service response (SVCRSP).
 *
 * The query is first sent with IBNAL_MSG_VERSION.  If the reply carries a
 * different version, the socket is released and the exchange is retried
 * once with IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD.
 *
 * On success the peer's source stamp, the service response body and the
 * agreed version are stored in 'conn' and 0 is returned.  Returns
 * -ECONNABORTED if the connect fails, the socket read/write error if I/O
 * fails, the kibnal_unpack_msg() error if the reply does not unpack, or
 * -EPROTO if the reply has bad magic, an unsupported version, an unexpected
 * type, or unexpected source/destination NID or stamp.
 *
 * The connreq message buffer (conn->ibc_connreq->cr_msg) is used both for
 * the outgoing query and for the incoming reply.
 */
int
kibnal_make_svcqry (kib_conn_t *conn)
{
        kib_peer_t    *peer = conn->ibc_peer;
        int            version = IBNAL_MSG_VERSION;
        int            msg_version;
        kib_msg_t     *msg;
        struct socket *sock;
        int            rc;
        int            nob;

        LASSERT (conn->ibc_connreq != NULL);
        msg = &conn->ibc_connreq->cr_msg;

 again:
        kibnal_init_msg(msg, IBNAL_MSG_SVCQRY, 0);
        kibnal_pack_msg(msg, version, 0, peer->ibp_nid, 0);

        rc = lnet_connect(&sock, peer->ibp_nid,
                          0, peer->ibp_ip, peer->ibp_port);
        if (rc != 0)
                return -ECONNABORTED;

        rc = libcfs_sock_write(sock, msg, msg->ibm_nob,
                               lnet_acceptor_timeout());
        if (rc != 0) {
                CERROR("Error %d sending svcqry to %s at %u.%u.%u.%u/%d\n",
                       rc, libcfs_nid2str(peer->ibp_nid),
                       HIPQUAD(peer->ibp_ip), peer->ibp_port);
                goto out;
        }

        /* The first 6 bytes are invariably MAGIC + proto version */
        rc = libcfs_sock_read(sock, msg, 6, *kibnal_tunables.kib_timeout);
        if (rc != 0) {
                CERROR("Error %d receiving svcrsp from %s at %u.%u.%u.%u/%d\n",
                       rc, libcfs_nid2str(peer->ibp_nid),
                       HIPQUAD(peer->ibp_ip), peer->ibp_port);
                goto out;
        }

        if (msg->ibm_magic != IBNAL_MSG_MAGIC &&
            msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) {
                CERROR("Bad magic: %08x from %s at %u.%u.%u.%u/%d\n",
                       msg->ibm_magic, libcfs_nid2str(peer->ibp_nid),
                       HIPQUAD(peer->ibp_ip), peer->ibp_port);
                rc = -EPROTO;
                goto out;
        }

        msg_version = (msg->ibm_magic == IBNAL_MSG_MAGIC) ?
                      msg->ibm_version : __swab16(msg->ibm_version);
        if (msg_version != version) {
                if (version == IBNAL_MSG_VERSION) {
                        /* retry with previous version */
                        libcfs_sock_release(sock);
                        version = IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD;
                        goto again;
                }

                CERROR("Bad version %x from %s at %u.%u.%u.%u/%d\n",
                       msg_version, libcfs_nid2str(peer->ibp_nid),
                       HIPQUAD(peer->ibp_ip), peer->ibp_port);
                rc = -EPROTO;
                goto out;
        }

        /* Read in the rest of the message now we know the expected format */
        nob = offsetof(kib_msg_t, ibm_u) + sizeof(kib_svcrsp_t);
        rc = libcfs_sock_read(sock, ((char *)msg) + 6, nob - 6,
                              *kibnal_tunables.kib_timeout);
        if (rc != 0) {
                CERROR("Error %d receiving svcrsp from %s at %u.%u.%u.%u/%d\n",
                       rc, libcfs_nid2str(peer->ibp_nid),
                       HIPQUAD(peer->ibp_ip), peer->ibp_port);
                goto out;
        }

        rc = kibnal_unpack_msg(msg, version, nob);
        if (rc != 0) {
                CERROR("Error %d unpacking svcrsp from %s at %u.%u.%u.%u/%d\n",
                       rc, libcfs_nid2str(peer->ibp_nid),
                       HIPQUAD(peer->ibp_ip), peer->ibp_port);
                goto out;
        }

        if (msg->ibm_type != IBNAL_MSG_SVCRSP) {
                CERROR("Unexpected response type %d from %s at %u.%u.%u.%u/%d\n",
                       msg->ibm_type, libcfs_nid2str(peer->ibp_nid),
                       HIPQUAD(peer->ibp_ip), peer->ibp_port);
                rc = -EPROTO;
                goto out;
        }

        /* The reply must be addressed to this node's current incarnation */
        if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid,
                                     msg->ibm_dstnid) ||
            msg->ibm_dststamp != kibnal_data.kib_incarnation) {
                CERROR("Unexpected dst NID/stamp %s/"LPX64" from "
                       "%s at %u.%u.%u.%u/%d\n",
                       libcfs_nid2str(msg->ibm_dstnid), msg->ibm_dststamp,
                       libcfs_nid2str(peer->ibp_nid),
                       HIPQUAD(peer->ibp_ip), peer->ibp_port);
                rc = -EPROTO;
                goto out;
        }

        /* ...and must come from the peer we queried */
        if (!lnet_ptlcompat_matchnid(peer->ibp_nid, msg->ibm_srcnid)) {
                CERROR("Unexpected src NID %s from %s at %u.%u.%u.%u/%d\n",
                       libcfs_nid2str(msg->ibm_srcnid),
                       libcfs_nid2str(peer->ibp_nid),
                       HIPQUAD(peer->ibp_ip), peer->ibp_port);
                rc = -EPROTO;
                goto out;
        }

        conn->ibc_incarnation = msg->ibm_srcstamp;
        conn->ibc_connreq->cr_svcrsp = msg->ibm_u.svcrsp;
        conn->ibc_version = version;

 out:
        /* every path that reaches here holds a connected socket */
        libcfs_sock_release(sock);
        return rc;
}

void
kibnal_handle_svcqry (struct socket *sock)
{
        __u32                peer_ip;
        unsigned int         peer_port;
        kib_msg_t           *msg;
        __u64                srcnid;
        __u64                srcstamp;
        int                  version;
        int                  reject = 0;
        int                  rc;

        rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
        if (rc != 0) {
                CERROR("Can't get peer's IP: %d\n", rc);
                return;
        }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -