📄 o2iblnd.c
字号:
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * Copyright (C) 2006 Cluster File Systems, Inc.
 *   Author: Eric Barton <eric@bartonsoftware.com>
 *
 * This file is part of Lustre, http://www.lustre.org.
 *
 * Lustre is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * Lustre is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Lustre; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "o2iblnd.h"

/* LND method table registered with LNet for the O2IBLND network type. */
lnd_t the_kiblnd = {
        .lnd_type     = O2IBLND,
        .lnd_startup  = kiblnd_startup,
        .lnd_shutdown = kiblnd_shutdown,
        .lnd_ctl      = kiblnd_ctl,
        .lnd_send     = kiblnd_send,
        .lnd_recv     = kiblnd_recv,
};

/* Global state shared by all o2iblnd instances. */
kib_data_t kiblnd_data;

/* Rotate-and-add checksum over 'nob' bytes at 'ptr'; never returns 0,
 * since 0 is reserved to mean "no checksum". */
__u32
kiblnd_cksum (void *ptr, int nob)
{
        char  *c   = ptr;
        __u32  sum = 0;

        while (nob-- > 0)
                sum = ((sum << 1) | (sum >> 31)) + *c++;

        /* ensure I don't return 0 (== no checksum) */
        return (sum == 0) ? 1 : sum;
}

/* Set the message type and total on-the-wire size (header + body). */
void
kiblnd_init_msg (kib_msg_t *msg, int type, int body_nob)
{
        msg->ibm_type = type;
        msg->ibm_nob  = offsetof(kib_msg_t, ibm_u) + body_nob;
}

/* Fill in the common wire-header fields of an outgoing message and,
 * if the cksum tunable is enabled, stamp it with a checksum. */
void
kiblnd_pack_msg (lnet_ni_t *ni, kib_msg_t *msg,
                 int credits, lnet_nid_t dstnid, __u64 dststamp)
{
        kib_net_t *net = ni->ni_data;

        /* CAVEAT EMPTOR! all message fields not set here should have been
         * initialised previously. */
        msg->ibm_magic    = IBLND_MSG_MAGIC;
        msg->ibm_version  = IBLND_MSG_VERSION;
        /* ibm_type */
        msg->ibm_credits  = credits;
        /* ibm_nob */
        msg->ibm_cksum    = 0;
        msg->ibm_srcnid   = lnet_ptlcompat_srcnid(ni->ni_nid, dstnid);
        msg->ibm_srcstamp = net->ibn_incarnation;
        msg->ibm_dstnid   = dstnid;
        msg->ibm_dststamp = dststamp;

        if (*kiblnd_tunables.kib_cksum) {
                /* NB ibm_cksum zero while computing cksum */
                msg->ibm_cksum = kiblnd_cksum(msg, msg->ibm_nob);
        }
}

/* Validate an incoming wire message of 'nob' bytes, byte-swapping all
 * fields in place when the sender had opposite endianness.  Returns 0
 * on success or -EPROTO for any malformed message. */
int
kiblnd_unpack_msg(kib_msg_t *msg, int nob)
{
        const int hdr_size = offsetof(kib_msg_t, ibm_u);
        __u32     msg_cksum;
        int       flip;
        int       msg_nob;
#if !IBLND_MAP_ON_DEMAND
        int       i;
        int       n;
#endif
        /* 6 bytes are enough to have received magic + version */
        if (nob < 6) {
                CERROR("Short message: %d\n", nob);
                return -EPROTO;
        }

        if (msg->ibm_magic == IBLND_MSG_MAGIC) {
                flip = 0;
        } else if (msg->ibm_magic == __swab32(IBLND_MSG_MAGIC)) {
                flip = 1;
        } else {
                CERROR("Bad magic: %08x\n", msg->ibm_magic);
                return -EPROTO;
        }

        if (msg->ibm_version !=
            (flip ? __swab16(IBLND_MSG_VERSION) : IBLND_MSG_VERSION)) {
                CERROR("Bad version: %d\n", msg->ibm_version);
                return -EPROTO;
        }

        if (nob < hdr_size) {
                CERROR("Short message: %d\n", nob);
                return -EPROTO;
        }

        msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
        if (msg_nob > nob) {
                CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
                return -EPROTO;
        }

        /* checksum must be computed with ibm_cksum zero and BEFORE anything
         * gets flipped */
        msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
        msg->ibm_cksum = 0;
        if (msg_cksum != 0 &&
            msg_cksum != kiblnd_cksum(msg, msg_nob)) {
                CERROR("Bad checksum\n");
                return -EPROTO;
        }
        msg->ibm_cksum = msg_cksum;

        if (flip) {
                /* leave magic unflipped as a clue to peer endianness */
                __swab16s(&msg->ibm_version);
                CLASSERT (sizeof(msg->ibm_type) == 1);
                CLASSERT (sizeof(msg->ibm_credits) == 1);
                msg->ibm_nob = msg_nob;
                __swab64s(&msg->ibm_srcnid);
                __swab64s(&msg->ibm_srcstamp);
                __swab64s(&msg->ibm_dstnid);
                __swab64s(&msg->ibm_dststamp);
        }

        if (msg->ibm_srcnid == LNET_NID_ANY) {
                CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
                return -EPROTO;
        }

        /* per-type body validation (and byte-swap of body fields) */
        switch (msg->ibm_type) {
        default:
                CERROR("Unknown message type %x\n", msg->ibm_type);
                return -EPROTO;

        case IBLND_MSG_NOOP:
                break;

        case IBLND_MSG_IMMEDIATE:
                if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) {
                        CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob,
                               (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]));
                        return -EPROTO;
                }
                break;

        case IBLND_MSG_PUT_REQ:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) {
                        CERROR("Short PUT_REQ: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.putreq)));
                        return -EPROTO;
                }
                break;

        case IBLND_MSG_PUT_ACK:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) {
                        CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.putack)));
                        return -EPROTO;
                }
#if IBLND_MAP_ON_DEMAND
                if (flip) {
                        __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr);
                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob);
                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
                }
#else
                if (flip) {
                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrags);
                }

                n = msg->ibm_u.putack.ibpam_rd.rd_nfrags;
                if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) {
                        CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n",
                               n, IBLND_MAX_RDMA_FRAGS);
                        return -EPROTO;
                }

                if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) {
                        CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
                               (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n]));
                        return -EPROTO;
                }

                if (flip) {
                        for (i = 0; i < n; i++) {
                                __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob);
                                __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr);
                        }
                }
#endif
                break;

        case IBLND_MSG_GET_REQ:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) {
                        CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.get)));
                        return -EPROTO;
                }
#if IBLND_MAP_ON_DEMAND
                if (flip) {
                        __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr);
                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob);
                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
                }
#else
                if (flip) {
                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrags);
                }

                n = msg->ibm_u.get.ibgm_rd.rd_nfrags;
                if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) {
                        CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n",
                               n, IBLND_MAX_RDMA_FRAGS);
                        return -EPROTO;
                }

                if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) {
                        CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
                               (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n]));
                        return -EPROTO;
                }

                if (flip)
                        for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrags; i++) {
                                __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob);
                                __swab64s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr);
                        }
#endif
                break;

        case IBLND_MSG_PUT_NAK:
        case IBLND_MSG_PUT_DONE:
        case IBLND_MSG_GET_DONE:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) {
                        CERROR("Short RDMA completion: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.completion)));
                        return -EPROTO;
                }
                if (flip)
                        __swab32s(&msg->ibm_u.completion.ibcm_status);
                break;

        case IBLND_MSG_CONNREQ:
        case IBLND_MSG_CONNACK:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) {
                        CERROR("Short connreq/ack: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.connparams)));
                        return -EPROTO;
                }
                if (flip) {
                        __swab16s(&msg->ibm_u.connparams.ibcp_queue_depth);
                        __swab16s(&msg->ibm_u.connparams.ibcp_max_frags);
                        __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
                }
                break;
        }
        return 0;
}

/* Allocate and initialise a new peer descriptor for 'nid', returned via
 * *peerp with one reference held for the caller.  Returns 0 or -ENOMEM. */
int
kiblnd_create_peer (lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid)
{
        kib_peer_t    *peer;
        kib_net_t     *net = ni->ni_data;
        unsigned long  flags;

        LASSERT (net != NULL);
        LASSERT (nid != LNET_NID_ANY);

        LIBCFS_ALLOC(peer, sizeof(*peer));
        if (peer == NULL) {
                CERROR("Cannot allocate peer\n");
                return -ENOMEM;
        }

        memset(peer, 0, sizeof(*peer));         /* zero flags etc */

        peer->ibp_ni = ni;
        peer->ibp_nid = nid;
        peer->ibp_error = 0;
        peer->ibp_last_alive = cfs_time_current();
        atomic_set(&peer->ibp_refcount, 1);     /* 1 ref for caller */

        INIT_LIST_HEAD(&peer->ibp_list);        /* not in the peer table yet */
        INIT_LIST_HEAD(&peer->ibp_conns);
        INIT_LIST_HEAD(&peer->ibp_tx_queue);

        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);

        /* always called with a ref on ni, which prevents ni being shutdown */
        LASSERT (net->ibn_shutdown == 0);

        /* npeers only grows with the global lock held */
        atomic_inc(&net->ibn_npeers);

        write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);

        *peerp = peer;
        return 0;
}

/* Free a peer whose refcount has dropped to zero; it must already have
 * been removed from the peer table and have no conns or queued txs. */
void
kiblnd_destroy_peer (kib_peer_t *peer)
{
        kib_net_t *net = peer->ibp_ni->ni_data;

        LASSERT (net != NULL);
        LASSERT (atomic_read(&peer->ibp_refcount) == 0);
        LASSERT (!kiblnd_peer_active(peer));
        LASSERT (peer->ibp_connecting == 0);
        LASSERT (peer->ibp_accepting == 0);
        LASSERT (list_empty(&peer->ibp_conns));
        LASSERT (list_empty(&peer->ibp_tx_queue));

        LIBCFS_FREE(peer, sizeof(*peer));

        /* NB a peer's connections keep a reference on their peer until
         * they are destroyed, so we can be assured that _all_ state to do
         * with this peer has been cleaned up when its refcount drops to
         * zero. */
        atomic_dec(&net->ibn_npeers);
}

/* Tear down a device: release its MR, PD and RDMA CM id (if any) and
 * free the descriptor.  Must have no networks still using it. */
void
kiblnd_destroy_dev (kib_dev_t *dev)
{
        LASSERT (dev->ibd_nnets == 0);

        if (!list_empty(&dev->ibd_list)) /* on kib_devs? */
                list_del_init(&dev->ibd_list);

        if (dev->ibd_mr != NULL)
                ib_dereg_mr(dev->ibd_mr);

        if (dev->ibd_pd != NULL)
                ib_dealloc_pd(dev->ibd_pd);

        if (dev->ibd_cmid != NULL)
                rdma_destroy_id(dev->ibd_cmid);

        LIBCFS_FREE(dev, sizeof(*dev));
}

/* Look up 'nid' in its peer hash chain.  Caller holds the global lock
 * and is responsible for taking its own reference on the result. */
kib_peer_t *
kiblnd_find_peer_locked (lnet_nid_t nid)
{
        /* the caller is responsible for accounting the additional reference
         * that this creates */
        struct list_head *peer_list = kiblnd_nid2peerlist(nid);
        struct list_head *tmp;
        kib_peer_t       *peer;

        list_for_each (tmp, peer_list) {

                peer = list_entry(tmp, kib_peer_t, ibp_list);

                LASSERT (peer->ibp_connecting > 0 || /* creating conns */
                         peer->ibp_accepting > 0 ||
                         !list_empty(&peer->ibp_conns)); /* active conn */

                if (peer->ibp_nid != nid)
                        continue;

                CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
                       peer, libcfs_nid2str(nid),
                       atomic_read(&peer->ibp_refcount));
                return peer;
        }
        return NULL;
}

/* Remove a conn-less peer from the peer table, dropping the table's ref. */
void
kiblnd_unlink_peer_locked (kib_peer_t *peer)
{
        LASSERT (list_empty(&peer->ibp_conns));

        LASSERT (kiblnd_peer_active(peer));
        list_del_init(&peer->ibp_list);
        /* lose peerlist's ref */
        kiblnd_peer_decref(peer);
}

int
kiblnd_get_peer_info (lnet_ni_t *ni, int index,
                      lnet_nid_t *nidp, int *count)
{
        kib_peer_t       *peer;
        struct list_head *ptmp;
        int               i;
        unsigned long     flags;

        read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);

        for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {

                list_for_each (ptmp, &kiblnd_data.kib_peers[i]) {

                        peer = list_entry(ptmp, kib_peer_t, ibp_list);
                        LASSERT (peer->ibp_connecting > 0 ||
                                 peer->ibp_accepting > 0 ||
                                 !list_empty(&peer->ibp_conns));

                        if (peer->ibp_ni != ni)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -