⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 o2iblnd.c

📁 非常经典的一个分布式系统
💻 C
📖 第 1 页 / 共 4 页
字号:
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * Copyright (C) 2006 Cluster File Systems, Inc.
 *   Author: Eric Barton <eric@bartonsoftware.com>
 *
 *   This file is part of Lustre, http://www.lustre.org.
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

#include "o2iblnd.h"

/* LND operations table: wires the kiblnd_* entry points into the lnd_t
 * callbacks for the O2IBLND network type. */
lnd_t the_kiblnd = {
        .lnd_type       = O2IBLND,
        .lnd_startup    = kiblnd_startup,
        .lnd_shutdown   = kiblnd_shutdown,
        .lnd_ctl        = kiblnd_ctl,
        .lnd_send       = kiblnd_send,
        .lnd_recv       = kiblnd_recv,
};

/* Global state for this LND (global lock, peer hash table, ...) */
kib_data_t              kiblnd_data;

/*
 * Compute a checksum over 'nob' bytes starting at 'ptr': for every byte the
 * running sum is rotated left by one bit and the byte is added.
 *
 * Never returns 0, because 0 is reserved to mean "no checksum"; a computed
 * sum of 0 is reported as 1.  A 'nob' <= 0 yields 1.
 *
 * NOTE(review): bytes are read through plain 'char', whose signedness is
 * implementation-defined, so bytes >= 0x80 may contribute differently on
 * different ABIs.  Left untouched because changing it would change the
 * checksum values exchanged with peers -- confirm before altering.
 */
__u32
kiblnd_cksum (void *ptr, int nob)
{
        char  *c  = ptr;
        __u32  sum = 0;

        while (nob-- > 0)
                sum = ((sum << 1) | (sum >> 31)) + *c++;

        /* ensure I don't return 0 (== no checksum) */
        return (sum == 0) ? 1 : sum;
}

/*
 * Set the type of 'msg' and its total size: the fixed header (everything
 * before ibm_u) plus 'body_nob' bytes of type-specific body.
 */
void
kiblnd_init_msg (kib_msg_t *msg, int type, int body_nob)
{
        msg->ibm_type = type;
        msg->ibm_nob  = offsetof(kib_msg_t, ibm_u) + body_nob;
}

/*
 * Fill in the remaining header fields of 'msg' before it is sent:
 * magic, version, credits, source/destination NID and stamps.
 * ibm_type and ibm_nob must already have been set (see kiblnd_init_msg()).
 *
 * If the kib_cksum tunable is non-zero, the checksum is computed over
 * ibm_nob bytes of the message with ibm_cksum itself zeroed; otherwise
 * ibm_cksum is left 0.
 */
void
kiblnd_pack_msg (lnet_ni_t *ni, kib_msg_t *msg,
                 int credits, lnet_nid_t dstnid, __u64 dststamp)
{
        kib_net_t *net = ni->ni_data;

        /* CAVEAT EMPTOR! all message fields not set here should have been
         * initialised previously. */
        msg->ibm_magic    = IBLND_MSG_MAGIC;
        msg->ibm_version  = IBLND_MSG_VERSION;
        /*   ibm_type */
        msg->ibm_credits  = credits;
        /*   ibm_nob */
        msg->ibm_cksum    = 0;
        msg->ibm_srcnid   = lnet_ptlcompat_srcnid(ni->ni_nid, dstnid);
        msg->ibm_srcstamp = net->ibn_incarnation;
        msg->ibm_dstnid   = dstnid;
        msg->ibm_dststamp = dststamp;

        if (*kiblnd_tunables.kib_cksum) {
                /* NB ibm_cksum zero while computing cksum */
                msg->ibm_cksum = kiblnd_cksum(msg, msg->ibm_nob);
        }
}

/*
 * Validate a received message of 'nob' bytes and convert it in place to
 * host byte order.
 *
 * Checks, in order: enough bytes for magic + version, magic (which also
 * determines whether the peer has the opposite endianness), version, header
 * size, claimed message size, checksum (skipped when the sender sent 0),
 * source NID, and finally the type-specific body size and contents.
 *
 * When the peer's endianness differs, header fields and the type-specific
 * body fields are byte-swapped; ibm_magic is deliberately left unswapped.
 *
 * Returns 0 if the message is acceptable, -EPROTO otherwise.
 */
int
kiblnd_unpack_msg(kib_msg_t *msg, int nob)
{
        const int hdr_size = offsetof(kib_msg_t, ibm_u);
        __u32     msg_cksum;
        int       flip;
        int       msg_nob;
#if !IBLND_MAP_ON_DEMAND
        int       i;
        int       n;
#endif
        /* 6 bytes are enough to have received magic + version */
        if (nob < 6) {
                CERROR("Short message: %d\n", nob);
                return -EPROTO;
        }

        if (msg->ibm_magic == IBLND_MSG_MAGIC) {
                flip = 0;
        } else if (msg->ibm_magic == __swab32(IBLND_MSG_MAGIC)) {
                flip = 1;
        } else {
                CERROR("Bad magic: %08x\n", msg->ibm_magic);
                return -EPROTO;
        }

        if (msg->ibm_version !=
            (flip ? __swab16(IBLND_MSG_VERSION) : IBLND_MSG_VERSION)) {
                CERROR("Bad version: %d\n", msg->ibm_version);
                return -EPROTO;
        }

        if (nob < hdr_size) {
                CERROR("Short message: %d\n", nob);
                return -EPROTO;
        }

        msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;

        /* The claimed size must cover at least the header.  This signed
         * comparison also rejects sizes that went negative when stored in
         * an int; without it such a value would slip through the per-type
         * checks below, which compare against unsigned sizeof/offsetof
         * expressions and would see it as a huge positive number. */
        if (msg_nob < hdr_size) {
                CERROR("Bad message size: %d (header is %d)\n",
                       msg_nob, hdr_size);
                return -EPROTO;
        }

        if (msg_nob > nob) {
                CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
                return -EPROTO;
        }

        /* checksum must be computed with ibm_cksum zero and BEFORE anything
         * gets flipped */
        msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
        msg->ibm_cksum = 0;
        if (msg_cksum != 0 &&
            msg_cksum != kiblnd_cksum(msg, msg_nob)) {
                CERROR("Bad checksum\n");
                return -EPROTO;
        }
        msg->ibm_cksum = msg_cksum;

        if (flip) {
                /* leave magic unflipped as a clue to peer endianness */
                __swab16s(&msg->ibm_version);
                CLASSERT (sizeof(msg->ibm_type) == 1);
                CLASSERT (sizeof(msg->ibm_credits) == 1);
                msg->ibm_nob = msg_nob;
                __swab64s(&msg->ibm_srcnid);
                __swab64s(&msg->ibm_srcstamp);
                __swab64s(&msg->ibm_dstnid);
                __swab64s(&msg->ibm_dststamp);
        }

        if (msg->ibm_srcnid == LNET_NID_ANY) {
                CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
                return -EPROTO;
        }

        switch (msg->ibm_type) {
        default:
                CERROR("Unknown message type %x\n", msg->ibm_type);
                return -EPROTO;

        case IBLND_MSG_NOOP:
                break;

        case IBLND_MSG_IMMEDIATE:
                if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) {
                        CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob,
                               (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]));
                        return -EPROTO;
                }
                break;

        case IBLND_MSG_PUT_REQ:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) {
                        CERROR("Short PUT_REQ: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.putreq)));
                        return -EPROTO;
                }
                break;

        case IBLND_MSG_PUT_ACK:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) {
                        CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.putack)));
                        return -EPROTO;
                }
#if IBLND_MAP_ON_DEMAND
                if (flip) {
                        __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr);
                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob);
                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
                }
#else
                if (flip) {
                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
                        __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrags);
                }

                /* validate the fragment count before using it as an index */
                n = msg->ibm_u.putack.ibpam_rd.rd_nfrags;
                if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) {
                        CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n",
                                n, IBLND_MAX_RDMA_FRAGS);
                        return -EPROTO;
                }

                if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) {
                        CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
                               (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n]));
                        return -EPROTO;
                }

                if (flip) {
                        for (i = 0; i < n; i++) {
                                __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob);
                                __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr);
                        }
                }
#endif
                break;

        case IBLND_MSG_GET_REQ:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) {
                        CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.get)));
                        return -EPROTO;
                }
#if IBLND_MAP_ON_DEMAND
                if (flip) {
                        __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr);
                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob);
                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
                }
#else
                if (flip) {
                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
                        __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrags);
                }

                /* validate the fragment count before using it as an index */
                n = msg->ibm_u.get.ibgm_rd.rd_nfrags;
                if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) {
                        CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n",
                                n, IBLND_MAX_RDMA_FRAGS);
                        return -EPROTO;
                }

                if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) {
                        CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
                               (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n]));
                        return -EPROTO;
                }

                if (flip) {
                        /* iterate over the validated count, as PUT_ACK does */
                        for (i = 0; i < n; i++) {
                                __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob);
                                __swab64s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr);
                        }
                }
#endif
                break;

        case IBLND_MSG_PUT_NAK:
        case IBLND_MSG_PUT_DONE:
        case IBLND_MSG_GET_DONE:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) {
                        CERROR("Short RDMA completion: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.completion)));
                        return -EPROTO;
                }
                if (flip)
                        __swab32s(&msg->ibm_u.completion.ibcm_status);
                break;

        case IBLND_MSG_CONNREQ:
        case IBLND_MSG_CONNACK:
                if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) {
                        CERROR("Short connreq/ack: %d(%d)\n", msg_nob,
                               (int)(hdr_size + sizeof(msg->ibm_u.connparams)));
                        return -EPROTO;
                }
                if (flip) {
                        __swab16s(&msg->ibm_u.connparams.ibcp_queue_depth);
                        __swab16s(&msg->ibm_u.connparams.ibcp_max_frags);
                        __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
                }
                break;
        }
        return 0;
}

/*
 * Allocate and initialise a peer for 'nid' on network interface 'ni'.
 *
 * On success the new peer is stored in '*peerp' holding one reference for
 * the caller, the net's peer count has been incremented, and 0 is returned.
 * The peer is not added to the peer table here.
 * Returns -ENOMEM if the allocation fails ('*peerp' is not written).
 */
int
kiblnd_create_peer (lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid)
{
        kib_peer_t     *peer;
        kib_net_t      *net = ni->ni_data;
        unsigned long   flags;

        LASSERT (net != NULL);
        LASSERT (nid != LNET_NID_ANY);

        LIBCFS_ALLOC(peer, sizeof(*peer));
        if (peer == NULL) {
                CERROR("Cannot allocate peer\n");
                return -ENOMEM;
        }

        memset(peer, 0, sizeof(*peer));         /* zero flags etc */

        peer->ibp_ni = ni;
        peer->ibp_nid = nid;
        peer->ibp_error = 0;
        peer->ibp_last_alive = cfs_time_current();
        atomic_set(&peer->ibp_refcount, 1);     /* 1 ref for caller */

        INIT_LIST_HEAD(&peer->ibp_list);       /* not in the peer table yet */
        INIT_LIST_HEAD(&peer->ibp_conns);
        INIT_LIST_HEAD(&peer->ibp_tx_queue);

        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);

        /* always called with a ref on ni, which prevents ni being shutdown */
        LASSERT (net->ibn_shutdown == 0);

        /* npeers only grows with the global lock held */
        atomic_inc(&net->ibn_npeers);

        write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);

        *peerp = peer;
        return 0;
}

/*
 * Free a peer whose reference count has dropped to zero and decrement the
 * owning net's peer count.  Asserts that the peer is no longer active, has
 * no connection attempts in progress, and has no connections or queued
 * transmits.
 */
void
kiblnd_destroy_peer (kib_peer_t *peer)
{
        /* fetch the net now: 'peer' must not be touched once freed */
        kib_net_t *net = peer->ibp_ni->ni_data;

        LASSERT (net != NULL);
        LASSERT (atomic_read(&peer->ibp_refcount) == 0);
        LASSERT (!kiblnd_peer_active(peer));
        LASSERT (peer->ibp_connecting == 0);
        LASSERT (peer->ibp_accepting == 0);
        LASSERT (list_empty(&peer->ibp_conns));
        LASSERT (list_empty(&peer->ibp_tx_queue));

        LIBCFS_FREE(peer, sizeof(*peer));

        /* NB a peer's connections keep a reference on their peer until
         * they are destroyed, so we can be assured that _all_ state to do
         * with this peer has been cleaned up when its refcount drops to
         * zero. */
        atomic_dec(&net->ibn_npeers);
}

/*
 * Tear down a device that no net uses any more: take it off whatever list
 * it is on, release its MR, PD and CM id if they were set up, and free it.
 */
void
kiblnd_destroy_dev (kib_dev_t *dev)
{
        LASSERT (dev->ibd_nnets == 0);

        if (!list_empty(&dev->ibd_list)) /* on kib_devs? */
                list_del_init(&dev->ibd_list);

        if (dev->ibd_mr != NULL)
                ib_dereg_mr(dev->ibd_mr);

        if (dev->ibd_pd != NULL)
                ib_dealloc_pd(dev->ibd_pd);

        if (dev->ibd_cmid != NULL)
                rdma_destroy_id(dev->ibd_cmid);

        LIBCFS_FREE(dev, sizeof(*dev));
}

/*
 * Search the peer list selected by kiblnd_nid2peerlist(nid) for a peer
 * whose NID equals 'nid'.  Returns the peer, or NULL if there is none.
 * The reference count is read for the debug message only; it is not
 * modified here.
 */
kib_peer_t *
kiblnd_find_peer_locked (lnet_nid_t nid)
{
        /* the caller is responsible for accounting the additional reference
         * that this creates */
        struct list_head *peer_list = kiblnd_nid2peerlist(nid);
        struct list_head *tmp;
        kib_peer_t       *peer;

        list_for_each (tmp, peer_list) {

                peer = list_entry(tmp, kib_peer_t, ibp_list);

                LASSERT (peer->ibp_connecting > 0 || /* creating conns */
                         peer->ibp_accepting > 0 ||
                         !list_empty(&peer->ibp_conns));  /* active conn */

                if (peer->ibp_nid != nid)
                        continue;

                CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
                       peer, libcfs_nid2str(nid),
                       atomic_read(&peer->ibp_refcount));
                return peer;
        }
        return NULL;
}

/*
 * Remove an active peer that has no connections from its peer list and
 * drop the reference that the list held on it.
 */
void
kiblnd_unlink_peer_locked (kib_peer_t *peer)
{
        LASSERT (list_empty(&peer->ibp_conns));

        LASSERT (kiblnd_peer_active(peer));
        list_del_init(&peer->ibp_list);
        /* lose peerlist's ref */
        kiblnd_peer_decref(peer);
}

int
kiblnd_get_peer_info (lnet_ni_t *ni, int index,
        lnet_nid_t *nidp, int *count){        kib_peer_t        *peer;        struct list_head  *ptmp;        int                i;        unsigned long      flags;        read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);        for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {                list_for_each (ptmp, &kiblnd_data.kib_peers[i]) {                        peer = list_entry(ptmp, kib_peer_t, ibp_list);                        LASSERT (peer->ibp_connecting > 0 ||                                 peer->ibp_accepting > 0 ||                                 !list_empty(&peer->ibp_conns));                        if (peer->ibp_ni != ni)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -