⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 o2iblnd.h

📁 非常经典的一个分布式系统
💻 H
📖 第 1 页 / 共 2 页
字号:
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (C) 2006 Cluster File Systems, Inc. *   Author: Eric Barton <eric@bartonsoftware.com> * *   This file is part of Lustre, http://www.lustre.org. * *   Lustre is free software; you can redistribute it and/or *   modify it under the terms of version 2 of the GNU General Public *   License as published by the Free Software Foundation. * *   Lustre is distributed in the hope that it will be useful, *   but WITHOUT ANY WARRANTY; without even the implied warranty of *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *   GNU General Public License for more details. * *   You should have received a copy of the GNU General Public License *   along with Lustre; if not, write to the Free Software *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * */#ifndef EXPORT_SYMTAB# define EXPORT_SYMTAB#endif#ifndef AUTOCONF_INCLUDED#include <linux/config.h>#endif#include <linux/module.h>#include <linux/kernel.h>#include <linux/mm.h>#include <linux/string.h>#include <linux/stat.h>#include <linux/errno.h>#include <linux/smp_lock.h>#include <linux/unistd.h>#include <linux/uio.h>#include <asm/system.h>#include <asm/uaccess.h>#include <asm/io.h>#include <linux/init.h>#include <linux/fs.h>#include <linux/file.h>#include <linux/stat.h>#include <linux/list.h>#include <linux/kmod.h>#include <linux/sysctl.h>#include <linux/random.h>#include <net/sock.h>#include <linux/in.h>#define DEBUG_SUBSYSTEM S_LND#include <libcfs/kp30.h>#include <lnet/lnet.h>#include <lnet/lib-lnet.h>#if !HAVE_GFP_Ttypedef int gfp_t;#endif#include <rdma/rdma_cm.h>#include <rdma/ib_cm.h>#include <rdma/ib_verbs.h>#include <rdma/ib_fmr_pool.h>/* tunables fixed at compile time */#ifdef CONFIG_SMP# define IBLND_N_SCHED      num_online_cpus()   /* # schedulers */#else# define IBLND_N_SCHED      1                   /* # schedulers */#endif#define IBLND_PEER_HASH_SIZE         101        /* # peer lists */#define IBLND_RESCHED                100        /* # scheduler loops before reschedule */#define IBLND_MSG_QUEUE_SIZE         8          /* # messages/RDMAs in-flight */#define IBLND_CREDIT_HIGHWATER       7          /* when eagerly to return credits */#define IBLND_MSG_SIZE              (4<<10)     /* max size of queued messages (inc hdr) */#define IBLND_MAP_ON_DEMAND  0#if IBLND_MAP_ON_DEMAND# define IBLND_MAX_RDMA_FRAGS        1#else# define IBLND_MAX_RDMA_FRAGS        LNET_MAX_IOV#endif/************************//* derived constants... *//* TX messages (shared by all connections) */#define IBLND_TX_MSGS()       (*kiblnd_tunables.kib_ntx)#define IBLND_TX_MSG_BYTES()  (IBLND_TX_MSGS() * IBLND_MSG_SIZE)#define IBLND_TX_MSG_PAGES()  ((IBLND_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE)/* RX messages (per connection) */#define IBLND_RX_MSGS         (IBLND_MSG_QUEUE_SIZE*2)#define IBLND_RX_MSG_BYTES    (IBLND_RX_MSGS * IBLND_MSG_SIZE)#define IBLND_RX_MSG_PAGES    ((IBLND_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE)#define IBLND_CQ_ENTRIES()    (IBLND_RX_MSGS +                                  \                               (*kiblnd_tunables.kib_concurrent_sends) *        \                               (1 + IBLND_MAX_RDMA_FRAGS))typedef struct{        unsigned int     *kib_service;          /* IB service number */        int              *kib_min_reconnect_interval; /* first failed connection retry... */        int              *kib_max_reconnect_interval; /* ...exponentially increasing to this */        int              *kib_cksum;            /* checksum kib_msg_t? */        int              *kib_timeout;          /* comms timeout (seconds) */        int              *kib_keepalive;        /* keepalive timeout (seconds) */        int              *kib_ntx;              /* # tx descs */        int              *kib_credits;          /* # concurrent sends */        int              *kib_peercredits;      /* # concurrent sends to 1 peer */        char            **kib_default_ipif;     /* default IPoIB interface */        int              *kib_retry_count;        int              *kib_rnr_retry_count;        int              *kib_concurrent_sends; /* send work queue sizing */        int		 *kib_ib_mtu;		/* IB MTU */#if IBLND_MAP_ON_DEMAND        int              *kib_fmr_pool_size;    /* # FMRs in pool */        int              *kib_fmr_flush_trigger; /* When to trigger FMR flush */        int              *kib_fmr_cache;        /* enable FMR pool cache? */#endif#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM        cfs_sysctl_table_header_t *kib_sysctl;  /* sysctl interface */#endif} kib_tunables_t;typedef struct{        int               ibp_npages;           /* # pages */        struct page      *ibp_pages[0];} kib_pages_t;typedef struct {        struct list_head     ibd_list;          /* chain on kib_devs */        __u32                ibd_ifip;          /* IPoIB interface IP */        char                 ibd_ifname[32];    /* IPoIB interface name */        int                  ibd_nnets;         /* # nets extant */        struct rdma_cm_id   *ibd_cmid;          /* IB listener (bound to 1 device) */        struct ib_pd        *ibd_pd;            /* PD for the device */        struct ib_mr        *ibd_mr;            /* MR for non RDMA I/O */} kib_dev_t;typedef struct{        __u64                ibn_incarnation;   /* my epoch */        int                  ibn_init;          /* initialisation state */        int                  ibn_shutdown;      /* shutting down? */        atomic_t             ibn_npeers;        /* # peers extant */        atomic_t             ibn_nconns;        /* # connections extant */        struct kib_tx       *ibn_tx_descs;      /* all the tx descriptors */        kib_pages_t         *ibn_tx_pages;      /* premapped tx msg pages */        struct list_head     ibn_idle_txs;      /* idle tx descriptors */        spinlock_t           ibn_tx_lock;       /* serialise */#if IBLND_MAP_ON_DEMAND        struct ib_fmr_pool  *ibn_fmrpool;       /* FMR pool for RDMA I/O */#endif        kib_dev_t           *ibn_dev;           /* underlying IB device */} kib_net_t;typedef struct{        int                  kib_init;          /* initialisation state */        int                  kib_shutdown;      /* shut down? */        struct list_head     kib_devs;          /* IB devices extant */        atomic_t             kib_nthreads;      /* # live threads */        rwlock_t             kib_global_lock;   /* stabilize net/dev/peer/conn ops */        struct list_head    *kib_peers;         /* hash table of all my known peers */        int                  kib_peer_hash_size; /* size of kib_peers */        void                *kib_connd;         /* the connd task (serialisation assertions) */        struct list_head     kib_connd_conns;   /* connections to setup/teardown */        struct list_head     kib_connd_zombies; /* connections with zero refcount */        wait_queue_head_t    kib_connd_waitq;   /* connection daemon sleeps here */        spinlock_t           kib_connd_lock;    /* serialise */        wait_queue_head_t    kib_sched_waitq;   /* schedulers sleep here */        struct list_head     kib_sched_conns;   /* conns to check for rx completions */        spinlock_t           kib_sched_lock;    /* serialise */        __u64                kib_next_tx_cookie; /* RDMA completion cookie */        struct ib_qp_attr    kib_error_qpa;      /* QP->ERROR */} kib_data_t;#define IBLND_INIT_NOTHING         0#define IBLND_INIT_DATA            1#define IBLND_INIT_ALL             2/************************************************************************ * IB Wire message format. * These are sent in sender's byte order (i.e. receiver flips). */typedef struct kib_connparams{        __u16             ibcp_queue_depth;        __u16             ibcp_max_frags;        __u32             ibcp_max_msg_size;} WIRE_ATTR kib_connparams_t;typedef struct{        lnet_hdr_t        ibim_hdr;             /* portals header */        char              ibim_payload[0];      /* piggy-backed payload */} WIRE_ATTR kib_immediate_msg_t;#if IBLND_MAP_ON_DEMANDtypedef struct{	__u64             rd_addr;             	/* IO VMA address */	__u32             rd_nob;              	/* # of bytes */	__u32             rd_key;		/* remote key */} WIRE_ATTR kib_rdma_desc_t;#elsetypedef struct{        __u32             rf_nob;               /* # bytes this frag */        __u64             rf_addr;              /* CAVEAT EMPTOR: misaligned!! */} WIRE_ATTR kib_rdma_frag_t;typedef struct{        __u32             rd_key;               /* local/remote key */        __u32             rd_nfrags;            /* # fragments */        kib_rdma_frag_t   rd_frags[0];          /* buffer frags */} WIRE_ATTR kib_rdma_desc_t;#endif        typedef struct{        lnet_hdr_t        ibprm_hdr;            /* portals header */        __u64             ibprm_cookie;         /* opaque completion cookie */} WIRE_ATTR kib_putreq_msg_t;typedef struct{        __u64             ibpam_src_cookie;     /* reflected completion cookie */        __u64             ibpam_dst_cookie;     /* opaque completion cookie */        kib_rdma_desc_t   ibpam_rd;             /* sender's sink buffer */} WIRE_ATTR kib_putack_msg_t;typedef struct{        lnet_hdr_t        ibgm_hdr;             /* portals header */        __u64             ibgm_cookie;          /* opaque completion cookie */        kib_rdma_desc_t   ibgm_rd;              /* rdma descriptor */} WIRE_ATTR kib_get_msg_t;typedef struct{        __u64             ibcm_cookie;          /* opaque completion cookie */        __s32             ibcm_status;          /* < 0 failure: >= 0 length */} WIRE_ATTR kib_completion_msg_t;typedef struct{        /* First 2 fields fixed FOR ALL TIME */        __u32             ibm_magic;            /* I'm an openibnal message */        __u16             ibm_version;          /* this is my version number */        __u8              ibm_type;             /* msg type */        __u8              ibm_credits;          /* returned credits */        __u32             ibm_nob;              /* # bytes in whole message */        __u32             ibm_cksum;            /* checksum (0 == no checksum) */        __u64             ibm_srcnid;           /* sender's NID */        __u64             ibm_srcstamp;         /* sender's incarnation */        __u64             ibm_dstnid;           /* destination's NID */        __u64             ibm_dststamp;         /* destination's incarnation */        union {                kib_connparams_t      connparams;                kib_immediate_msg_t   immediate;                kib_putreq_msg_t      putreq;                kib_putack_msg_t      putack;                kib_get_msg_t         get;                kib_completion_msg_t  completion;        } WIRE_ATTR ibm_u;} WIRE_ATTR kib_msg_t;#define IBLND_MSG_MAGIC LNET_PROTO_IB_MAGIC	/* unique magic */#define IBLND_MSG_VERSION           0x11#define IBLND_MSG_CONNREQ           0xc0        /* connection request */#define IBLND_MSG_CONNACK           0xc1        /* connection acknowledge */#define IBLND_MSG_NOOP              0xd0        /* nothing (just credits) */#define IBLND_MSG_IMMEDIATE         0xd1        /* immediate */#define IBLND_MSG_PUT_REQ           0xd2        /* putreq (src->sink) */#define IBLND_MSG_PUT_NAK           0xd3        /* completion (sink->src) */#define IBLND_MSG_PUT_ACK           0xd4        /* putack (sink->src) */#define IBLND_MSG_PUT_DONE          0xd5        /* completion (src->sink) */#define IBLND_MSG_GET_REQ           0xd6        /* getreq (sink->src) */#define IBLND_MSG_GET_DONE          0xd7        /* completion (src->sink: all OK) */typedef struct {        __u32            ibr_magic;             /* sender's magic */        __u16            ibr_version;           /* sender's version */        __u8             ibr_why;               /* reject reason */} WIRE_ATTR kib_rej_t;/* connection rejection reasons */#define IBLND_REJECT_CONN_RACE       1          /* You lost connection race */#define IBLND_REJECT_NO_RESOURCES    2          /* Out of memory/conns etc */#define IBLND_REJECT_FATAL           3          /* Anything else *//***********************************************************************/typedef struct kib_rx                           /* receive message */{        struct list_head          rx_list;      /* queue for attention */        struct kib_conn          *rx_conn;      /* owning conn */        int                       rx_nob;       /* # bytes received (-1 while posted) */        enum ib_wc_status         rx_status;    /* completion status */        kib_msg_t                *rx_msg;       /* message buffer (host vaddr) */        __u64                     rx_msgaddr;   /* message buffer (I/O addr) */        DECLARE_PCI_UNMAP_ADDR   (rx_msgunmap); /* for dma_unmap_single() */        struct ib_recv_wr         rx_wrq;       /* receive work item... */        struct ib_sge             rx_sge;       /* ...and its memory */} kib_rx_t;#define IBLND_POSTRX_DONT_POST    0             /* don't post */#define IBLND_POSTRX_NO_CREDIT    1             /* post: no credits */#define IBLND_POSTRX_PEER_CREDIT  2             /* post: give peer back 1 credit */#define IBLND_POSTRX_RSRVD_CREDIT 3             /* post: give myself back 1 reserved credit */typedef struct kib_tx                           /* transmit message */{        struct list_head          tx_list;      /* queue on idle_txs ibc_tx_queue etc. */        struct kib_conn          *tx_conn;      /* owning conn */        int                       tx_sending;   /* # tx callbacks outstanding */        int                       tx_queued;    /* queued for sending */        int                       tx_waiting;   /* waiting for peer */        int                       tx_status;    /* LNET completion status */        unsigned long             tx_deadline;  /* completion deadline */        __u64                     tx_cookie;    /* completion cookie */        lnet_msg_t               *tx_lntmsg[2]; /* lnet msgs to finalize on completion */        kib_msg_t                *tx_msg;       /* message buffer (host vaddr) */        __u64                     tx_msgaddr;   /* message buffer (I/O addr) */        DECLARE_PCI_UNMAP_ADDR   (tx_msgunmap); /* for dma_unmap_single() */        int                       tx_nwrq;      /* # send work items */#if IBLND_MAP_ON_DEMAND        struct ib_send_wr         tx_wrq[2];    /* send work items... */        struct ib_sge             tx_sge[2];    /* ...and their memory */        kib_rdma_desc_t           tx_rd[1];     /* rdma descriptor */        __u64                    *tx_pages;     /* rdma phys page addrs */        struct ib_pool_fmr       *tx_fmr;       /* rdma mapping (mapped if != NULL) */#else        struct ib_send_wr        *tx_wrq;       /* send work items... */        struct ib_sge            *tx_sge;       /* ...and their memory */        kib_rdma_desc_t          *tx_rd;        /* rdma descriptor */        int                       tx_nfrags;    /* # entries in... */        struct scatterlist       *tx_frags;     /* dma_map_sg descriptor */        int                       tx_dmadir;    /* dma direction */#endif        

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -