📄 o2iblnd.h
字号:
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (C) 2006 Cluster File Systems, Inc. * Author: Eric Barton <eric@bartonsoftware.com> * * This file is part of Lustre, http://www.lustre.org. * * Lustre is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * Lustre is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Lustre; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * */#ifndef EXPORT_SYMTAB# define EXPORT_SYMTAB#endif#ifndef AUTOCONF_INCLUDED#include <linux/config.h>#endif#include <linux/module.h>#include <linux/kernel.h>#include <linux/mm.h>#include <linux/string.h>#include <linux/stat.h>#include <linux/errno.h>#include <linux/smp_lock.h>#include <linux/unistd.h>#include <linux/uio.h>#include <asm/system.h>#include <asm/uaccess.h>#include <asm/io.h>#include <linux/init.h>#include <linux/fs.h>#include <linux/file.h>#include <linux/stat.h>#include <linux/list.h>#include <linux/kmod.h>#include <linux/sysctl.h>#include <linux/random.h>#include <net/sock.h>#include <linux/in.h>#define DEBUG_SUBSYSTEM S_LND#include <libcfs/kp30.h>#include <lnet/lnet.h>#include <lnet/lib-lnet.h>#if !HAVE_GFP_Ttypedef int gfp_t;#endif#include <rdma/rdma_cm.h>#include <rdma/ib_cm.h>#include <rdma/ib_verbs.h>#include <rdma/ib_fmr_pool.h>/* tunables fixed at compile time */#ifdef CONFIG_SMP# define IBLND_N_SCHED num_online_cpus() /* # schedulers */#else# define IBLND_N_SCHED 1 /* # schedulers */#endif#define IBLND_PEER_HASH_SIZE 101 /* # peer lists */#define IBLND_RESCHED 100 /* # scheduler loops before reschedule */#define IBLND_MSG_QUEUE_SIZE 8 /* # messages/RDMAs in-flight */#define IBLND_CREDIT_HIGHWATER 7 /* when eagerly to return credits */#define IBLND_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */#define IBLND_MAP_ON_DEMAND 0#if IBLND_MAP_ON_DEMAND# define IBLND_MAX_RDMA_FRAGS 1#else# define IBLND_MAX_RDMA_FRAGS LNET_MAX_IOV#endif/************************//* derived constants... *//* TX messages (shared by all connections) */#define IBLND_TX_MSGS() (*kiblnd_tunables.kib_ntx)#define IBLND_TX_MSG_BYTES() (IBLND_TX_MSGS() * IBLND_MSG_SIZE)#define IBLND_TX_MSG_PAGES() ((IBLND_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE)/* RX messages (per connection) */#define IBLND_RX_MSGS (IBLND_MSG_QUEUE_SIZE*2)#define IBLND_RX_MSG_BYTES (IBLND_RX_MSGS * IBLND_MSG_SIZE)#define IBLND_RX_MSG_PAGES ((IBLND_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE)#define IBLND_CQ_ENTRIES() (IBLND_RX_MSGS + \ (*kiblnd_tunables.kib_concurrent_sends) * \ (1 + IBLND_MAX_RDMA_FRAGS))typedef struct{ unsigned int *kib_service; /* IB service number */ int *kib_min_reconnect_interval; /* first failed connection retry... */ int *kib_max_reconnect_interval; /* ...exponentially increasing to this */ int *kib_cksum; /* checksum kib_msg_t? */ int *kib_timeout; /* comms timeout (seconds) */ int *kib_keepalive; /* keepalive timeout (seconds) */ int *kib_ntx; /* # tx descs */ int *kib_credits; /* # concurrent sends */ int *kib_peercredits; /* # concurrent sends to 1 peer */ char **kib_default_ipif; /* default IPoIB interface */ int *kib_retry_count; int *kib_rnr_retry_count; int *kib_concurrent_sends; /* send work queue sizing */ int *kib_ib_mtu; /* IB MTU */#if IBLND_MAP_ON_DEMAND int *kib_fmr_pool_size; /* # FMRs in pool */ int *kib_fmr_flush_trigger; /* When to trigger FMR flush */ int *kib_fmr_cache; /* enable FMR pool cache? */#endif#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM cfs_sysctl_table_header_t *kib_sysctl; /* sysctl interface */#endif} kib_tunables_t;typedef struct{ int ibp_npages; /* # pages */ struct page *ibp_pages[0];} kib_pages_t;typedef struct { struct list_head ibd_list; /* chain on kib_devs */ __u32 ibd_ifip; /* IPoIB interface IP */ char ibd_ifname[32]; /* IPoIB interface name */ int ibd_nnets; /* # nets extant */ struct rdma_cm_id *ibd_cmid; /* IB listener (bound to 1 device) */ struct ib_pd *ibd_pd; /* PD for the device */ struct ib_mr *ibd_mr; /* MR for non RDMA I/O */} kib_dev_t;typedef struct{ __u64 ibn_incarnation; /* my epoch */ int ibn_init; /* initialisation state */ int ibn_shutdown; /* shutting down? */ atomic_t ibn_npeers; /* # peers extant */ atomic_t ibn_nconns; /* # connections extant */ struct kib_tx *ibn_tx_descs; /* all the tx descriptors */ kib_pages_t *ibn_tx_pages; /* premapped tx msg pages */ struct list_head ibn_idle_txs; /* idle tx descriptors */ spinlock_t ibn_tx_lock; /* serialise */#if IBLND_MAP_ON_DEMAND struct ib_fmr_pool *ibn_fmrpool; /* FMR pool for RDMA I/O */#endif kib_dev_t *ibn_dev; /* underlying IB device */} kib_net_t;typedef struct{ int kib_init; /* initialisation state */ int kib_shutdown; /* shut down? */ struct list_head kib_devs; /* IB devices extant */ atomic_t kib_nthreads; /* # live threads */ rwlock_t kib_global_lock; /* stabilize net/dev/peer/conn ops */ struct list_head *kib_peers; /* hash table of all my known peers */ int kib_peer_hash_size; /* size of kib_peers */ void *kib_connd; /* the connd task (serialisation assertions) */ struct list_head kib_connd_conns; /* connections to setup/teardown */ struct list_head kib_connd_zombies; /* connections with zero refcount */ wait_queue_head_t kib_connd_waitq; /* connection daemon sleeps here */ spinlock_t kib_connd_lock; /* serialise */ wait_queue_head_t kib_sched_waitq; /* schedulers sleep here */ struct list_head kib_sched_conns; /* conns to check for rx completions */ spinlock_t kib_sched_lock; /* serialise */ __u64 kib_next_tx_cookie; /* RDMA completion cookie */ struct ib_qp_attr kib_error_qpa; /* QP->ERROR */} kib_data_t;#define IBLND_INIT_NOTHING 0#define IBLND_INIT_DATA 1#define IBLND_INIT_ALL 2/************************************************************************ * IB Wire message format. * These are sent in sender's byte order (i.e. receiver flips). */typedef struct kib_connparams{ __u16 ibcp_queue_depth; __u16 ibcp_max_frags; __u32 ibcp_max_msg_size;} WIRE_ATTR kib_connparams_t;typedef struct{ lnet_hdr_t ibim_hdr; /* portals header */ char ibim_payload[0]; /* piggy-backed payload */} WIRE_ATTR kib_immediate_msg_t;#if IBLND_MAP_ON_DEMANDtypedef struct{ __u64 rd_addr; /* IO VMA address */ __u32 rd_nob; /* # of bytes */ __u32 rd_key; /* remote key */} WIRE_ATTR kib_rdma_desc_t;#elsetypedef struct{ __u32 rf_nob; /* # bytes this frag */ __u64 rf_addr; /* CAVEAT EMPTOR: misaligned!! */} WIRE_ATTR kib_rdma_frag_t;typedef struct{ __u32 rd_key; /* local/remote key */ __u32 rd_nfrags; /* # fragments */ kib_rdma_frag_t rd_frags[0]; /* buffer frags */} WIRE_ATTR kib_rdma_desc_t;#endif typedef struct{ lnet_hdr_t ibprm_hdr; /* portals header */ __u64 ibprm_cookie; /* opaque completion cookie */} WIRE_ATTR kib_putreq_msg_t;typedef struct{ __u64 ibpam_src_cookie; /* reflected completion cookie */ __u64 ibpam_dst_cookie; /* opaque completion cookie */ kib_rdma_desc_t ibpam_rd; /* sender's sink buffer */} WIRE_ATTR kib_putack_msg_t;typedef struct{ lnet_hdr_t ibgm_hdr; /* portals header */ __u64 ibgm_cookie; /* opaque completion cookie */ kib_rdma_desc_t ibgm_rd; /* rdma descriptor */} WIRE_ATTR kib_get_msg_t;typedef struct{ __u64 ibcm_cookie; /* opaque completion cookie */ __s32 ibcm_status; /* < 0 failure: >= 0 length */} WIRE_ATTR kib_completion_msg_t;typedef struct{ /* First 2 fields fixed FOR ALL TIME */ __u32 ibm_magic; /* I'm an openibnal message */ __u16 ibm_version; /* this is my version number */ __u8 ibm_type; /* msg type */ __u8 ibm_credits; /* returned credits */ __u32 ibm_nob; /* # bytes in whole message */ __u32 ibm_cksum; /* checksum (0 == no checksum) */ __u64 ibm_srcnid; /* sender's NID */ __u64 ibm_srcstamp; /* sender's incarnation */ __u64 ibm_dstnid; /* destination's NID */ __u64 ibm_dststamp; /* destination's incarnation */ union { kib_connparams_t connparams; kib_immediate_msg_t immediate; kib_putreq_msg_t putreq; kib_putack_msg_t putack; kib_get_msg_t get; kib_completion_msg_t completion; } WIRE_ATTR ibm_u;} WIRE_ATTR kib_msg_t;#define IBLND_MSG_MAGIC LNET_PROTO_IB_MAGIC /* unique magic */#define IBLND_MSG_VERSION 0x11#define IBLND_MSG_CONNREQ 0xc0 /* connection request */#define IBLND_MSG_CONNACK 0xc1 /* connection acknowledge */#define IBLND_MSG_NOOP 0xd0 /* nothing (just credits) */#define IBLND_MSG_IMMEDIATE 0xd1 /* immediate */#define IBLND_MSG_PUT_REQ 0xd2 /* putreq (src->sink) */#define IBLND_MSG_PUT_NAK 0xd3 /* completion (sink->src) */#define IBLND_MSG_PUT_ACK 0xd4 /* putack (sink->src) */#define IBLND_MSG_PUT_DONE 0xd5 /* completion (src->sink) */#define IBLND_MSG_GET_REQ 0xd6 /* getreq (sink->src) */#define IBLND_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */typedef struct { __u32 ibr_magic; /* sender's magic */ __u16 ibr_version; /* sender's version */ __u8 ibr_why; /* reject reason */} WIRE_ATTR kib_rej_t;/* connection rejection reasons */#define IBLND_REJECT_CONN_RACE 1 /* You lost connection race */#define IBLND_REJECT_NO_RESOURCES 2 /* Out of memory/conns etc */#define IBLND_REJECT_FATAL 3 /* Anything else *//***********************************************************************/typedef struct kib_rx /* receive message */{ struct list_head rx_list; /* queue for attention */ struct kib_conn *rx_conn; /* owning conn */ int rx_nob; /* # bytes received (-1 while posted) */ enum ib_wc_status rx_status; /* completion status */ kib_msg_t *rx_msg; /* message buffer (host vaddr) */ __u64 rx_msgaddr; /* message buffer (I/O addr) */ DECLARE_PCI_UNMAP_ADDR (rx_msgunmap); /* for dma_unmap_single() */ struct ib_recv_wr rx_wrq; /* receive work item... */ struct ib_sge rx_sge; /* ...and its memory */} kib_rx_t;#define IBLND_POSTRX_DONT_POST 0 /* don't post */#define IBLND_POSTRX_NO_CREDIT 1 /* post: no credits */#define IBLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */#define IBLND_POSTRX_RSRVD_CREDIT 3 /* post: give myself back 1 reserved credit */typedef struct kib_tx /* transmit message */{ struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */ struct kib_conn *tx_conn; /* owning conn */ int tx_sending; /* # tx callbacks outstanding */ int tx_queued; /* queued for sending */ int tx_waiting; /* waiting for peer */ int tx_status; /* LNET completion status */ unsigned long tx_deadline; /* completion deadline */ __u64 tx_cookie; /* completion cookie */ lnet_msg_t *tx_lntmsg[2]; /* lnet msgs to finalize on completion */ kib_msg_t *tx_msg; /* message buffer (host vaddr) */ __u64 tx_msgaddr; /* message buffer (I/O addr) */ DECLARE_PCI_UNMAP_ADDR (tx_msgunmap); /* for dma_unmap_single() */ int tx_nwrq; /* # send work items */#if IBLND_MAP_ON_DEMAND struct ib_send_wr tx_wrq[2]; /* send work items... */ struct ib_sge tx_sge[2]; /* ...and their memory */ kib_rdma_desc_t tx_rd[1]; /* rdma descriptor */ __u64 *tx_pages; /* rdma phys page addrs */ struct ib_pool_fmr *tx_fmr; /* rdma mapping (mapped if != NULL) */#else struct ib_send_wr *tx_wrq; /* send work items... */ struct ib_sge *tx_sge; /* ...and their memory */ kib_rdma_desc_t *tx_rd; /* rdma descriptor */ int tx_nfrags; /* # entries in... */ struct scatterlist *tx_frags; /* dma_map_sg descriptor */ int tx_dmadir; /* dma direction */#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -