📄 openiblnd.h
字号:
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (C) 2004 Cluster File Systems, Inc. * Author: Eric Barton <eric@bartonsoftware.com> * * This file is part of Lustre, http://www.lustre.org. * * Lustre is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * Lustre is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Lustre; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * */#ifndef EXPORT_SYMTAB# define EXPORT_SYMTAB#endif#ifndef AUTOCONF_INCLUDED#include <linux/config.h>#endif#include <linux/module.h>#include <linux/kernel.h>#include <linux/mm.h>#include <linux/string.h>#include <linux/stat.h>#include <linux/errno.h>#include <linux/smp_lock.h>#include <linux/unistd.h>#include <linux/uio.h>#include <asm/system.h>#include <asm/uaccess.h>#include <asm/io.h>#include <linux/init.h>#include <linux/fs.h>#include <linux/file.h>#include <linux/stat.h>#include <linux/list.h>#include <linux/kmod.h>#include <linux/sysctl.h>#include <net/sock.h>#include <linux/in.h>#define DEBUG_SUBSYSTEM S_LND#include <libcfs/kp30.h>#include <lnet/lnet.h>#include <lnet/lib-lnet.h>#include <ts_ib_core.h>#include <ts_ib_cm.h>#include <ts_ib_sa_client.h>#ifndef USING_TSAPI/* OpenIB Gen1 */typedef struct ib_qp ib_qp_t;typedef struct ib_mr ib_mr_t;typedef struct ib_fmr ib_fmr_t;typedef struct ib_pd ib_pd_t;typedef struct ib_cq ib_cq_t;typedef struct ib_fmr_pool ib_fmr_pool_t;#else/* Cisco (topspin) */typedef void ib_qp_t;typedef void ib_mr_t;typedef void ib_fmr_t;typedef void ib_pd_t;typedef void ib_cq_t;typedef void ib_fmr_pool_t;#define IB_ACCESS_LOCAL_WRITE TS_IB_ACCESS_LOCAL_WRITE#define IB_WQ_SIGNAL_SELECTABLE TS_IB_ACCESS_LOCAL_WRITE#define IB_TRANSPORT_RC TS_IB_TRANSPORT_RC#define IB_QP_STATE_INIT TS_IB_QP_STATE_INIT#define IB_QP_ATTRIBUTE_STATE TS_IB_QP_ATTRIBUTE_STATE#define IB_QP_ATTRIBUTE_PORT TS_IB_QP_ATTRIBUTE_PORT#define IB_QP_ATTRIBUTE_PKEY_INDEX TS_IB_QP_ATTRIBUTE_PKEY_INDEX#define IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE TS_IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE#define IB_ACCESS_LOCAL_WRITE TS_IB_ACCESS_LOCAL_WRITE#define IB_ACCESS_REMOTE_WRITE TS_IB_ACCESS_REMOTE_WRITE#define IB_ACCESS_REMOTE_READ TS_IB_ACCESS_REMOTE_READ#define IB_CQ_CALLBACK_INTERRU TS_IB_CQ_CALLBACK_INTERRUPTPT#define IB_CQ_PROVIDER_REARM TS_IB_CQ_PROVIDER_REARM#define IB_CQ_CALLBACK_INTERRUPT TS_IB_CQ_CALLBACK_INTERRUPT#define IB_COMPLETION_STATUS_SUCCESS TS_IB_COMPLETION_STATUS_SUCCESS#define IB_OP_SEND TS_IB_OP_SEND#define IB_OP_RDMA_WRITE TS_IB_OP_RDMA_WRITE#define IB_OP_RDMA_READ TS_IB_OP_RDMA_READ#endif#ifdef CONFIG_SMP# define IBNAL_N_SCHED num_online_cpus() /* # schedulers */#else# define IBNAL_N_SCHED 1 /* # schedulers */#endif#define IBNAL_FMR 1//#define IBNAL_CALLBACK_CTXT IB_CQ_CALLBACK_PROCESS#define IBNAL_CALLBACK_CTXT IB_CQ_CALLBACK_INTERRUPT/* tunables fixed at compile time */#define IBNAL_PEER_HASH_SIZE 101 /* # peer lists */#define IBNAL_RESCHED 100 /* # scheduler loops before reschedule */#define IBNAL_MSG_QUEUE_SIZE 8 /* # messages/RDMAs in-flight */#define IBNAL_CREDIT_HIGHWATER 6 /* when to eagerly return credits */#define IBNAL_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */#define IBNAL_RDMA_BASE 0x0eeb0000/* QP tunables */#define IBNAL_RETRY 7 /* # times to retry */#define IBNAL_RNR_RETRY 7 /* */#define IBNAL_CM_RETRY 7 /* # times to retry connection */#define IBNAL_FLOW_CONTROL 1#define IBNAL_RESPONDER_RESOURCES 8/************************//* derived constants... *//* TX messages (shared by all connections) */#define IBNAL_TX_MSGS() (*kibnal_tunables.kib_ntx)#define IBNAL_TX_MSG_BYTES() (IBNAL_TX_MSGS() * IBNAL_MSG_SIZE)#define IBNAL_TX_MSG_PAGES() ((IBNAL_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE)/* RX messages (per connection) */#define IBNAL_RX_MSGS (IBNAL_MSG_QUEUE_SIZE * 2)#define IBNAL_RX_MSG_BYTES (IBNAL_RX_MSGS * IBNAL_MSG_SIZE)#define IBNAL_RX_MSG_PAGES ((IBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE)/* we may have up to 2 completions per transmit + 1 completion per receive, per connection */#define IBNAL_CQ_ENTRIES() ((2*IBNAL_TX_MSGS()) + \ (IBNAL_RX_MSGS * *kibnal_tunables.kib_concurrent_peers))typedef struct{ char **kib_ipif_basename; /* IPoIB interface base name */ int *kib_n_connd; /* # connection daemons */ int *kib_min_reconnect_interval; /* min connect retry seconds... */ int *kib_max_reconnect_interval; /* max connect retry seconds */ int *kib_concurrent_peers; /* max # peers */ int *kib_cksum; /* checksum kib_msg_t? */ int *kib_timeout; /* comms timeout (seconds) */ int *kib_keepalive; /* keepalive (seconds) */ int *kib_ntx; /* # tx descs */ int *kib_credits; /* # concurrent sends */ int *kib_peercredits; /* # concurrent sends to 1 peer */ cfs_sysctl_table_header_t *kib_sysctl; /* sysctl interface */} kib_tunables_t;typedef struct{ int ibp_npages; /* # pages */ int ibp_mapped; /* mapped? */ __u64 ibp_vaddr; /* mapped region vaddr */ __u32 ibp_lkey; /* mapped region lkey */ __u32 ibp_rkey; /* mapped region rkey */ ib_mr_t *ibp_handle; /* mapped region handle */ struct page *ibp_pages[0];} kib_pages_t;typedef struct{ int kib_init; /* initialisation state */ __u64 kib_incarnation; /* which one am I */ int kib_shutdown; /* shut down? */ atomic_t kib_nthreads; /* # live threads */ lnet_ni_t *kib_ni; /* _the_ openib interface */ __u64 kib_svc_id; /* service number I listen on */ tTS_IB_GID kib_svc_gid; /* device/port GID */ __u16 kib_svc_pkey; /* device/port pkey */ void *kib_listen_handle; /* IB listen handle */ rwlock_t kib_global_lock; /* stabilize peer/conn ops */ struct list_head *kib_peers; /* hash table of all my known peers */ int kib_peer_hash_size; /* size of kib_peers */ int kib_nonewpeers; /* prevent new peers? */ atomic_t kib_npeers; /* # peers extant */ atomic_t kib_nconns; /* # connections extant */ struct list_head kib_reaper_conns; /* connections to reap */ wait_queue_head_t kib_reaper_waitq; /* reaper sleeps here */ unsigned long kib_reaper_waketime; /* when reaper will wake */ spinlock_t kib_reaper_lock; /* serialise */ struct list_head kib_connd_peers; /* peers waiting for a connection */ struct list_head kib_connd_acceptq; /* accepted sockets to handle */ wait_queue_head_t kib_connd_waitq; /* connection daemons sleep here */ int kib_connd_connecting; /* # connds connecting */ spinlock_t kib_connd_lock; /* serialise */ wait_queue_head_t kib_sched_waitq; /* schedulers sleep here */ struct list_head kib_sched_txq; /* tx requiring attention */ struct list_head kib_sched_rxq; /* rx requiring attention */ spinlock_t kib_sched_lock; /* serialise */ struct kib_tx *kib_tx_descs; /* all the tx descriptors */ kib_pages_t *kib_tx_pages; /* premapped tx msg pages */ struct list_head kib_idle_txs; /* idle tx descriptors */ __u64 kib_next_tx_cookie; /* RDMA completion cookie */ spinlock_t kib_tx_lock; /* serialise */ int kib_hca_idx; /* my HCA number */ struct ib_device *kib_device; /* "the" device */ struct ib_device_properties kib_device_props; /* its properties */ int kib_port; /* port on the device */ struct ib_port_properties kib_port_props; /* its properties */ ib_pd_t *kib_pd; /* protection domain */#if IBNAL_FMR ib_fmr_pool_t *kib_fmr_pool; /* fast memory region pool */#endif ib_cq_t *kib_cq; /* completion queue */} kib_data_t;#define IBNAL_INIT_NOTHING 0#define IBNAL_INIT_DATA 1#define IBNAL_INIT_LIB 2#define IBNAL_INIT_PD 3#define IBNAL_INIT_FMR 4#define IBNAL_INIT_TXD 5#define IBNAL_INIT_CQ 6#define IBNAL_INIT_ALL 7typedef struct kib_acceptsock /* accepted socket queued for connd */{ struct list_head ibas_list; /* queue for attention */ struct socket *ibas_sock; /* the accepted socket */} kib_acceptsock_t;/************************************************************************ * IB Wire message format. * These are sent in sender's byte order (i.e. receiver flips). * They may be sent via TCP/IP (service ID,GID,PKEY query/response), * as private data in the connection request/response, or "normally". */typedef struct kib_svcrsp /* service response */{ __u64 ibsr_svc_id; /* service's id */ __u8 ibsr_svc_gid[16]; /* service's gid */ __u16 ibsr_svc_pkey; /* service's pkey */} WIRE_ATTR kib_svcrsp_t;typedef struct kib_connparams{ __u32 ibcp_queue_depth;} WIRE_ATTR kib_connparams_t;typedef struct{ union { ib_mr_t *mr; ib_fmr_t *fmr; } md_handle; __u32 md_lkey; __u32 md_rkey; __u64 md_addr;} kib_md_t;typedef struct{ __u32 rd_key; /* remote key */ __u32 rd_nob; /* # of bytes */ __u64 rd_addr; /* remote io vaddr */} WIRE_ATTR kib_rdma_desc_t;typedef struct{ lnet_hdr_t ibim_hdr; /* portals header */ char ibim_payload[0]; /* piggy-backed payload */} WIRE_ATTR kib_immediate_msg_t;typedef struct{ lnet_hdr_t ibrm_hdr; /* portals header */ __u64 ibrm_cookie; /* opaque completion cookie */ kib_rdma_desc_t ibrm_desc; /* where to suck/blow */} WIRE_ATTR kib_rdma_msg_t;typedef struct{ __u64 ibcm_cookie; /* opaque completion cookie */ __u32 ibcm_status; /* completion status */} WIRE_ATTR kib_completion_msg_t;typedef struct{ /* First 2 fields fixed FOR ALL TIME */ __u32 ibm_magic; /* I'm an openibnal message */ __u16 ibm_version; /* this is my version number */ __u8 ibm_type; /* msg type */ __u8 ibm_credits; /* returned credits */ __u32 ibm_nob; /* # bytes in whole message */ __u32 ibm_cksum; /* checksum (0 == no checksum) */ __u64 ibm_srcnid; /* sender's NID */ __u64 ibm_srcstamp; /* sender's incarnation */ __u64 ibm_dstnid; /* destination's NID */ __u64 ibm_dststamp; /* destination's incarnation */ union { kib_svcrsp_t svcrsp; kib_connparams_t connparams; kib_immediate_msg_t immediate; kib_rdma_msg_t rdma; kib_completion_msg_t completion; } WIRE_ATTR ibm_u;} WIRE_ATTR kib_msg_t;#define IBNAL_MSG_MAGIC LNET_PROTO_OPENIB_MAGIC /* unique magic */#define IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD 2 /* previous protocol version */#define IBNAL_MSG_VERSION 3 /* current protocol version */#define IBNAL_MSG_SVCQRY 0xb0 /* service query */#define IBNAL_MSG_SVCRSP 0xb1 /* service response */#define IBNAL_MSG_CONNREQ 0xc0 /* connection request */#define IBNAL_MSG_CONNACK 0xc1 /* connection acknowledge */#define IBNAL_MSG_NOOP 0xd0 /* nothing (just credits) */#define IBNAL_MSG_IMMEDIATE 0xd1 /* portals hdr + payload */#define IBNAL_MSG_PUT_RDMA 0xd2 /* portals PUT hdr + source rdma desc */#define IBNAL_MSG_PUT_DONE 0xd3 /* signal PUT rdma completion */#define IBNAL_MSG_GET_RDMA 0xd4 /* portals GET hdr + sink rdma desc */#define IBNAL_MSG_GET_DONE 0xd5 /* signal GET rdma completion *//***********************************************************************/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -