ralnd.c
                if (data->ioc_nid == ni->ni_nid) {
                        rc = 0;
                } else {
                        CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
                               libcfs_nid2str(data->ioc_nid),
                               libcfs_nid2str(ni->ni_nid));
                        rc = -EINVAL;
                }
                break;
        }
        }

        return rc;
}

void
kranal_free_txdescs(struct list_head *freelist)
{
        kra_tx_t    *tx;

        while (!list_empty(freelist)) {
                tx = list_entry(freelist->next, kra_tx_t, tx_list);

                list_del(&tx->tx_list);
                LIBCFS_FREE(tx->tx_phys, LNET_MAX_IOV * sizeof(*tx->tx_phys));
                LIBCFS_FREE(tx, sizeof(*tx));
        }
}

int
kranal_alloc_txdescs(struct list_head *freelist, int n)
{
        int          i;
        kra_tx_t    *tx;

        LASSERT (freelist == &kranal_data.kra_idle_txs);
        LASSERT (list_empty(freelist));

        for (i = 0; i < n; i++) {
                LIBCFS_ALLOC(tx, sizeof(*tx));
                if (tx == NULL) {
                        CERROR("Can't allocate tx[%d]\n", i);
                        kranal_free_txdescs(freelist);
                        return -ENOMEM;
                }

                LIBCFS_ALLOC(tx->tx_phys,
                             LNET_MAX_IOV * sizeof(*tx->tx_phys));
                if (tx->tx_phys == NULL) {
                        CERROR("Can't allocate tx[%d]->tx_phys\n", i);
                        LIBCFS_FREE(tx, sizeof(*tx));
                        kranal_free_txdescs(freelist);
                        return -ENOMEM;
                }

                tx->tx_buftype = RANAL_BUF_NONE;
                tx->tx_msg.ram_type = RANAL_MSG_NONE;

                list_add(&tx->tx_list, freelist);
        }

        return 0;
}

int
kranal_device_init(int id, kra_device_t *dev)
{
        int               total_ntx = *kranal_tunables.kra_ntx;
        RAP_RETURN        rrc;

        dev->rad_id = id;
        rrc = RapkGetDeviceByIndex(id, kranal_device_callback,
                                   &dev->rad_handle);
        if (rrc != RAP_SUCCESS) {
                CERROR("Can't get Rapidarray Device %d: %d\n", id, rrc);
                goto failed_0;
        }

        rrc = RapkReserveRdma(dev->rad_handle, total_ntx);
        if (rrc != RAP_SUCCESS) {
                CERROR("Can't reserve %d RDMA descriptors"
                       " for device %d: %d\n", total_ntx, id, rrc);
                goto failed_1;
        }

        rrc = RapkCreateCQ(dev->rad_handle, total_ntx, RAP_CQTYPE_SEND,
                           &dev->rad_rdma_cqh);
        if (rrc != RAP_SUCCESS) {
                CERROR("Can't create rdma cq size %d for device %d: %d\n",
                       total_ntx, id, rrc);
                goto failed_1;
        }

        rrc = RapkCreateCQ(dev->rad_handle, *kranal_tunables.kra_fma_cq_size,
                           RAP_CQTYPE_RECV, &dev->rad_fma_cqh);
        if (rrc != RAP_SUCCESS) {
                CERROR("Can't create fma cq size %d for device %d: %d\n",
                       *kranal_tunables.kra_fma_cq_size, id, rrc);
                goto failed_2;
        }

        return 0;

 failed_2:
        RapkDestroyCQ(dev->rad_handle, dev->rad_rdma_cqh);
 failed_1:
        RapkReleaseDevice(dev->rad_handle);
 failed_0:
        return -ENODEV;
}

void
kranal_device_fini(kra_device_t *dev)
{
        LASSERT (list_empty(&dev->rad_ready_conns));
        LASSERT (list_empty(&dev->rad_new_conns));
        LASSERT (dev->rad_nphysmap == 0);
        LASSERT (dev->rad_nppphysmap == 0);
        LASSERT (dev->rad_nvirtmap == 0);
        LASSERT (dev->rad_nobvirtmap == 0);
        LASSERT (dev->rad_scheduler == NULL);

        RapkDestroyCQ(dev->rad_handle, dev->rad_fma_cqh);
        RapkDestroyCQ(dev->rad_handle, dev->rad_rdma_cqh);
        RapkReleaseDevice(dev->rad_handle);
}
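/*
 * A note on the error handling in kranal_device_init() above: the
 * "failed_N" labels form a reverse-order unwind ladder, so a failure at
 * any stage releases exactly the resources acquired before it and
 * nothing else.  A minimal sketch of the idiom, using hypothetical
 * resources a and b (not part of this driver):
 *
 *      if (acquire_a() != 0)
 *              goto failed_0;
 *      if (acquire_b() != 0)
 *              goto failed_1;        // a is held; undo it on the way out
 *      return 0;
 *  failed_1:
 *      release_a();
 *  failed_0:
 *      return -ENODEV;
 */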
void
kranal_shutdown (lnet_ni_t *ni)
{
        int           i;
        unsigned long flags;

        CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
               atomic_read(&libcfs_kmemory));

        LASSERT (ni == kranal_data.kra_ni);
        LASSERT (ni->ni_data == &kranal_data);

        switch (kranal_data.kra_init) {
        default:
                CERROR("Unexpected state %d\n", kranal_data.kra_init);
                LBUG();

        case RANAL_INIT_ALL:
                /* Prevent new peers from being created */
                write_lock_irqsave(&kranal_data.kra_global_lock, flags);
                kranal_data.kra_nonewpeers = 1;
                write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);

                /* Remove all existing peers from the peer table */
                kranal_del_peer(LNET_NID_ANY);

                /* Wait for pending conn reqs to be handled.
                 * ((i & (-i)) == i) is true iff i is a power of two, so
                 * the message is promoted to D_WARNING only at
                 * exponentially increasing intervals. */
                i = 2;
                spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
                while (!list_empty(&kranal_data.kra_connd_acceptq)) {
                        spin_unlock_irqrestore(&kranal_data.kra_connd_lock,
                                               flags);
                        i++;
                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */
                               "waiting for conn reqs to clean up\n");
                        cfs_pause(cfs_time_seconds(1));

                        spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
                }
                spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);

                /* Wait for all peers to be freed */
                i = 2;
                while (atomic_read(&kranal_data.kra_npeers) != 0) {
                        i++;
                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */
                               "waiting for %d peers to close down\n",
                               atomic_read(&kranal_data.kra_npeers));
                        cfs_pause(cfs_time_seconds(1));
                }
                /* fall through */

        case RANAL_INIT_DATA:
                break;
        }

        /* Peer state all cleaned up BEFORE setting shutdown, so threads don't
         * have to worry about shutdown races.  NB connections may be created
         * while there are still active connds, but these will be temporary
         * since peer creation always fails after the listener has started to
         * shut down. */
        LASSERT (atomic_read(&kranal_data.kra_npeers) == 0);

        /* Flag threads to terminate */
        kranal_data.kra_shutdown = 1;

        for (i = 0; i < kranal_data.kra_ndevs; i++) {
                kra_device_t *dev = &kranal_data.kra_devices[i];

                spin_lock_irqsave(&dev->rad_lock, flags);
                wake_up(&dev->rad_waitq);
                spin_unlock_irqrestore(&dev->rad_lock, flags);
        }

        spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
        wake_up_all(&kranal_data.kra_reaper_waitq);
        spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags);

        LASSERT (list_empty(&kranal_data.kra_connd_peers));
        spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
        wake_up_all(&kranal_data.kra_connd_waitq);
        spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);

        /* Wait for threads to exit */
        i = 2;
        while (atomic_read(&kranal_data.kra_nthreads) != 0) {
                i++;
                CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
                       "Waiting for %d threads to terminate\n",
                       atomic_read(&kranal_data.kra_nthreads));
                cfs_pause(cfs_time_seconds(1));
        }

        LASSERT (atomic_read(&kranal_data.kra_npeers) == 0);
        if (kranal_data.kra_peers != NULL) {
                for (i = 0; i < kranal_data.kra_peer_hash_size; i++)
                        LASSERT (list_empty(&kranal_data.kra_peers[i]));

                LIBCFS_FREE(kranal_data.kra_peers,
                            sizeof (struct list_head) *
                            kranal_data.kra_peer_hash_size);
        }

        LASSERT (atomic_read(&kranal_data.kra_nconns) == 0);
        if (kranal_data.kra_conns != NULL) {
                for (i = 0; i < kranal_data.kra_conn_hash_size; i++)
                        LASSERT (list_empty(&kranal_data.kra_conns[i]));

                LIBCFS_FREE(kranal_data.kra_conns,
                            sizeof (struct list_head) *
                            kranal_data.kra_conn_hash_size);
        }

        for (i = 0; i < kranal_data.kra_ndevs; i++)
                kranal_device_fini(&kranal_data.kra_devices[i]);

        kranal_free_txdescs(&kranal_data.kra_idle_txs);

        CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
               atomic_read(&libcfs_kmemory));

        kranal_data.kra_init = RANAL_INIT_NOTHING;
        PORTAL_MODULE_UNUSE;
}

int
kranal_startup (lnet_ni_t *ni)
{
        struct timeval    tv;
        int               pkmem = atomic_read(&libcfs_kmemory);
        int               rc;
        int               i;
        kra_device_t     *dev;

        LASSERT (ni->ni_lnd == &the_kralnd);

        /* Only 1 instance supported */
        if (kranal_data.kra_init != RANAL_INIT_NOTHING) {
                CERROR ("Only 1 instance supported\n");
                return -EPERM;
        }

        if (lnet_set_ip_niaddr(ni) != 0) {
                CERROR ("Can't determine my NID\n");
                return -EPERM;
        }

        if (*kranal_tunables.kra_credits > *kranal_tunables.kra_ntx) {
                CERROR ("Can't set credits(%d) > ntx(%d)\n",
                        *kranal_tunables.kra_credits,
                        *kranal_tunables.kra_ntx);
                return -EINVAL;
        }

        memset(&kranal_data, 0, sizeof(kranal_data)); /* zero pointers, flags etc */

        ni->ni_maxtxcredits = *kranal_tunables.kra_credits;
        ni->ni_peertxcredits = *kranal_tunables.kra_peercredits;

        ni->ni_data = &kranal_data;
        kranal_data.kra_ni = ni;

        /* CAVEAT EMPTOR: Every 'Fma' message includes the sender's NID and
         * a unique (for all time) connstamp so we can uniquely identify
         * the sender.  The connstamp is an incrementing counter
         * initialised with seconds + microseconds at startup time.  So we
         * rely on NOT creating connections more frequently on average than
         * 1MHz to ensure we don't use old connstamps when we reboot. */
        do_gettimeofday(&tv);
        kranal_data.kra_connstamp =
        kranal_data.kra_peerstamp = (((__u64)tv.tv_sec) * 1000000) +
                                    tv.tv_usec;
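        /* Illustration of the 1MHz bound described in the comment above
         * (explanatory only, not driver code): the stamp is initialised to
         * microseconds since the epoch, so wall-clock time advances it by
         * 10^6 per second.  Provided connections are created at an average
         * rate below one per microsecond, the incrementing counter can
         * never overtake the value a subsequent reboot would reinitialise
         * it from, so every connstamp this node ever issues is distinct. */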
        rwlock_init(&kranal_data.kra_global_lock);

        for (i = 0; i < RANAL_MAXDEVS; i++ ) {
                kra_device_t  *dev = &kranal_data.kra_devices[i];

                dev->rad_idx = i;
                INIT_LIST_HEAD(&dev->rad_ready_conns);
                INIT_LIST_HEAD(&dev->rad_new_conns);
                init_waitqueue_head(&dev->rad_waitq);
                spin_lock_init(&dev->rad_lock);
        }

        kranal_data.kra_new_min_timeout = MAX_SCHEDULE_TIMEOUT;
        init_waitqueue_head(&kranal_data.kra_reaper_waitq);
        spin_lock_init(&kranal_data.kra_reaper_lock);

        INIT_LIST_HEAD(&kranal_data.kra_connd_acceptq);
        INIT_LIST_HEAD(&kranal_data.kra_connd_peers);
        init_waitqueue_head(&kranal_data.kra_connd_waitq);
        spin_lock_init(&kranal_data.kra_connd_lock);

        INIT_LIST_HEAD(&kranal_data.kra_idle_txs);
        spin_lock_init(&kranal_data.kra_tx_lock);

        /* OK to call kranal_api_shutdown() to cleanup now */
        kranal_data.kra_init = RANAL_INIT_DATA;
        PORTAL_MODULE_USE;

        kranal_data.kra_peer_hash_size = RANAL_PEER_HASH_SIZE;
        LIBCFS_ALLOC(kranal_data.kra_peers,
                     sizeof(struct list_head) *
                     kranal_data.kra_peer_hash_size);
        if (kranal_data.kra_peers == NULL)
                goto failed;

        for (i = 0; i < kranal_data.kra_peer_hash_size; i++)
                INIT_LIST_HEAD(&kranal_data.kra_peers[i]);

        kranal_data.kra_conn_hash_size = RANAL_PEER_HASH_SIZE;
        LIBCFS_ALLOC(kranal_data.kra_conns,
                     sizeof(struct list_head) *
                     kranal_data.kra_conn_hash_size);
        if (kranal_data.kra_conns == NULL)
                goto failed;

        for (i = 0; i < kranal_data.kra_conn_hash_size; i++)
                INIT_LIST_HEAD(&kranal_data.kra_conns[i]);

        rc = kranal_alloc_txdescs(&kranal_data.kra_idle_txs,
                                  *kranal_tunables.kra_ntx);
        if (rc != 0)
                goto failed;

        rc = kranal_thread_start(kranal_reaper, NULL);
        if (rc != 0) {
                CERROR("Can't spawn ranal reaper: %d\n", rc);
                goto failed;
        }

        for (i = 0; i < *kranal_tunables.kra_n_connd; i++) {
                rc = kranal_thread_start(kranal_connd,
                                         (void *)(unsigned long)i);
                if (rc != 0) {
                        CERROR("Can't spawn ranal connd[%d]: %d\n", i, rc);
                        goto failed;
                }
        }

        LASSERT (kranal_data.kra_ndevs == 0);

        /* Use all available RapidArray devices */
        for (i = 0; i < RANAL_MAXDEVS; i++) {
                dev = &kranal_data.kra_devices[kranal_data.kra_ndevs];

                rc = kranal_device_init(kranal_devids[i], dev);
                if (rc == 0)
                        kranal_data.kra_ndevs++;
        }

        if (kranal_data.kra_ndevs == 0) {
                CERROR("Can't initialise any RapidArray devices\n");
                goto failed;
        }

        for (i = 0; i < kranal_data.kra_ndevs; i++) {
                dev = &kranal_data.kra_devices[i];
                rc = kranal_thread_start(kranal_scheduler, dev);
                if (rc != 0) {
                        CERROR("Can't spawn ranal scheduler[%d]: %d\n",
                               i, rc);
                        goto failed;
                }
        }

        /* flag everything initialised */
        kranal_data.kra_init = RANAL_INIT_ALL;

        CDEBUG(D_MALLOC, "initial kmem %d\n", pkmem);

        return 0;

 failed:
        kranal_shutdown(ni);
        return -ENETDOWN;
}

void __exit
kranal_module_fini (void)
{
        lnet_unregister_lnd(&the_kralnd);
        kranal_tunables_fini();
}

int __init
kranal_module_init (void)
{
        int    rc;

        rc = kranal_tunables_init();
        if (rc != 0)
                return rc;

        lnet_register_lnd(&the_kralnd);

        return 0;
}

MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Kernel RapidArray LND v0.01");
MODULE_LICENSE("GPL");

module_init(kranal_module_init);
module_exit(kranal_module_fini);
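/*
 * How these entry points are reached (a sketch under the usual LNet LND
 * conventions; the actual definition of the_kralnd lives elsewhere in
 * this driver): kranal_module_init() registers the_kralnd with LNet, and
 * LNet later calls back through its function table when a network
 * interface of this type is brought up or torn down, along the lines of
 *
 *      lnd_t the_kralnd = {
 *              .lnd_type     = RALND,
 *              .lnd_startup  = kranal_startup,
 *              .lnd_shutdown = kranal_shutdown,
 *              ...
 *      };
 */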