📄 o2iblnd.c
字号:
for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { LASSERT (list_empty(&kiblnd_data.kib_peers[i])); } LASSERT (list_empty(&kiblnd_data.kib_connd_zombies)); LASSERT (list_empty(&kiblnd_data.kib_connd_conns)); /* flag threads to terminate; wake and wait for them to die */ kiblnd_data.kib_shutdown = 1; wake_up_all(&kiblnd_data.kib_sched_waitq); wake_up_all(&kiblnd_data.kib_connd_waitq); i = 2; while (atomic_read(&kiblnd_data.kib_nthreads) != 0) { i++; CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ "Waiting for %d threads to terminate\n", atomic_read(&kiblnd_data.kib_nthreads)); cfs_pause(cfs_time_seconds(1)); } /* fall through */ case IBLND_INIT_NOTHING: break; } if (kiblnd_data.kib_peers != NULL) LIBCFS_FREE(kiblnd_data.kib_peers, sizeof(struct list_head) * kiblnd_data.kib_peer_hash_size); CDEBUG(D_MALLOC, "after LND base cleanup: kmem %d\n", atomic_read(&libcfs_kmemory)); kiblnd_data.kib_init = IBLND_INIT_NOTHING; PORTAL_MODULE_UNUSE;}voidkiblnd_shutdown (lnet_ni_t *ni){ kib_net_t *net = ni->ni_data; rwlock_t *g_lock = &kiblnd_data.kib_global_lock; int i; unsigned long flags; LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL); if (net == NULL) goto out; CDEBUG(D_MALLOC, "before LND net cleanup: kmem %d\n", atomic_read(&libcfs_kmemory)); write_lock_irqsave(g_lock, flags); net->ibn_shutdown = 1; write_unlock_irqrestore(g_lock, flags); switch (net->ibn_init) { default: LBUG(); case IBLND_INIT_ALL: /* nuke all existing peers within this net */ kiblnd_del_peer(ni, LNET_NID_ANY); /* Wait for all peer state to clean up */ i = 2; while (atomic_read(&net->ibn_npeers) != 0) { i++; CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? */ "%s: waiting for %d peers to disconnect\n", libcfs_nid2str(ni->ni_nid), atomic_read(&net->ibn_npeers)); cfs_pause(cfs_time_seconds(1)); } kiblnd_unmap_tx_descs(ni); LASSERT (net->ibn_dev->ibd_nnets > 0); net->ibn_dev->ibd_nnets--; /* fall through */ case IBLND_INIT_NOTHING: LASSERT (atomic_read(&net->ibn_nconns) == 0);#if IBLND_MAP_ON_DEMAND if (net->ibn_fmrpool != NULL) ib_destroy_fmr_pool(net->ibn_fmrpool);#endif if (net->ibn_dev != NULL && net->ibn_dev->ibd_nnets == 0) kiblnd_destroy_dev(net->ibn_dev); break; } kiblnd_free_tx_descs(ni); CDEBUG(D_MALLOC, "after LND net cleanup: kmem %d\n", atomic_read(&libcfs_kmemory)); net->ibn_init = IBLND_INIT_NOTHING; ni->ni_data = NULL; LIBCFS_FREE(net, sizeof(*net));out: if (list_empty(&kiblnd_data.kib_devs)) kiblnd_base_shutdown(); return;}intkiblnd_base_startup (void){ int rc; int i; LASSERT (kiblnd_data.kib_init == IBLND_INIT_NOTHING); if (*kiblnd_tunables.kib_credits > *kiblnd_tunables.kib_ntx) { CERROR("Can't set credits(%d) > ntx(%d)\n", *kiblnd_tunables.kib_credits, *kiblnd_tunables.kib_ntx); return -EINVAL; } PORTAL_MODULE_USE; memset(&kiblnd_data, 0, sizeof(kiblnd_data)); /* zero pointers, flags etc */ rwlock_init(&kiblnd_data.kib_global_lock); INIT_LIST_HEAD(&kiblnd_data.kib_devs); kiblnd_data.kib_peer_hash_size = IBLND_PEER_HASH_SIZE; LIBCFS_ALLOC(kiblnd_data.kib_peers, sizeof(struct list_head) * kiblnd_data.kib_peer_hash_size); if (kiblnd_data.kib_peers == NULL) { goto failed; } for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) INIT_LIST_HEAD(&kiblnd_data.kib_peers[i]); spin_lock_init(&kiblnd_data.kib_connd_lock); INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns); INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies); init_waitqueue_head(&kiblnd_data.kib_connd_waitq); spin_lock_init(&kiblnd_data.kib_sched_lock); INIT_LIST_HEAD(&kiblnd_data.kib_sched_conns); init_waitqueue_head(&kiblnd_data.kib_sched_waitq); kiblnd_data.kib_error_qpa.qp_state = IB_QPS_ERR; /* lists/ptrs/locks initialised */ kiblnd_data.kib_init = IBLND_INIT_DATA; /*****************************************************/ for (i = 0; i < IBLND_N_SCHED; i++) { rc = kiblnd_thread_start(kiblnd_scheduler, (void *)((long)i)); if (rc != 0) { CERROR("Can't spawn o2iblnd scheduler[%d]: %d\n", i, rc); goto failed; } } rc = kiblnd_thread_start(kiblnd_connd, NULL); if (rc != 0) { CERROR("Can't spawn o2iblnd connd: %d\n", rc); goto failed; } /* flag everything initialised */ kiblnd_data.kib_init = IBLND_INIT_ALL; /*****************************************************/ return 0; failed: kiblnd_base_shutdown(); return -ENETDOWN;}intkiblnd_startup (lnet_ni_t *ni){ char *ifname; kib_net_t *net; kib_dev_t *ibdev; struct list_head *tmp; struct timeval tv; int rc; LASSERT (ni->ni_lnd == &the_kiblnd); if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) { rc = kiblnd_base_startup(); if (rc != 0) return rc; } LIBCFS_ALLOC(net, sizeof(*net)); ni->ni_data = net; if (net == NULL) goto failed; memset(net, 0, sizeof(*net)); do_gettimeofday(&tv); net->ibn_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; ni->ni_maxtxcredits = *kiblnd_tunables.kib_credits; ni->ni_peertxcredits = *kiblnd_tunables.kib_peercredits; spin_lock_init(&net->ibn_tx_lock); INIT_LIST_HEAD(&net->ibn_idle_txs); rc = kiblnd_alloc_tx_descs(ni); if (rc != 0) { CERROR("Can't allocate tx descs\n"); goto failed; } if (ni->ni_interfaces[0] != NULL) { /* Use the IPoIB interface specified in 'networks=' */ CLASSERT (LNET_MAX_INTERFACES > 1); if (ni->ni_interfaces[1] != NULL) { CERROR("Multiple interfaces not supported\n"); goto failed; } ifname = ni->ni_interfaces[0]; } else { ifname = *kiblnd_tunables.kib_default_ipif; } if (strlen(ifname) >= sizeof(ibdev->ibd_ifname)) { CERROR("IPoIB interface name too long: %s\n", ifname); goto failed; } ibdev = NULL; list_for_each (tmp, &kiblnd_data.kib_devs) { ibdev = list_entry(tmp, kib_dev_t, ibd_list); if (!strcmp(&ibdev->ibd_ifname[0], ifname)) break; ibdev = NULL; } if (ibdev == NULL) { __u32 ip; __u32 netmask; int up; struct rdma_cm_id *id; struct ib_pd *pd; struct ib_mr *mr; struct sockaddr_in addr; rc = libcfs_ipif_query(ifname, &up, &ip, &netmask); if (rc != 0) { CERROR("Can't query IPoIB interface %s: %d\n", ifname, rc); goto failed; } if (!up) { CERROR("Can't query IPoIB interface %s: it's down\n", ifname); goto failed; } LIBCFS_ALLOC(ibdev, sizeof(*ibdev)); if (ibdev == NULL) goto failed; memset(ibdev, 0, sizeof(*ibdev)); INIT_LIST_HEAD(&ibdev->ibd_list); /* not yet in kib_devs */ ibdev->ibd_ifip = ip; strcpy(&ibdev->ibd_ifname[0], ifname); id = rdma_create_id(kiblnd_cm_callback, ibdev, RDMA_PS_TCP); if (!IS_ERR(id)) { ibdev->ibd_cmid = id; } else { CERROR("Can't create listen ID: %ld\n", PTR_ERR(id)); goto failed; } memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_port = htons(*kiblnd_tunables.kib_service); addr.sin_addr.s_addr = htonl(ip); rc = rdma_bind_addr(id, (struct sockaddr *)&addr); if (rc != 0) { CERROR("Can't bind to %s: %d\n", ifname, rc); goto failed; } /* Binding should have assigned me an IB device */ LASSERT (id->device != NULL); pd = ib_alloc_pd(id->device); if (!IS_ERR(pd)) { ibdev->ibd_pd = pd; } else { CERROR("Can't allocate PD: %ld\n", PTR_ERR(pd)); goto failed; }#if IBLND_MAP_ON_DEMAND /* MR for sends and receives */ mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);#else /* MR for sends, recieves _and_ RDMA...........v */ mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);#endif if (!IS_ERR(mr)) { ibdev->ibd_mr = mr; } else { CERROR("Can't get MR: %ld\n", PTR_ERR(pd)); goto failed; } rc = rdma_listen(id, 0); if (rc != 0) { CERROR("Can't start listener: %d\n", rc); goto failed; } list_add_tail(&ibdev->ibd_list, &kiblnd_data.kib_devs); } ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip); net->ibn_dev = ibdev;#if IBLND_MAP_ON_DEMAND /* FMR pool for RDMA */ { struct ib_fmr_pool *fmrpool; struct ib_fmr_pool_param param = { .max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE, .page_shift = PAGE_SHIFT, .access = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE), .pool_size = *kiblnd_tunables.kib_fmr_pool_size, .dirty_watermark = *kiblnd_tunables.kib_fmr_flush_trigger, .flush_function = NULL, .flush_arg = NULL, .cache = *kiblnd_tunables.kib_fmr_cache}; if (*kiblnd_tunables.kib_fmr_pool_size < *kiblnd_tunables.kib_ntx) { CERROR("Can't set fmr pool size (%d) < ntx(%d)\n", *kiblnd_tunables.kib_fmr_pool_size, *kiblnd_tunables.kib_ntx); goto failed; } fmrpool = ib_create_fmr_pool(ibdev->ibd_pd, ¶m); if (!IS_ERR(fmrpool)) { net->ibn_fmrpool = fmrpool; } else { CERROR("Can't create FMR pool: %ld\n", PTR_ERR(fmrpool)); goto failed; } }#endif kiblnd_map_tx_descs(ni); ibdev->ibd_nnets++; net->ibn_init = IBLND_INIT_ALL; return 0;failed: kiblnd_shutdown(ni); CDEBUG(D_NET, "kiblnd_startup failed\n"); return -ENETDOWN;}void __exitkiblnd_module_fini (void){ lnet_unregister_lnd(&the_kiblnd); kiblnd_tunables_fini();}int __initkiblnd_module_init (void){ int rc; CLASSERT (sizeof(kib_msg_t) <= IBLND_MSG_SIZE);#if !IBLND_MAP_ON_DEMAND CLASSERT (offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) <= IBLND_MSG_SIZE); CLASSERT (offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) <= IBLND_MSG_SIZE);#endif rc = kiblnd_tunables_init(); if (rc != 0) return rc; lnet_register_lnd(&the_kiblnd); return 0;}MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");MODULE_DESCRIPTION("Kernel OpenIB gen2 LND v1.00");MODULE_LICENSE("GPL");module_init(kiblnd_module_init);module_exit(kiblnd_module_fini);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -