📄 ptllnd.c
字号:
CDEBUG(D_NET, "nposted_buffers = %d (before)\n",plni->plni_nposted_buffers); CDEBUG(D_NET, "nbuffers = %d (before)\n",plni->plni_nbuffers); plni->plni_nmsgs += delta; LASSERT(plni->plni_nmsgs >= 0); nmsgs = plni->plni_nmsgs + plni->plni_msgs_spare; nbufs = (nmsgs * plni->plni_max_msg_size + plni->plni_buffer_size - 1) / plni->plni_buffer_size; while (nbufs > plni->plni_nbuffers) { buf = ptllnd_create_buffer(ni); if (buf == NULL) return -ENOMEM; rc = ptllnd_post_buffer(buf); if (rc != 0) { /* TODO - this path seems to orpahn the buffer * in a state where its not posted and will never be * However it does not leak the buffer as it's * already been put onto the global buffer list * and will be cleaned up */ return rc; } } CDEBUG(D_NET, "nposted_buffers = %d (after)\n",plni->plni_nposted_buffers); CDEBUG(D_NET, "nbuffers = %d (after)\n",plni->plni_nbuffers); return 0;}voidptllnd_destroy_buffers (lnet_ni_t *ni){ ptllnd_ni_t *plni = ni->ni_data; ptllnd_buffer_t *buf; struct list_head *tmp; struct list_head *nxt; CDEBUG(D_NET, "nposted_buffers = %d (before)\n",plni->plni_nposted_buffers); CDEBUG(D_NET, "nbuffers = %d (before)\n",plni->plni_nbuffers); list_for_each_safe(tmp, nxt, &plni->plni_buffers) { buf = list_entry(tmp, ptllnd_buffer_t, plb_list); //CDEBUG(D_NET, "buf=%p posted=%d\n",buf,buf->plb_posted); LASSERT (plni->plni_nbuffers > 0); if (buf->plb_posted) { time_t start = cfs_time_current_sec(); int w = plni->plni_long_wait; LASSERT (plni->plni_nposted_buffers > 0);#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS (void) PtlMDUnlink(buf->plb_md); while (buf->plb_posted) { if (w > 0 && cfs_time_current_sec() > start + w/1000) { CWARN("Waited %ds to unlink buffer\n", (int)(cfs_time_current_sec() - start)); w *= 2; } ptllnd_wait(ni, w); }#else while (buf->plb_posted) { rc = PtlMDUnlink(buf->plb_md); if (rc == PTL_OK) { buf->plb_posted = 0; plni->plni_nposted_buffers--; break; } LASSERT (rc == PTL_MD_IN_USE); if (w > 0 && cfs_time_current_sec() > start + w/1000) { CWARN("Waited %ds to unlink buffer\n", cfs_time_current_sec() - start); w *= 2; } ptllnd_wait(ni, w); }#endif } ptllnd_destroy_buffer(buf); } CDEBUG(D_NET, "nposted_buffers = %d (after)\n",plni->plni_nposted_buffers); CDEBUG(D_NET, "nbuffers = %d (after)\n",plni->plni_nbuffers); LASSERT (plni->plni_nposted_buffers == 0); LASSERT (plni->plni_nbuffers == 0);}intptllnd_create_peer_hash (lnet_ni_t *ni){ ptllnd_ni_t *plni = ni->ni_data; int i; plni->plni_npeers = 0; LIBCFS_ALLOC(plni->plni_peer_hash, plni->plni_peer_hash_size * sizeof(*plni->plni_peer_hash)); if (plni->plni_peer_hash == NULL) { CERROR("Can't allocate ptllnd peer hash (size %d)\n", plni->plni_peer_hash_size); return -ENOMEM; } for (i = 0; i < plni->plni_peer_hash_size; i++) CFS_INIT_LIST_HEAD(&plni->plni_peer_hash[i]); return 0;}voidptllnd_destroy_peer_hash (lnet_ni_t *ni){ ptllnd_ni_t *plni = ni->ni_data; int i; LASSERT( plni->plni_npeers == 0); for (i = 0; i < plni->plni_peer_hash_size; i++) LASSERT (list_empty(&plni->plni_peer_hash[i])); LIBCFS_FREE(plni->plni_peer_hash, plni->plni_peer_hash_size * sizeof(*plni->plni_peer_hash));}voidptllnd_close_peers (lnet_ni_t *ni){ ptllnd_ni_t *plni = ni->ni_data; ptllnd_peer_t *plp; int i; for (i = 0; i < plni->plni_peer_hash_size; i++) while (!list_empty(&plni->plni_peer_hash[i])) { plp = list_entry(plni->plni_peer_hash[i].next, ptllnd_peer_t, plp_list); ptllnd_close_peer(plp, 0); }}intptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg){ switch (cmd) { case IOC_LIBCFS_DEBUG_PEER: ptllnd_dump_debug(ni, *((lnet_process_id_t *)arg)); return 0; default: return -EINVAL; }}__u64ptllnd_get_timestamp(void){ struct timeval tv; int rc = gettimeofday(&tv, NULL); LASSERT (rc == 0); return ((__u64)tv.tv_sec) * 1000000 + tv.tv_usec;}voidptllnd_shutdown (lnet_ni_t *ni){ ptllnd_ni_t *plni = ni->ni_data; int rc; time_t start = cfs_time_current_sec(); int w = plni->plni_long_wait; LASSERT (ptllnd_ni_count == 1); plni->plni_max_tx_history = 0; ptllnd_cull_tx_history(plni); ptllnd_close_peers(ni); ptllnd_destroy_buffers(ni); while (plni->plni_npeers > 0) { if (w > 0 && cfs_time_current_sec() > start + w/1000) { CWARN("Waited %ds for peers to shutdown\n", (int)(cfs_time_current_sec() - start)); w *= 2; } ptllnd_wait(ni, w); } LASSERT (plni->plni_ntxs == 0); LASSERT (plni->plni_nrxs == 0); rc = PtlEQFree(plni->plni_eqh); LASSERT (rc == PTL_OK); rc = PtlNIFini(plni->plni_nih); LASSERT (rc == PTL_OK); ptllnd_destroy_peer_hash(ni); LIBCFS_FREE(plni, sizeof(*plni)); ptllnd_ni_count--;}intptllnd_startup (lnet_ni_t *ni){ ptllnd_ni_t *plni; int rc; /* could get limits from portals I guess... */ ni->ni_maxtxcredits = ni->ni_peertxcredits = 1000; if (ptllnd_ni_count != 0) { CERROR("Can't have > 1 instance of ptllnd\n"); return -EPERM; } ptllnd_ni_count++; rc = ptllnd_history_init(); if (rc != 0) { CERROR("Can't init history\n"); goto failed0; } LIBCFS_ALLOC(plni, sizeof(*plni)); if (plni == NULL) { CERROR("Can't allocate ptllnd state\n"); rc = -ENOMEM; goto failed0; } ni->ni_data = plni; plni->plni_stamp = ptllnd_get_timestamp(); plni->plni_nrxs = 0; plni->plni_ntxs = 0; plni->plni_ntx_history = 0; plni->plni_watchdog_peeridx = 0; plni->plni_watchdog_nextt = cfs_time_current_sec(); CFS_INIT_LIST_HEAD(&plni->plni_zombie_txs); CFS_INIT_LIST_HEAD(&plni->plni_tx_history); /* * Initilize buffer related data structures */ CFS_INIT_LIST_HEAD(&plni->plni_buffers); plni->plni_nbuffers = 0; plni->plni_nposted_buffers = 0; rc = ptllnd_get_tunables(ni); if (rc != 0) goto failed1; rc = ptllnd_create_peer_hash(ni); if (rc != 0) goto failed1; /* NB I most probably won't get the PID I requested here. It doesn't * matter because I don't need a fixed PID (only connection acceptors * need a "well known" PID). */ rc = PtlNIInit(PTL_IFACE_DEFAULT, plni->plni_ptllnd_pid, NULL, NULL, &plni->plni_nih); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { CERROR("PtlNIInit failed: %s(%d)\n", ptllnd_errtype2str(rc), rc); rc = -ENODEV; goto failed2; } rc = PtlEQAlloc(plni->plni_nih, plni->plni_eq_size, PTL_EQ_HANDLER_NONE, &plni->plni_eqh); if (rc != PTL_OK) { CERROR("PtlEQAlloc failed: %s(%d)\n", ptllnd_errtype2str(rc), rc); rc = -ENODEV; goto failed3; } /* * Fetch the Portals NID */ rc = PtlGetId(plni->plni_nih, &plni->plni_portals_id); if (rc != PTL_OK) { CERROR ("PtlGetID failed : %s(%d)\n", ptllnd_errtype2str(rc), rc); rc = -EINVAL; goto failed4; } /* * Create the new NID. Based on the LND network type * and the lower ni's address data. */ ni->ni_nid = ptllnd_ptl2lnetnid(ni, plni->plni_portals_id.nid); CDEBUG(D_NET, "ptl id =%s\n", ptllnd_ptlid2str(plni->plni_portals_id)); CDEBUG(D_NET, "lnet id =%s (passed back)\n", libcfs_id2str((lnet_process_id_t) { .nid = ni->ni_nid, .pid = the_lnet.ln_pid})); rc = ptllnd_size_buffers(ni, 0); if (rc != 0) goto failed4; return 0; failed4: ptllnd_destroy_buffers(ni); PtlEQFree(plni->plni_eqh); failed3: PtlNIFini(plni->plni_nih); failed2: ptllnd_destroy_peer_hash(ni); failed1: LIBCFS_FREE(plni, sizeof(*plni)); failed0: ptllnd_history_fini(); ptllnd_ni_count--; CDEBUG(D_NET, "<<< rc=%d\n",rc); return rc;}const char *ptllnd_evtype2str(int type){#define DO_TYPE(x) case x: return #x; switch(type) { DO_TYPE(PTL_EVENT_GET_START); DO_TYPE(PTL_EVENT_GET_END); DO_TYPE(PTL_EVENT_PUT_START); DO_TYPE(PTL_EVENT_PUT_END); DO_TYPE(PTL_EVENT_REPLY_START); DO_TYPE(PTL_EVENT_REPLY_END); DO_TYPE(PTL_EVENT_ACK); DO_TYPE(PTL_EVENT_SEND_START); DO_TYPE(PTL_EVENT_SEND_END); DO_TYPE(PTL_EVENT_UNLINK); default: return "<unknown event type>"; }#undef DO_TYPE}const char *ptllnd_msgtype2str(int type){#define DO_TYPE(x) case x: return #x; switch(type) { DO_TYPE(PTLLND_MSG_TYPE_INVALID); DO_TYPE(PTLLND_MSG_TYPE_PUT); DO_TYPE(PTLLND_MSG_TYPE_GET); DO_TYPE(PTLLND_MSG_TYPE_IMMEDIATE); DO_TYPE(PTLLND_MSG_TYPE_HELLO); DO_TYPE(PTLLND_MSG_TYPE_NOOP); DO_TYPE(PTLLND_MSG_TYPE_NAK); default: return "<unknown msg type>"; }#undef DO_TYPE}const char *ptllnd_errtype2str(int type){#define DO_TYPE(x) case x: return #x; switch(type) { DO_TYPE(PTL_OK); DO_TYPE(PTL_SEGV); DO_TYPE(PTL_NO_SPACE); DO_TYPE(PTL_ME_IN_USE); DO_TYPE(PTL_NAL_FAILED); DO_TYPE(PTL_NO_INIT); DO_TYPE(PTL_IFACE_DUP); DO_TYPE(PTL_IFACE_INVALID); DO_TYPE(PTL_HANDLE_INVALID); DO_TYPE(PTL_MD_INVALID); DO_TYPE(PTL_ME_INVALID); DO_TYPE(PTL_PROCESS_INVALID); DO_TYPE(PTL_PT_INDEX_INVALID); DO_TYPE(PTL_SR_INDEX_INVALID); DO_TYPE(PTL_EQ_INVALID); DO_TYPE(PTL_EQ_DROPPED); DO_TYPE(PTL_EQ_EMPTY); DO_TYPE(PTL_MD_NO_UPDATE); DO_TYPE(PTL_FAIL); DO_TYPE(PTL_AC_INDEX_INVALID); DO_TYPE(PTL_MD_ILLEGAL); DO_TYPE(PTL_ME_LIST_TOO_LONG); DO_TYPE(PTL_MD_IN_USE); DO_TYPE(PTL_NI_INVALID); DO_TYPE(PTL_PID_INVALID); DO_TYPE(PTL_PT_FULL); DO_TYPE(PTL_VAL_FAILED); DO_TYPE(PTL_NOT_IMPLEMENTED); DO_TYPE(PTL_NO_ACK); DO_TYPE(PTL_EQ_IN_USE); DO_TYPE(PTL_PID_IN_USE); DO_TYPE(PTL_INV_EQ_SIZE); DO_TYPE(PTL_AGAIN); default: return "<unknown error type>"; }#undef DO_TYPE}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -