mpid_nem_init.c
    return mpi_errno;
 fn_fail:
    /* --BEGIN ERROR HANDLING-- */
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}

/* get_local_procs() determines which processes are local and should
   use shared memory

   OUT
     num_local   -- number of local processes
     local_procs -- array of global ranks of local processes
     local_rank  -- our local rank
     num_nodes   -- number of distinct nodes
     node_ids    -- array mapping each global rank to its node id

   This uses PMI to get all of the processes that have the same
   hostname, and puts them into local_procs sorted by global rank.
*/
#undef FUNCNAME
#define FUNCNAME get_local_procs
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int
get_local_procs (int global_rank, int num_global, int *num_local_p,
                 int **local_procs_p, int *local_rank_p,
                 int *num_nodes_p, int **node_ids_p)
{
#if defined (ENABLED_NO_LOCAL)
    /* used for debugging only: return an array as if there were no
       other processes on this node */
    int mpi_errno = MPI_SUCCESS;
    int i;
    MPIU_CHKPMEM_DECL(2);

    *num_local_p = 1;
    *local_rank_p = 0;
    *num_nodes_p = num_global;

    MPIU_CHKPMEM_MALLOC (*local_procs_p, int *, *num_local_p * sizeof (int),
                         mpi_errno, "local proc array");
    **local_procs_p = global_rank;

    MPIU_CHKPMEM_MALLOC (*node_ids_p, int *, num_global * sizeof (int),
                         mpi_errno, "node_ids array");
    for (i = 0; i < num_global; ++i)
        (*node_ids_p)[i] = i;

    MPIU_CHKPMEM_COMMIT();
 fn_exit:
    return mpi_errno;
 fn_fail:
    /* --BEGIN ERROR HANDLING-- */
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
    /* --END ERROR HANDLING-- */
#elif 0 /* PMI_Get_clique_(size)|(ranks) don't work with mpd */
#error PMI_Get_clique doesnt work with mpd
    int mpi_errno = MPI_SUCCESS;
    int pmi_errno;
    int *lrank_p;
    MPIU_CHKPMEM_DECL(1);

    /* get an array of all processes on this node */
    pmi_errno = PMI_Get_clique_size (num_local_p);
    MPIU_ERR_CHKANDJUMP1 (pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER,
                          "**pmi_get_clique_size",
                          "**pmi_get_clique_size %d", pmi_errno);
    MPIU_CHKPMEM_MALLOC (*local_procs_p, int *, *num_local_p * sizeof (int),
                         mpi_errno, "local proc array");
    pmi_errno = PMI_Get_clique_ranks (*local_procs_p, *num_local_p);
    MPIU_ERR_CHKANDJUMP1 (pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER,
                          "**pmi_get_clique_ranks",
                          "**pmi_get_clique_ranks %d", pmi_errno);
    /* make sure it's sorted so that ranks are consistent between processes */
    qsort (*local_procs_p, *num_local_p, sizeof (**local_procs_p), intcompar);

    /* find our local rank */
    lrank_p = bsearch (&global_rank, *local_procs_p, *num_local_p,
                       sizeof (**local_procs_p), intcompar);
    MPIU_ERR_CHKANDJUMP (lrank_p == NULL, mpi_errno, MPI_ERR_OTHER,
                         "**not_in_local_ranks");
    *local_rank_p = lrank_p - *local_procs_p;

    MPIU_CHKPMEM_COMMIT();
 fn_exit:
    return mpi_errno;
 fn_fail:
    /* --BEGIN ERROR HANDLING-- */
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
    /* --END ERROR HANDLING-- */
#else
    int mpi_errno = MPI_SUCCESS;
    int pmi_errno;
    int *procs;
    int i, j;
    char key[MPID_NEM_MAX_KEY_VAL_LEN];
    char *kvs_name;
    char **node_names;
    char *node_name_buf;
    int *node_ids;
    int num_nodes;
    int num_local;
    MPIU_CHKPMEM_DECL(2);
    MPIU_CHKLMEM_DECL(2);

    mpi_errno = MPIDI_PG_GetConnKVSname (&kvs_name);
    if (mpi_errno) MPIU_ERR_POP (mpi_errno);

    /* Put my hostname id */
    if (num_global > 1)
    {
        memset (key, 0, MPID_NEM_MAX_KEY_VAL_LEN);
        MPIU_Snprintf (key, MPID_NEM_MAX_KEY_VAL_LEN, "hostname[%d]",
                       global_rank);
        pmi_errno = PMI_KVS_Put (kvs_name, key, MPID_nem_hostname);
        MPIU_ERR_CHKANDJUMP1 (pmi_errno != PMI_SUCCESS, mpi_errno,
                              MPI_ERR_OTHER, "**pmi_kvs_put",
                              "**pmi_kvs_put %d", pmi_errno);
        pmi_errno = PMI_KVS_Commit (kvs_name);
        MPIU_ERR_CHKANDJUMP1 (pmi_errno != PMI_SUCCESS, mpi_errno,
                              MPI_ERR_OTHER, "**pmi_kvs_commit",
                              "**pmi_kvs_commit %d", pmi_errno);
        pmi_errno = PMI_Barrier();
        MPIU_ERR_CHKANDJUMP1 (pmi_errno != PMI_SUCCESS, mpi_errno,
                              MPI_ERR_OTHER, "**pmi_barrier",
                              "**pmi_barrier %d", pmi_errno);
    }
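    /* Editor's note: put/commit/barrier is the standard PMI idiom for an
       allgather-style exchange.  Each rank publishes its hostname under a
       unique key ("hostname[<rank>]"), commits it, and the barrier
       guarantees that the PMI_KVS_Get calls below observe every rank's
       value. */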
    /* allocate structures */
    MPIU_CHKPMEM_MALLOC (procs, int *, num_global * sizeof (int),
                         mpi_errno, "local process index array");
    MPIU_CHKPMEM_MALLOC (node_ids, int *, num_global * sizeof (int),
                         mpi_errno, "node_ids");
    MPIU_CHKLMEM_MALLOC (node_names, char **, num_global * sizeof (char*),
                         mpi_errno, "node_names");
    MPIU_CHKLMEM_MALLOC (node_name_buf, char *,
                         num_global * MPID_NEM_MAX_KEY_VAL_LEN * sizeof(char),
                         mpi_errno, "node_name_buf");

    /* Gather hostnames */
    for (i = 0; i < num_global; ++i)
    {
        node_names[i] = &node_name_buf[i * MPID_NEM_MAX_KEY_VAL_LEN];
        node_names[i][0] = '\0';
    }

    num_nodes = 0;
    num_local = 0;

    for (i = 0; i < num_global; ++i)
    {
        if (i == global_rank)
        {
            /* This is us, no need to perform a get */
            MPIU_Snprintf(node_names[num_nodes], MPID_NEM_MAX_KEY_VAL_LEN,
                          "%s", MPID_nem_hostname);
        }
        else
        {
            memset (key, 0, MPID_NEM_MAX_KEY_VAL_LEN);
            MPIU_Snprintf (key, MPID_NEM_MAX_KEY_VAL_LEN, "hostname[%d]", i);
            pmi_errno = PMI_KVS_Get (kvs_name, key, node_names[num_nodes],
                                     MPID_NEM_MAX_KEY_VAL_LEN);
            MPIU_ERR_CHKANDJUMP1 (pmi_errno != PMI_SUCCESS, mpi_errno,
                                  MPI_ERR_OTHER, "**pmi_kvs_get",
                                  "**pmi_kvs_get %d", pmi_errno);
        }

        if (!strncmp (MPID_nem_hostname, node_names[num_nodes],
                      MPID_NEM_MAX_KEY_VAL_LEN)
#if defined (ENABLED_ODD_EVEN_CLIQUES)
            /* Used for debugging on a single machine: odd procs on a node
               are seen as local to each other, and even procs on a node
               are seen as local to each other. */
            && ((global_rank & 0x1) == (i & 0x1))
#endif
            )
        {
            if (i == global_rank)
                *local_rank_p = num_local;
            procs[num_local] = i;
            ++num_local;
        }

        /* find the node_id for this process, or create a new one */
        /* FIXME: need a better algorithm -- this one does O(N^2)
           strncmp()s!  (see the sketch after this function) */
        for (j = 0; j < num_nodes; ++j)
            if (!strncmp (node_names[j], node_names[num_nodes],
                          MPID_NEM_MAX_KEY_VAL_LEN))
                break;
        if (j == num_nodes)
            ++num_nodes;
        else
            node_names[num_nodes][0] = '\0';
        node_ids[i] = j;
    }
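    /* Editor's note: at this point procs[0..num_local-1] holds, in
       increasing order, the global ranks local to us (same hostname, and
       under ENABLED_ODD_EVEN_CLIQUES the same parity),
       node_names[0..num_nodes-1] holds the distinct hostnames seen so
       far, and node_ids[i] is the node index of global rank i. */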
#if defined (ENABLED_ODD_EVEN_CLIQUES)
    /* create new node ids for all odd numbered processes */
    /* this may leave node ids with no processes assigned to them, but I
       think this is OK */
    for (i = 0; i < num_global; ++i)
        if (i & 0x1)
            node_ids[i] += num_nodes;
    num_nodes *= 2;
#endif

    MPIU_Assert (num_local > 0); /* there's always at least one process */

    /* reduce size of local process array */
    *local_procs_p = MPIU_Realloc (procs, num_local * sizeof (int));
    /* --BEGIN ERROR HANDLING-- */
    if (*local_procs_p == NULL)
    {
        MPIU_CHKMEM_SETERR (mpi_errno, num_local * sizeof (int),
                            "local process index array");
        goto fn_fail;
    }
    /* --END ERROR HANDLING-- */

    *num_local_p = num_local;
    *node_ids_p = node_ids;
    *num_nodes_p = num_nodes;

    MPIU_CHKPMEM_COMMIT();
 fn_exit:
    MPIU_CHKLMEM_FREEALL();
    return mpi_errno;
 fn_fail:
    /* --BEGIN ERROR HANDLING-- */
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
    /* --END ERROR HANDLING-- */
#endif
}
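/* ----------------------------------------------------------------------
   Editor's illustrative sketch (NOT part of the original file): one way
   to address the O(N^2) strncmp FIXME in get_local_procs().  Sort
   (hostname, rank) pairs once with qsort(), then assign node ids in a
   single pass -- O(N log N) string comparisons overall.  The struct and
   helper names are hypothetical.  Note that node ids come out in
   lexicographic hostname order rather than first-appearance order; that
   is still consistent across processes, since every process sorts the
   same input.
   ---------------------------------------------------------------------- */
struct name_rank
{
    const char *name;  /* hostname published by this rank */
    int rank;          /* global rank */
};

static int name_rank_compar (const void *a, const void *b)
{
    return strcmp (((const struct name_rank *)a)->name,
                   ((const struct name_rank *)b)->name);
}

/* pairs[0..n-1] in; node_ids[rank] and *num_nodes_p out */
static void assign_node_ids (struct name_rank *pairs, int n,
                             int *node_ids, int *num_nodes_p)
{
    int i;
    int num_nodes = 0;

    qsort (pairs, n, sizeof (*pairs), name_rank_compar);
    for (i = 0; i < n; ++i)
    {
        /* a hostname different from its predecessor starts a new node */
        if (i > 0 && strcmp (pairs[i].name, pairs[i-1].name) != 0)
            ++num_nodes;
        node_ids[pairs[i].rank] = num_nodes;
    }
    *num_nodes_p = (n > 0) ? num_nodes + 1 : 0;
}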
/* MPID_nem_vc_init initialize nemesis' part of the vc */
#undef FUNCNAME
#define FUNCNAME MPID_nem_vc_init
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int
MPID_nem_vc_init (MPIDI_VC_t *vc, const char *business_card)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
    MPIU_CHKPMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_VC_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_VC_INIT);

    vc_ch->send_seqno = 0;
    vc_ch->pending_pkt_len = 0;
    MPIU_CHKPMEM_MALLOC (vc_ch->pending_pkt, MPIDI_CH3_PktGeneric_t *,
                         sizeof (MPIDI_CH3_PktGeneric_t), mpi_errno,
                         "pending_pkt");

    /* We do different things for vcs in the COMM_WORLD pg vs other pgs:
       COMM_WORLD vcs may use shared memory, and already have queues
       allocated */
    if (vc->lpid < MPID_nem_mem_region.num_procs)
    {
        /* This vc is in COMM_WORLD */
        vc_ch->is_local = MPID_NEM_IS_LOCAL (vc->lpid);
        vc_ch->free_queue = MPID_nem_mem_region.FreeQ[vc->lpid]; /* networks and local procs have free queues */
        vc_ch->node_id = MPID_nem_mem_region.node_ids[vc->lpid];
    }
    else
    {
        /* this vc is the result of a connect */
        vc_ch->is_local = 0;
        vc_ch->free_queue = net_free_queue;
        vc_ch->node_id = -1; /* we're not using shared memory, so assume
                                we're on our own node */
    }

    if (vc_ch->is_local)
    {
        vc_ch->fbox_out = &MPID_nem_mem_region.mailboxes.out[MPID_nem_mem_region.local_ranks[vc->lpid]]->mpich2;
        vc_ch->fbox_in = &MPID_nem_mem_region.mailboxes.in[MPID_nem_mem_region.local_ranks[vc->lpid]]->mpich2;
        vc_ch->recv_queue = MPID_nem_mem_region.RecvQ[vc->lpid];

        /* override noncontig send function */
        vc->sendNoncontig_fn = MPIDI_CH3I_SendNoncontig;

        /* local processes use the default method */
        vc_ch->iStartContigMsg = NULL;
        vc_ch->iSendContig = NULL;

        vc_ch->lmt_initiate_lmt = MPID_nem_lmt_shm_initiate_lmt;
        vc_ch->lmt_start_recv = MPID_nem_lmt_shm_start_recv;
        vc_ch->lmt_start_send = MPID_nem_lmt_shm_start_send;
        vc_ch->lmt_handle_cookie = MPID_nem_lmt_shm_handle_cookie;
        vc_ch->lmt_done_send = MPID_nem_lmt_shm_done_send;
        vc_ch->lmt_done_recv = MPID_nem_lmt_shm_done_recv;

        vc_ch->lmt_copy_buf = NULL;
        vc_ch->lmt_copy_buf_handle = NULL;
        vc_ch->lmt_queue.head = NULL;
        vc_ch->lmt_queue.tail = NULL;
        vc_ch->lmt_active_lmt = NULL;
        vc_ch->lmt_enqueued = FALSE;

        vc->eager_max_msg_sz = MPID_NEM_MPICH2_DATA_LEN - sizeof(MPIDI_CH3_Pkt_t);
    }
    else
    {
        vc_ch->fbox_out = NULL;
        vc_ch->fbox_in = NULL;
        vc_ch->recv_queue = NULL;

        vc_ch->lmt_initiate_lmt = NULL;
        vc_ch->lmt_start_recv = NULL;
        vc_ch->lmt_start_send = NULL;
        vc_ch->lmt_handle_cookie = NULL;
        vc_ch->lmt_done_send = NULL;
        vc_ch->lmt_done_recv = NULL;

        /* FIXME: DARIUS set these to default for now */
        vc_ch->iStartContigMsg = NULL;
        vc_ch->iSendContig = NULL;

        mpi_errno = MPID_nem_net_module_vc_init (vc, business_card);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

/* FIXME: DARIUS -- enable this assert once these functions are implemented */
/*         /\* iStartContigMsg iSendContig and sendNoncontig_fn must *\/ */
/*         /\* be set for nonlocal processes.  Default functions only *\/ */
/*         /\* support shared-memory communication. *\/ */
/*         MPIU_Assert(vc_ch->iStartContigMsg && vc_ch->iSendContig && vc->sendNoncontig_fn); */
    }

    /* override rendezvous functions */
    vc->rndvSend_fn = MPID_nem_lmt_RndvSend;
    vc->rndvRecv_fn = MPID_nem_lmt_RndvRecv;

    /* FIXME: ch3 assumes there is a field called sendq_head in the ch
       portion of the vc.  This is unused in nemesis and should be set
       to NULL */
    vc_ch->sendq_head = NULL;

    MPIU_CHKPMEM_COMMIT();
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_VC_INIT);
    return mpi_errno;
 fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPID_nem_vc_destroy
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int
MPID_nem_vc_destroy(MPIDI_VC_t *vc)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_VC_DESTROY);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_VC_DESTROY);

    MPIU_Free(vc_ch->pending_pkt);

    mpi_errno = MPID_nem_net_module_vc_destroy(vc);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_VC_DESTROY);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

int
MPID_nem_get_business_card (int my_rank, char *value, int length)
{
    return MPID_nem_net_module_get_business_card (my_rank, &value, &length);
}

int
MPID_nem_connect_to_root (const char *business_card, MPIDI_VC_t *new_vc)
{
    return MPID_nem_net_module_connect_to_root (business_card, new_vc);
}
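/* ----------------------------------------------------------------------
   Editor's illustrative sketch (NOT part of the original file): the
   disabled PMI_Get_clique_* branch of get_local_procs() passes intcompar
   to qsort() and bsearch(); its definition is not shown in this excerpt
   and presumably appears earlier in the full source.  A minimal
   comparator over ints would look like this:
   ---------------------------------------------------------------------- */
static int intcompar (const void *a, const void *b)
{
    int x = *(const int *)a;
    int y = *(const int *)b;

    return (x > y) - (x < y); /* written this way to avoid overflow of x - y */
}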