📄 smpd_ipmi.c
字号:
pmi_process.smpd_id = 1; pmi_process.rpmi = PMI_TRUE; pmi_process.iproc = 0; pmi_process.nproc = 1; smpd_process.nproc = 1; smpd_process.is_singleton_client = SMPD_TRUE; /* Get passphrase for PM */ result = smpd_get_smpd_data("phrase", smpd_process.passphrase, SMPD_PASSPHRASE_MAX_LENGTH); if(result != SMPD_SUCCESS){ PMII_ERR_SETPRINTANDJUMP("Unable to obtain the smpd passphrase\n", result); } result = MPIDU_Sock_create_set(&pmi_process.set); if (result != MPI_SUCCESS){ MPIU_Snprintf(err_msg, PMII_MAX_ERR_MSG_LENGTH, "MPIDU_Sock_create_set failed: unable to create a sock set, error: %d\n", result); PMII_ERR_SETPRINTANDJUMP(err_msg, result); } /* Connect to PM */ result = uPMI_ConnectToHost(pmi_process.root_host, pmi_process.root_port, SMPD_CONNECTING_RPMI); if (result != SMPD_SUCCESS){ MPIU_Snprintf(err_msg, PMII_MAX_ERR_MSG_LENGTH, "uPMI_ConnectToHost failed: error: %s\n", result); PMII_ERR_SETPRINTANDJUMP(err_msg, result); } /* FIXME: Reduce size of rank_str & size_str */ MPIU_Snprintf(rank_str, PMI_MAX_STR_VAL_LENGTH, "%d", pmi_process.iproc); MPIU_Snprintf(size_str, PMI_MAX_STR_VAL_LENGTH, "%d", pmi_process.nproc); result = pmi_create_post_command("init", pmi_process.kvs_name, rank_str, size_str); if (result != PMI_SUCCESS){ pmi_err_printf("PMIi_InitSingleton failed: unable to create an init command.\n"); return PMI_FAIL; } /* parse the result of the command */ if (MPIU_Str_get_string_arg(pmi_process.context->read_cmd.cmd, "result", str, PMI_MAX_STR_VAL_LENGTH) != MPIU_STR_SUCCESS){ pmi_err_printf("PMIi_InitSingleton failed: no result string in the 'init' result command.\n"); return PMI_FAIL; } if (strncmp(str, SMPD_SUCCESS_STR, PMI_MAX_STR_VAL_LENGTH)){ pmi_err_printf("PMIi_InitSingleton failed: %s\n", str); return PMI_FAIL; } /* Send info about the process to PM */ result = pmi_create_post_command("proc_info", pmi_process.kvs_name, rank_str, size_str); if (result != PMI_SUCCESS){ pmi_err_printf("PMIi_InitSingleton failed: unable to create a 'proc_info' command.\n"); return PMI_FAIL; } /* parse the result of the command */ if (MPIU_Str_get_string_arg(pmi_process.context->read_cmd.cmd, "result", str, PMI_MAX_STR_VAL_LENGTH) != MPIU_STR_SUCCESS){ pmi_err_printf("PMIi_InitSingleton failed: no result string in the 'proc_info' result command.\n"); return PMI_FAIL; } if (strncmp(str, SMPD_SUCCESS_STR, PMI_MAX_STR_VAL_LENGTH)){ pmi_err_printf("PMIi_InitSingleton failed: %s\n", str); return PMI_FAIL; } pmi_process.init_finalized = PMI_INITIALIZED; } else{ MPIU_Snprintf(err_msg, PMII_MAX_ERR_MSG_LENGTH, "No mechanism specified for connecting to the process manager - host %s or port %d provided.\n", pmi_process.host, pmi_process.port); PMII_ERR_SETPRINTANDJUMP(err_msg, result); }fn_exit: if(singleton_client_set){ result = MPIDU_Sock_destroy_set(singleton_client_set); if(result != MPI_SUCCESS){ pmi_err_printf("MPIDU_Sock_destroy_set failed: unable to destroy a sock set, error: %d\n", result); } } /* Make sure we return the error code set within the funcn */ return retval;fn_fail: /* FIXME : Make sure the newly created mpiexec process is also killed in the case of an error */ if(p_singleton_context){ result = smpd_free_context(p_singleton_context); if(result != SMPD_SUCCESS){ pmi_err_printf("smpd_free_context failed, error = %d\n", result); } } goto fn_exit;}int iPMI_Initialized(PMI_BOOL *initialized){ if (initialized == NULL) return PMI_ERR_INVALID_ARG; if (pmi_process.init_finalized == PMI_INITIALIZED) { *initialized = PMI_TRUE; } else { *initialized = PMI_FALSE; } return PMI_SUCCESS;}static int parse_clique(const char *str_orig){ int count, i; char *str, *token; int first, last; /* count clique */ count = 0; str = MPIU_Strdup(str_orig); if (str == NULL) return PMI_FAIL; token = strtok(str, ","); while (token) { first = atoi(token); while (isdigit(*token)) token++; if (*token == '\0') count++; else { if (*token == '.') { token++; token++; last = atoi(token); count += last - first + 1; } else { pmi_err_printf("unexpected clique token: '%s'\n", token); MPIU_Free(str); return PMI_FAIL; } } token = strtok(NULL, ","); } MPIU_Free(str); /* allocate array */ pmi_process.clique_ranks = (int*)MPIU_Malloc(count * sizeof(int)); if (pmi_process.clique_ranks == NULL) return PMI_FAIL; pmi_process.clique_size = count; /* populate array */ count = 0; str = MPIU_Strdup(str_orig); if (str == NULL) return PMI_FAIL; token = strtok(str, ","); while (token) { first = atoi(token); while (isdigit(*token)) token++; if (*token == '\0') { pmi_process.clique_ranks[count] = first; count++; } else { if (*token == '.') { token++; token++; last = atoi(token); for (i=first; i<=last; i++) { pmi_process.clique_ranks[count] = i; count++; } } else { pmi_err_printf("unexpected clique token: '%s'\n", token); MPIU_Free(str); return PMI_FAIL; } } token = strtok(NULL, ","); } MPIU_Free(str); /* printf("clique: %d [", pmi_process.iproc); for (i=0; i<pmi_process.clique_size; i++) { printf("%d,", pmi_process.clique_ranks[i]); } printf("]\n"); fflush(stdout); */ return PMI_SUCCESS;}static int rPMI_Init(int *spawned){ char *p; int result; char rank_str[100], size_str[100]; char str[1024]; if (spawned == NULL) return PMI_ERR_INVALID_ARG; /* initialize to defaults */ smpd_process.id = 1; pmi_process.smpd_id = 1; pmi_process.rpmi = PMI_TRUE; pmi_process.iproc = 0; pmi_process.nproc = 1; p = getenv("PMI_ROOT_HOST"); if (p == NULL) { pmi_err_printf("unable to initialize the rPMI library: no PMI_ROOT_HOST specified.\n"); return PMI_FAIL; } strncpy(pmi_process.root_host, p, 100); p = getenv("PMI_ROOT_PORT"); if (p == NULL) { /* set to default port? */ pmi_err_printf("unable to initialize the rPMI library: no PMI_ROOT_PORT specified.\n"); return PMI_FAIL; } pmi_process.root_port = atoi(p); if (pmi_process.root_port < 1) { pmi_err_printf("invalid root port specified: %s\n", p); return PMI_FAIL; } smpd_process.port = pmi_process.root_port; strcpy(smpd_process.host, pmi_process.root_host); p = getenv("PMI_SPAWN"); if (p) { *spawned = atoi(p); } else { *spawned = 0; } p = getenv("PMI_KVS"); if (p != NULL) { /* use specified kvs name */ strncpy(pmi_process.kvs_name, p, PMI_MAX_KVS_NAME_LENGTH); strncpy(smpd_process.kvs_name, p, PMI_MAX_KVS_NAME_LENGTH); } else { /* use default kvs name */ strncpy(pmi_process.kvs_name, "default_mpich_kvs_name", PMI_MAX_KVS_NAME_LENGTH); strncpy(smpd_process.kvs_name, "default_mpich_kvs_name", PMI_MAX_KVS_NAME_LENGTH); } p = getenv("PMI_DOMAIN"); if (p != NULL) { strncpy(pmi_process.domain_name, p, PMI_MAX_KVS_NAME_LENGTH); strncpy(smpd_process.domain_name, p, PMI_MAX_KVS_NAME_LENGTH); } else { strncpy(pmi_process.domain_name, "mpich2", PMI_MAX_KVS_NAME_LENGTH); strncpy(smpd_process.domain_name, "mpich2", PMI_MAX_KVS_NAME_LENGTH); } p = getenv("PMI_RANK"); if (p != NULL) { pmi_process.iproc = atoi(p); if (pmi_process.iproc < 0) { pmi_err_printf("invalid rank %d\n", pmi_process.iproc); return PMI_FAIL; } } p = getenv("PMI_SIZE"); if (p != NULL) { pmi_process.nproc = atoi(p); if (pmi_process.nproc < 1) { pmi_err_printf("invalid size %d\n", pmi_process.nproc); return PMI_FAIL; } } smpd_process.nproc = pmi_process.nproc;#ifdef SINGLE_PROCESS_OPTIMIZATION/* leave this code #ifdef'd out so we can test rPMI stuff with one process */ if (pmi_process.nproc == 1) { pmi_process.local_kvs = PMI_TRUE; result = smpd_dbs_init(); if (result != SMPD_SUCCESS) { pmi_err_printf("unable to initialize the local dbs engine.\n"); return PMI_FAIL; } result = smpd_dbs_create(pmi_process.kvs_name); if (result != SMPD_SUCCESS) { pmi_err_printf("unable to create the process group kvs\n"); return PMI_FAIL; } pmi_process.init_finalized = PMI_INITIALIZED; return PMI_SUCCESS; }#endif p = getenv("PMI_CLIQUE"); if (p != NULL) { parse_clique(p); } /* printf("PMI_ROOT_HOST=%s PMI_ROOT_PORT=%s PMI_RANK=%s PMI_SIZE=%s PMI_KVS=%s PMI_CLIQUE=%s\n", getenv("PMI_ROOT_HOST"), getenv("PMI_ROOT_PORT"), getenv("PMI_RANK"), getenv("PMI_SIZE"), getenv("PMI_KVS"), getenv("PMI_CLIQUE")); fflush(stdout); */ if (pmi_process.iproc == 0) { p = getenv("PMI_ROOT_LOCAL"); if (p && strcmp(p, "1") == 0) {#ifdef HAVE_WINDOWS_H pmi_process.hRootThreadReadyEvent = CreateEvent(NULL, TRUE, FALSE, NULL); if (pmi_process.hRootThreadReadyEvent == NULL) { pmi_err_printf("unable to create the root listener synchronization event, error: %d\n", GetLastError()); return PMI_FAIL; } pmi_process.hRootThread = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)root_smpd, NULL, 0, NULL); if (pmi_process.hRootThread == NULL) { pmi_err_printf("unable to create the root listener thread: error %d\n", GetLastError()); return PMI_FAIL; } if (WaitForSingleObject(pmi_process.hRootThreadReadyEvent, 60000) != WAIT_OBJECT_0) { pmi_err_printf("the root process thread failed to initialize.\n"); return PMI_FAIL; }#else result = fork(); if (result == -1) { pmi_err_printf("unable to fork the root listener, errno %d\n", errno); return PMI_FAIL; } if (result == 0) { root_smpd(NULL); exit(0); } pmi_process.root_pid = result;#endif } } /* connect to the root */ result = MPIDU_Sock_create_set(&pmi_process.set); if (result != MPI_SUCCESS) { pmi_err_printf("PMI_Init failed: unable to create a sock set, error: %d\n", result); return PMI_FAIL; } result = uPMI_ConnectToHost(pmi_process.root_host, pmi_process.root_port, SMPD_CONNECTING_RPMI); if (result != SMPD_SUCCESS) { pmi_err_printf("PMI_Init failed.\n"); return PMI_FAIL; } pmi_process.init_finalized = PMI_INITIALIZED; sprintf(rank_str, "%d", pmi_process.iproc); sprintf(size_str, "%d", pmi_process.nproc); result = pmi_create_post_command("init", pmi_process.kvs_name, rank_str, size_str); if (result != PMI_SUCCESS) { pmi_err_printf("PMI_Init failed: unable to create an init command.\n"); return PMI_FAIL; } /* parse the result of the command */ if (MPIU_Str_get_string_arg(pmi_process.context->read_cmd.cmd, "result", str, 1024) != MPIU_STR_SUCCESS) { pmi_err_printf("PMI_Init failed: no result string in the result command.\n"); return PMI_FAIL; } if (strcmp(str, SMPD_SUCCESS_STR)) { pmi_err_printf("PMI_Init failed: %s\n", str); return PMI_FAIL; } return PMI_SUCCESS;}static int rPMI_Finalize(){ int result; char rank_str[100]; char str[1024];#ifndef HAVE_WINDOWS_H int status;#endif if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_SUCCESS; if(pmi_process.init_finalized < PMI_INITIALIZED) return PMI_SUCCESS; if (pmi_process.local_kvs) { smpd_dbs_finalize(); if(pmi_process.singleton_mpiexec_fd != PMII_PROCESS_INVALID_HANDLE){#ifdef HAVE_WINDOWS_H WaitForSingleObject(pmi_process.singleton_mpiexec_fd, INFINITE);#else waitpid(pmi_process.singleton_mpiexec_fd, &status, WUNTRACED);#endif } result = MPIDU_Sock_finalize(); pmi_process.init_finalized = PMI_FINALIZED; return PMI_SUCCESS; } sprintf(rank_str, "%d", pmi_process.iproc); result = pmi_create_post_command("finalize", pmi_process.kvs_name, rank_str, NULL); if (result != PMI_SUCCESS)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -