📄 smpd_ipmi.c
字号:
{ pmi_err_printf("PMI_Finalize failed: unable to create an finalize command.\n"); return PMI_FAIL; } /* parse the result of the command */ if (MPIU_Str_get_string_arg(pmi_process.context->read_cmd.cmd, "result", str, 1024) != MPIU_STR_SUCCESS) { pmi_err_printf("PMI_Finalize failed: no result string in the result command.\n"); return PMI_FAIL; } if (strcmp(str, SMPD_SUCCESS_STR)) { pmi_err_printf("PMI_Finalize failed: %s\n", str); return PMI_FAIL; } if (pmi_process.iproc == 0) { /* the root process tells the root to exit when all the pmi contexts have exited */ result = pmi_create_post_command("exit_on_done", NULL, NULL, NULL); if (result != PMI_SUCCESS) { pmi_err_printf("exit_on_done command failed.\n"); return PMI_FAIL; } /*printf("exit_on_done command returned successfully.\n");fflush(stdout);*/ } /*printf("entering finalize pmi_barrier.\n");fflush(stdout);*/ PMI_Barrier(); /*printf("after finalize pmi_barrier, posting done command.\n");fflush(stdout);*/ /* post a done command to close the pmi context */ result = pmi_create_post_command("done", NULL, NULL, NULL); if (result != PMI_SUCCESS) { pmi_err_printf("failed.\n"); return PMI_FAIL; } if (pmi_process.iproc == 0) {#ifdef HAVE_WINDOWS_H WaitForSingleObject(pmi_process.hRootThread, INFINITE); if(pmi_process.singleton_mpiexec_fd != PMII_PROCESS_INVALID_HANDLE){ WaitForSingleObject(pmi_process.singleton_mpiexec_fd, INFINITE); }#else waitpid(pmi_process.root_pid, &status, WUNTRACED); if(pmi_process.singleton_mpiexec_fd != PMII_PROCESS_INVALID_HANDLE){ waitpid(pmi_process.singleton_mpiexec_fd, &status, WUNTRACED); }#endif } /*if (pmi_process.sock != MPIDU_SOCK_INVALID_SOCK)*/ { result = MPIDU_Sock_finalize(); if (result != MPI_SUCCESS) { /*pmi_err_printf("MPIDU_Sock_finalize failed, error: %d\n", result);*/ } } pmi_process.init_finalized = PMI_FINALIZED; return PMI_SUCCESS;}int iPMI_Init(int *spawned){ char *p; int result; char rank_str[100], size_str[100]; char str[1024]; if (spawned == NULL){ return PMI_ERR_INVALID_ARG; } /* Enable smpd state machine tracing */ /* smpd_process.verbose = SMPD_TRUE; smpd_process.dbg_state |= SMPD_DBG_STATE_ERROUT | SMPD_DBG_STATE_STDOUT | SMPD_DBG_STATE_TRACE; */ /* don't allow pmi_init to be called more than once */ if (pmi_process.init_finalized == PMI_INITIALIZED) return PMI_SUCCESS; /* initialize to defaults */ result = MPIDU_Sock_init(); if (result != MPI_SUCCESS) { pmi_err_printf("MPIDU_Sock_init failed,\nsock error: %s\n", get_sock_error_string(result)); return PMI_FAIL; } result = smpd_init_process(); if (result != SMPD_SUCCESS) { pmi_err_printf("unable to initialize the smpd global process structure.\n"); return PMI_FAIL; } p = getenv("PMI_ROOT_HOST"); if (p != NULL) { return rPMI_Init(spawned); } pmi_process.iproc = 0; pmi_process.nproc = 1; p = getenv("PMI_SPAWN"); if (p) { *spawned = atoi(p); } else { *spawned = 0; } p = getenv("PMI_APPNUM"); if (p) { pmi_process.appnum = atoi(p); } else { pmi_process.appnum = 0; } /* Determine If singleton */ p = getenv("PMI_SMPD_FD"); if( p == NULL){ p = getenv("PMI_HOST"); if( p == NULL){ p = getenv("PMI_KVS"); if(p == NULL){ /* Assume singleton. Setup the PMI service when required i.e., later */ pmi_process.init_finalized = PMI_SINGLETON_INIT_BUT_NO_PM; /* Rank = 0 & Nprocs = 1 initialized by default above */ return PMI_SUCCESS; } } } else{ /* decode PMI_SMPD_FD */#ifdef HAVE_WINDOWS_H pmi_process.smpd_fd = smpd_decode_handle(p);#else pmi_process.smpd_fd = (MPIDU_SOCK_NATIVE_FD)atoi(p);#endif if(pmi_process.smpd_fd <= 0){ pmi_process.smpd_fd = 0; putenv("PMI_SMPD_FD="); } } p = getenv("PMI_KVS"); if (p != NULL) { strncpy(pmi_process.kvs_name, p, PMI_MAX_KVS_NAME_LENGTH); } else { pmi_process.local_kvs = PMI_TRUE; result = smpd_dbs_init(); if (result != SMPD_SUCCESS) { pmi_err_printf("unable to initialize the local dbs engine.\n"); return PMI_FAIL; } result = smpd_dbs_create(pmi_process.kvs_name); if (result != SMPD_SUCCESS) { pmi_err_printf("unable to create the process group kvs\n"); return PMI_FAIL; } strncpy(pmi_process.domain_name, smpd_process.domain_name, PMI_MAX_KVS_NAME_LENGTH); pmi_process.init_finalized = PMI_INITIALIZED; return PMI_SUCCESS; } p = getenv("PMI_DOMAIN"); if (p != NULL) { strncpy(pmi_process.domain_name, p, PMI_MAX_KVS_NAME_LENGTH); } else { strncpy(pmi_process.domain_name, "mpich2", PMI_MAX_KVS_NAME_LENGTH); } p = getenv("PMI_RANK"); if (p != NULL) { pmi_process.iproc = atoi(p); if (pmi_process.iproc < 0) { pmi_err_printf("invalid rank %d, setting to 0\n", pmi_process.iproc); pmi_process.iproc = 0; } } p = getenv("PMI_SIZE"); if (p != NULL) { pmi_process.nproc = atoi(p); if (pmi_process.nproc < 1) { pmi_err_printf("invalid size %d, setting to 1\n", pmi_process.nproc); pmi_process.nproc = 1; } } p = getenv("PMI_SMPD_ID"); if (p != NULL){ pmi_process.smpd_id = atoi(p); smpd_process.id = pmi_process.smpd_id; } p = getenv("PMI_SMPD_KEY"); if (p != NULL) { pmi_process.smpd_key = atoi(p); } p = getenv("PMI_SMPD_FD"); if (p != NULL) { result = MPIDU_Sock_create_set(&pmi_process.set); if (result != MPI_SUCCESS) { pmi_err_printf("PMI_Init failed: unable to create a sock set, error:\n%s\n", get_sock_error_string(result)); return PMI_FAIL; } /* pmi_process.smpd_fd is decoded when checking for Singleton Init */ result = MPIDU_Sock_native_to_sock(pmi_process.set, pmi_process.smpd_fd, NULL, &pmi_process.sock); if (result != MPI_SUCCESS) { pmi_err_printf("MPIDU_Sock_native_to_sock failed, error %s\n", get_sock_error_string(result)); return PMI_FAIL; } result = smpd_create_context(SMPD_CONTEXT_PMI, pmi_process.set, pmi_process.sock, pmi_process.smpd_id, &pmi_process.context); if (result != SMPD_SUCCESS) { pmi_err_printf("unable to create a pmi context.\n"); return PMI_FAIL; } } else { p = getenv("PMI_HOST"); if (p != NULL) { strncpy(pmi_process.host, p, PMI_MAX_HOST_NAME_LENGTH); p = getenv("PMI_PORT"); if (p != NULL) { pmi_process.port = atoi(p); result = MPIDU_Sock_create_set(&pmi_process.set); if (result != MPI_SUCCESS) { pmi_err_printf("PMI_Init failed: unable to create a sock set, error: %d\n", result); return PMI_FAIL; } result = uPMI_ConnectToHost(pmi_process.host, pmi_process.port, SMPD_CONNECTING_PMI); if (result != SMPD_SUCCESS) { pmi_err_printf("PMI_Init failed.\n"); return PMI_FAIL; } } else { pmi_err_printf("No mechanism specified for connecting to the process manager - host %s but no port provided.\n", pmi_process.host); return PMI_FAIL; } } else { /* SINGLETON: Assume singleton here and initialize to SINGLETON_INIT_BUT_NO_PM * Also set PMI_KVS & PMI_DOMAIN after this step... */ pmi_err_printf("No mechanism specified for connecting to the process manager.\n"); return PMI_FAIL; } } p = getenv("PMI_CLIQUE"); if (p != NULL) { parse_clique(p); } /* printf("PMI_RANK=%s PMI_SIZE=%s PMI_KVS=%s PMI_SMPD_ID=%s PMI_SMPD_FD=%s PMI_SMPD_KEY=%s\n PMI_SPAWN=%s", getenv("PMI_RANK"), getenv("PMI_SIZE"), getenv("PMI_KVS"), getenv("PMI_SMPD_ID"), getenv("PMI_SMPD_FD"), getenv("PMI_SMPD_KEY"), getenv("PMI_SPAWN")); fflush(stdout); */ pmi_process.init_finalized = PMI_INITIALIZED; sprintf(rank_str, "%d", pmi_process.iproc); sprintf(size_str, "%d", pmi_process.nproc); result = pmi_create_post_command("init", pmi_process.kvs_name, rank_str, size_str); if (result != PMI_SUCCESS) { pmi_err_printf("PMI_Init failed: unable to create an init command.\n"); return PMI_FAIL; } /* parse the result of the command */ if (MPIU_Str_get_string_arg(pmi_process.context->read_cmd.cmd, "result", str, 1024) != MPIU_STR_SUCCESS) { pmi_err_printf("PMI_Init failed: no result string in the result command.\n"); return PMI_FAIL; } if (strcmp(str, SMPD_SUCCESS_STR)) { pmi_err_printf("PMI_Init failed: %s\n", str); return PMI_FAIL; } /* if (*spawned && pmi_process.iproc == 0) { char key[1024], val[8192]; key[0] = '\0'; result = PMI_KVS_Iter_first(pmi_process.kvs_name, key, 1024, val, 8192); if (result != PMI_SUCCESS || key[0] == '\0') { printf("No preput values in %s\n", pmi_process.kvs_name); } while (result == PMI_SUCCESS && key[0] != '\0') { printf("PREPUT key=%s, val=%s\n", key, val); result = PMI_KVS_Iter_next(pmi_process.kvs_name, key, 1024, val, 8192); } fflush(stdout); } iPMI_Barrier(); */ /*printf("iPMI_Init returning success.\n");fflush(stdout);*/ return PMI_SUCCESS;}int iPMI_Finalize(){ int result; char rank_str[100]; char str[1024]; if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_SUCCESS; if (pmi_process.rpmi) { return rPMI_Finalize(); } if(pmi_process.init_finalized < PMI_INITIALIZED) return PMI_SUCCESS; if (pmi_process.local_kvs) { smpd_dbs_finalize(); result = MPIDU_Sock_finalize(); pmi_process.init_finalized = PMI_FINALIZED; return PMI_SUCCESS; } sprintf(rank_str, "%d", pmi_process.iproc); result = pmi_create_post_command("finalize", pmi_process.kvs_name, rank_str, NULL); if (result != PMI_SUCCESS) { pmi_err_printf("PMI_Finalize failed: unable to create an finalize command.\n"); goto fn_fail; } /* parse the result of the command */ if (MPIU_Str_get_string_arg(pmi_process.context->read_cmd.cmd, "result", str, 1024) != MPIU_STR_SUCCESS) { pmi_err_printf("PMI_Finalize failed: no result string in the result command.\n"); goto fn_fail; } if (strcmp(str, SMPD_SUCCESS_STR)) { pmi_err_printf("PMI_Finalize failed: %s\n", str); goto fn_fail; } PMI_Barrier(); /* post the done command and wait for the result */ result = pmi_create_post_command("done", NULL, NULL, NULL); if (result != PMI_SUCCESS) { pmi_err_printf("failed.\n"); goto fn_fail; } /*if (pmi_process.sock != MPIDU_SOCK_INVALID_SOCK)*/ { result = MPIDU_Sock_finalize(); if (result != MPI_SUCCESS) { /*pmi_err_printf("MPIDU_Sock_finalize failed,\nsock error: %s\n", get_sock_error_string(result));*/ } } pmi_process.init_finalized = PMI_FINALIZED; /*printf("iPMI_Finalize success.\n");fflush(stdout);*/ return PMI_SUCCESS;fn_fail: /* set the state to finalized so PMI_Abort will not dereference mangled structures due to a failure */ pmi_process.init_finalized = PMI_FINALIZED; return PMI_FAIL;}int iPMI_Abort(int exit_code, const char error_msg[]){ int result; smpd_command_t *cmd_ptr; /* flush any output before aborting */ /* This doesn't work because it flushes output from the mpich dll but does not flush the main module's output */ fflush(stdout); fflush(stderr); if (pmi_process.init_finalized == PMI_FINALIZED) { printf("PMI_Abort called after PMI_Finalize, error message:\n%s\n", error_msg); fflush(stdout);#ifdef HAVE_WINDOWS_H /* ExitProcess(exit_code); */ TerminateProcess(GetCurrentProcess(), exit_code);#else exit(exit_code); return PMI_FAIL;#endif } if (pmi_process.local_kvs) { if (smpd_process.verbose_abort_output) { printf("\njob aborted:\n"); printf("process: node: exit code: error message:\n");
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -