📄 smpd_ipmi.c
字号:
/* post the done command and wait for the result */ result = pmi_create_post_command("done", NULL, NULL, NULL); if (result != PMI_SUCCESS) { pmi_err_printf("failed.\n"); goto fn_fail; } if (pmi_process.sock != MPIDU_SOCK_INVALID_SOCK) { result = MPIDU_Sock_finalize(); if (result != MPI_SUCCESS) { pmi_err_printf("MPIDU_Sock_finalize failed,\nsock error: %s\n", get_sock_error_string(result)); } } pmi_process.init_finalized = PMI_FINALIZED; /*printf("iPMI_Finalize success.\n");fflush(stdout);*/ return PMI_SUCCESS;fn_fail: /* set the state to finalized so PMI_Abort will not dereference mangled structures due to a failure */ pmi_process.init_finalized = PMI_FINALIZED; return PMI_FAIL;}int iPMI_Abort(int exit_code, const char error_msg[]){ int result; smpd_command_t *cmd_ptr; /* flush any output before aborting */ /* This doesn't work because it flushes output from the mpich dll but does not flush the main module's output */ fflush(stdout); fflush(stderr); if (pmi_process.init_finalized == PMI_FINALIZED) { printf("PMI_Abort called after PMI_Finalize, error message:\n%s\n", error_msg); fflush(stdout);#ifdef HAVE_WINDOWS_H ExitProcess(exit_code);#else exit(exit_code); return PMI_FAIL;#endif } if (pmi_process.local_kvs) { if (smpd_process.verbose_abort_output) { printf("\njob aborted:\n"); printf("process: node: exit code: error message:\n"); printf("0: localhost: %d", exit_code); if (error_msg != NULL) { printf(": %s", error_msg); } printf("\n"); } else { if (error_msg != NULL) { printf("%s\n", error_msg); } } fflush(stdout); smpd_dbs_finalize(); pmi_process.init_finalized = PMI_FINALIZED;#ifdef HAVE_WINDOWS_H ExitProcess(exit_code);#else exit(exit_code); return PMI_FAIL;#endif } result = smpd_create_command("abort_job", pmi_process.smpd_id, 0, SMPD_FALSE, &cmd_ptr); if (result != SMPD_SUCCESS) { pmi_err_printf("unable to create an abort command.\n"); return PMI_FAIL; } result = smpd_add_command_arg(cmd_ptr, "name", pmi_process.kvs_name); if (result != SMPD_SUCCESS) { pmi_err_printf("unable to add the kvs name('%s') to the abort command.\n", pmi_process.kvs_name); return PMI_FAIL; } result = smpd_add_command_int_arg(cmd_ptr, "rank", pmi_process.iproc); if (result != SMPD_SUCCESS) { pmi_err_printf("unable to add the rank %d to the abort command.\n", pmi_process.iproc); return PMI_FAIL; } result = smpd_add_command_arg(cmd_ptr, "error", (char*)error_msg); if (result != SMPD_SUCCESS) { pmi_err_printf("unable to add the error message('%s') to the abort command.\n", error_msg); return PMI_FAIL; } result = smpd_add_command_int_arg(cmd_ptr, "exit_code", exit_code); if (result != SMPD_SUCCESS) { pmi_err_printf("unable to add the exit code(%d) to the abort command.\n", exit_code); return PMI_FAIL; } /* post the write of the command */ result = smpd_post_write_command(pmi_process.context, cmd_ptr); if (result != SMPD_SUCCESS) { pmi_err_printf("unable to post a write of the abort command.\n"); return PMI_FAIL; } /* and post a read for the result */ /* result = smpd_post_read_command(pmi_process.context); if (result != SMPD_SUCCESS) { pmi_err_printf("unable to post a read of the next command on the pmi context.\n"); return PMI_FAIL; } */ /* let the state machine send the command and receive the result */ result = smpd_enter_at_state(pmi_process.set, SMPD_WRITING_CMD); if (result != SMPD_SUCCESS) { pmi_err_printf("the state machine logic failed to handle the abort command.\n"); return PMI_FAIL; }#ifdef HAVE_WINDOWS_H ExitProcess(exit_code);#else exit(exit_code); return PMI_FAIL;#endif}int iPMI_Get_size(int *size){ if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_ERR_INIT; if (size == NULL) return PMI_ERR_INVALID_ARG; *size = pmi_process.nproc; return PMI_SUCCESS;}int iPMI_Get_rank(int *rank){ if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_ERR_INIT; if (rank == NULL) return PMI_ERR_INVALID_ARG; *rank = pmi_process.iproc; return PMI_SUCCESS;}int iPMI_Get_universe_size(int *size){ if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_ERR_INIT; if (size == NULL) return PMI_ERR_INVALID_ARG; *size = -1; return PMI_SUCCESS;}int iPMI_Get_appnum(int *appnum){ if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_ERR_INIT; if (appnum == NULL) return PMI_ERR_INVALID_ARG; *appnum = pmi_process.appnum; return PMI_SUCCESS;}int iPMI_Get_clique_size( int *size ){ if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_ERR_INIT; if (size == NULL) return PMI_ERR_INVALID_ARG; if (pmi_process.clique_size == 0) *size = 1; else *size = pmi_process.clique_size; return PMI_SUCCESS;}int iPMI_Get_clique_ranks( int ranks[], int length ){ int i; if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_ERR_INIT; if (ranks == NULL) return PMI_ERR_INVALID_ARG; if (length < pmi_process.clique_size) return PMI_ERR_INVALID_LENGTH; if (pmi_process.clique_size == 0) { *ranks = 0; } else { for (i=0; i<pmi_process.clique_size; i++) { ranks[i] = pmi_process.clique_ranks[i]; } } return PMI_SUCCESS;}int iPMI_Get_id( char id_str[], int length ){ return iPMI_KVS_Get_my_name(id_str, length);}int iPMI_Get_id_length_max(int *maxlen){ return iPMI_KVS_Get_name_length_max(maxlen);}int iPMI_Get_kvs_domain_id(char id_str[], int length){ if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_ERR_INIT; if (id_str == NULL) return PMI_ERR_INVALID_ARG; if (length < PMI_MAX_KVS_NAME_LENGTH) return PMI_ERR_INVALID_LENGTH; strncpy(id_str, pmi_process.domain_name, length); return PMI_SUCCESS;}int iPMI_Barrier(){ int result; char count_str[20]; char str[1024]; if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_ERR_INIT; if (pmi_process.nproc == 1) return PMI_SUCCESS; /*printf("entering barrier %d, %s\n", pmi_process.nproc, pmi_process.kvs_name);fflush(stdout);*/ /* encode the size of the barrier */ snprintf(count_str, 20, "%d", pmi_process.nproc); /* post the command and wait for the result */ result = pmi_create_post_command("barrier", pmi_process.kvs_name, NULL, count_str); if (result != PMI_SUCCESS) { pmi_err_printf("PMI_Barrier failed.\n"); return PMI_FAIL; } /* interpret the result */ if (MPIU_Str_get_string_arg(pmi_process.context->read_cmd.cmd, "result", str, 1024) != MPIU_STR_SUCCESS) { pmi_err_printf("PMI_Barrier failed: no result string in the result command.\n"); return PMI_FAIL; } if (strcmp(str, DBS_SUCCESS_STR)) { pmi_err_printf("PMI_Barrier failed: '%s'\n", str); return PMI_FAIL; } /*printf("iPMI_Barrier success.\n");fflush(stdout);*/ return PMI_SUCCESS;}int iPMI_KVS_Get_my_name(char kvsname[], int length){ if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_ERR_INIT; if (kvsname == NULL) return PMI_ERR_INVALID_ARG; if (length < PMI_MAX_KVS_NAME_LENGTH) return PMI_ERR_INVALID_LENGTH; strncpy(kvsname, pmi_process.kvs_name, length); /* printf("my kvs name is %s\n", kvsname);fflush(stdout); */ return PMI_SUCCESS;}int iPMI_KVS_Get_name_length_max(int *maxlen){ if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_ERR_INIT; if (maxlen == NULL) return PMI_ERR_INVALID_ARG; *maxlen = PMI_MAX_KVS_NAME_LENGTH; return PMI_SUCCESS;}int iPMI_KVS_Get_key_length_max(int *maxlen){ if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_ERR_INIT; if (maxlen == NULL) return PMI_ERR_INVALID_ARG; *maxlen = PMI_MAX_KEY_LEN; return PMI_SUCCESS;}int iPMI_KVS_Get_value_length_max(int *maxlen){ if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_ERR_INIT; if (maxlen == NULL) return PMI_ERR_INVALID_ARG; *maxlen = PMI_MAX_VALUE_LEN; return PMI_SUCCESS;}int iPMI_KVS_Create(char kvsname[], int length){ int result; char str[1024]; if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_ERR_INIT; if (kvsname == NULL) return PMI_ERR_INVALID_ARG; if (length < PMI_MAX_KVS_NAME_LENGTH) return PMI_ERR_INVALID_LENGTH; if (pmi_process.local_kvs) { result = smpd_dbs_create(kvsname); return (result == SMPD_SUCCESS) ? PMI_SUCCESS : PMI_FAIL; } result = pmi_create_post_command("dbcreate", NULL, NULL, NULL); if (result != PMI_SUCCESS) { pmi_err_printf("PMI_KVS_Create failed: unable to create a pmi kvs space.\n"); return PMI_FAIL; } /* parse the result of the command */ if (MPIU_Str_get_string_arg(pmi_process.context->read_cmd.cmd, "result", str, 1024) != MPIU_STR_SUCCESS) { pmi_err_printf("PMI_KVS_Create failed: no result string in the result command.\n"); return PMI_FAIL; } if (strcmp(str, DBS_SUCCESS_STR)) { pmi_err_printf("PMI_KVS_Create failed: %s\n", str); return PMI_FAIL; } if (MPIU_Str_get_string_arg(pmi_process.context->read_cmd.cmd, "name", str, 1024) != MPIU_STR_SUCCESS) { pmi_err_printf("PMI_KVS_Create failed: no kvs name in the dbcreate result command.\n"); return PMI_FAIL; } strncpy(kvsname, str, PMI_MAX_KVS_NAME_LENGTH); /*printf("iPMI_KVS_Create success.\n");fflush(stdout);*/ return PMI_SUCCESS;}int iPMI_KVS_Destroy(const char kvsname[]){ int result; char str[1024]; if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_ERR_INIT; if (kvsname == NULL) return PMI_ERR_INVALID_ARG; if (pmi_process.local_kvs) { result = smpd_dbs_destroy(kvsname); return (result == SMPD_SUCCESS) ? PMI_SUCCESS : PMI_FAIL; } result = pmi_create_post_command("dbdestroy", kvsname, NULL, NULL); if (result != PMI_SUCCESS) { pmi_err_printf("PMI_KVS_Destroy failed: unable to destroy the pmi kvs space named '%s'.\n", kvsname); return PMI_FAIL; } /* parse the result of the command */ if (MPIU_Str_get_string_arg(pmi_process.context->read_cmd.cmd, "result", str, 1024) != MPIU_STR_SUCCESS) { pmi_err_printf("PMI_KVS_Destroy failed: no result string in the result command.\n"); return PMI_FAIL; } if (strcmp(str, DBS_SUCCESS_STR)) { pmi_err_printf("PMI_KVS_Destroy failed: %s\n", str); return PMI_FAIL; } return PMI_SUCCESS;}int iPMI_KVS_Put(const char kvsname[], const char key[], const char value[]){ int result; char str[1024]; if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_ERR_INIT; if (kvsname == NULL) return PMI_ERR_INVALID_ARG; if (key == NULL) return PMI_ERR_INVALID_KEY; if (value == NULL) return PMI_ERR_INVALID_VAL; /*printf("putting <%s><%s><%s>\n", kvsname, key, value);fflush(stdout);*/ if (pmi_process.local_kvs) { result = smpd_dbs_put(kvsname, key, value); return (result == SMPD_SUCCESS) ? PMI_SUCCESS : PMI_FAIL; } result = pmi_create_post_command("dbput", kvsname, key, value); if (result != PMI_SUCCESS) { pmi_err_printf("PMI_KVS_Put failed: unable to put '%s:%s:%s'\n", kvsname, key, value); return PMI_FAIL; } /* parse the result of the command */ if (MPIU_Str_get_string_arg(pmi_process.context->read_cmd.cmd, "result", str, 1024) != MPIU_STR_SUCCESS) { pmi_err_printf("PMI_KVS_Put failed: no result string in the result command.\n"); return PMI_FAIL; } if (strcmp(str, DBS_SUCCESS_STR)) { pmi_err_printf("PMI_KVS_Put failed: '%s'\n", str); return PMI_FAIL; } /*printf("iPMI_KVS_Put success.\n");fflush(stdout);*/ return PMI_SUCCESS;}int iPMI_KVS_Commit(const char kvsname[]){ if (pmi_process.init_finalized == PMI_FINALIZED) return PMI_ERR_INIT; if (kvsname == NULL) return PMI_ERR_INVALID_ARG; if (pmi_process.local_kvs) { return PMI_SUCCESS; } /* Make the puts return when the commands are written but not acknowledged. Then have this function wait until all outstanding puts are acknowledged. */ return PMI_SUCCESS;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -