📄 smpd_ipmi.c
字号:
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */
#include "ipmi.h"
#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif

/* pmiimpl.h */

static int root_smpd(void *p);

/* Define to prevent an smpd root thread or process from being created when there is only one process. */
/* Currently, defining this prevents the use of the spawn command. */
/*#define SINGLE_PROCESS_OPTIMIZATION*/

/* Buffer size limits used by this file. */
#define PMI_MAX_KEY_LEN          256
#define PMI_MAX_VALUE_LEN        8192
#define PMI_MAX_KVS_NAME_LENGTH  100
#define PMI_MAX_HOST_NAME_LENGTH 100
#define PMI_MAX_STR_VAL_LENGTH   100

/* Type and "invalid" sentinel for the handle of the helper mpiexec process:
 * a process HANDLE on Windows, a pid elsewhere. */
#ifdef HAVE_WINDOWS_H
    #define PMII_PROCESS_HANDLE_TYPE    HANDLE
    #define PMII_PROCESS_INVALID_HANDLE NULL
#else
    #define PMII_PROCESS_HANDLE_TYPE    int
    #define PMII_PROCESS_INVALID_HANDLE (-1)
#endif

/* Lifecycle state stored in pmi_process.init_finalized. */
typedef enum
{
    PMI_UNINITIALIZED,
    PMI_SINGLETON_INIT_BUT_NO_PM,
    PMI_SINGLETON_INIT_WITH_PM,
    PMI_INITIALIZED,
    PMI_FINALIZED
} PMIState;
/*
#define PMI_INITIALIZED 0
#define PMI_FINALIZED   1
*/
#define PMI_TRUE  1
#define PMI_FALSE 0

/* Per-process PMI client state.  A single static instance (pmi_process)
 * is defined below; the field order here must match its positional
 * initializer. */
typedef struct pmi_process_t
{
    int rpmi;
#ifdef HAVE_WINDOWS_H
    HANDLE hRootThread;
    HANDLE hRootThreadReadyEvent;
#else
    int root_pid;
#endif
    char root_host[PMI_MAX_HOST_NAME_LENGTH];
    int root_port;
    int local_kvs;
    char kvs_name[PMI_MAX_KVS_NAME_LENGTH];
    char domain_name[PMI_MAX_KVS_NAME_LENGTH];
    MPIDU_Sock_t sock;
    MPIDU_Sock_set_t set;
    int iproc;                  /* printed as the "[%d] " prefix of error messages */
    int nproc;
    PMIState init_finalized;
    int smpd_id;                /* used as the source id of posted commands */
    MPIDU_SOCK_NATIVE_FD smpd_fd;
    int smpd_key;               /* sent as "ctx_key" with every posted command */
    smpd_context_t *context;
    int clique_size;
    int *clique_ranks;
    char host[PMI_MAX_HOST_NAME_LENGTH];
    int port;
    int appnum;
    PMII_PROCESS_HANDLE_TYPE singleton_mpiexec_fd;
} pmi_process_t;

/* global variables */
static pmi_process_t pmi_process =
{
    PMI_FALSE,               /* rpmi           */
#ifdef HAVE_WINDOWS_H
    NULL,                    /* root thread    */
    NULL,                    /* hRootThreadReadyEvent */
#else
    0,                       /* root pid       */
#endif
    "",                      /* root host      */
    0,                       /* root port      */
    PMI_FALSE,               /* local_kvs      */
    "",                      /* kvs_name       */
    "",                      /* domain_name    */
    MPIDU_SOCK_INVALID_SOCK, /* sock           */
    MPIDU_SOCK_INVALID_SET,  /* set            */
    -1,                      /* iproc          */
    -1,                      /* nproc          */
    PMI_UNINITIALIZED,       /* init_finalized */
    -1,                      /* smpd_id        */
    0,                       /* smpd_fd        */
    0,                       /* smpd_key       */
    NULL,                    /* context        */
    0,                       /* clique_size    */
    NULL,                    /* clique_ranks   */
    "",                      /* host           */
    -1,                      /* port           */
    0,                       /* appnum         */
    PMII_PROCESS_INVALID_HANDLE /* singleton mpiexec proc handle/pid */
};

/* When non-zero, pmi_err_printf() prints nothing. */
static int silence = 0;

/*
 * Print a printf-style message to stdout, prefixed with "[<iproc>] ",
 * and flush stdout.  Does nothing when 'silence' is set.
 *
 * Returns the value returned by vprintf(), or 0 when silenced.
 */
static int pmi_err_printf(char *str, ...)
{
    int n=0;
    va_list list;

    if (!silence)
    {
        printf("[%d] ", pmi_process.iproc);
        va_start(list, str);
        n = vprintf(str, list);
        va_end(list);

        fflush(stdout);
    }

    return n;
}

/*
 * Print the numeric MPI error code followed by a printf-style message
 * prefixed with "[<iproc>] ", flush stdout, then hand the error code to
 * MPIR_Err_return_comm().  Unlike pmi_err_printf() this ignores 'silence'.
 *
 * Returns the value returned by vprintf().
 */
static int pmi_mpi_err_printf(int mpi_errno, char *fmt, ... )
{
    int n;
    va_list list;

    /* convert the error code to a string */
    printf("mpi_errno: %d\n", mpi_errno);

    printf("[%d] ", pmi_process.iproc);
    va_start(list, fmt);
    n = vprintf(fmt, list);
    va_end(list);

    fflush(stdout);

    MPIR_Err_return_comm(NULL, "", mpi_errno);

    return n;
}

/*
 * Build an smpd command, post it on the PMI context and run the smpd
 * state machine until it returns.
 *
 *   command - command name; "done", "init", "finalize" and "proc_info"
 *             get special treatment (see below)
 *   name    - optional "name" argument (NULL to omit)
 *   key     - optional "key" argument (NULL to omit)
 *   value   - optional "value" argument (NULL to omit)
 *
 * Destination id: 1 by default; pmi_process.smpd_id for "done" when this
 * is not an rpmi process; 0 for "init"/"finalize", which also carry a
 * "node_id" argument.  Every command carries "ctx_key".
 *
 * Returns PMI_SUCCESS, or PMI_FAIL if the command could not be created,
 * filled in, posted, or processed.  Failures while adding the extra
 * "proc_info" arguments are only logged.
 *
 * NOTE(review): cmd_ptr is not released on the error returns after
 * smpd_create_command() succeeds - confirm whether it needs freeing.
 */
static int pmi_create_post_command(const char *command, const char *name, const char *key, const char *value)
{
    int result;
    smpd_command_t *cmd_ptr;
    int dest = 1;
    int add_id = 0;

    if (!pmi_process.rpmi)
    {
        if (strcmp(command, "done") == 0)
        {
            /* done commands go to the immediate smpd, not the root */
            dest = pmi_process.smpd_id;
        }
    }
    if ((strcmp(command, "init") == 0) || (strcmp(command, "finalize") == 0))
    {
        add_id = 1;
        dest = 0;
    }

    result = smpd_create_command((char*)command, pmi_process.smpd_id, dest, SMPD_TRUE, &cmd_ptr);
    if (result != SMPD_SUCCESS)
    {
        pmi_err_printf("unable to create a %s command.\n", command);
        return PMI_FAIL;
    }
    result = smpd_add_command_int_arg(cmd_ptr, "ctx_key", pmi_process.smpd_key);
    if (result != SMPD_SUCCESS)
    {
        pmi_err_printf("unable to add the key to the %s command.\n", command);
        return PMI_FAIL;
    }

    if (name != NULL)
    {
        result = smpd_add_command_arg(cmd_ptr, "name", (char*)name);
        if (result != SMPD_SUCCESS)
        {
            pmi_err_printf("unable to add the kvs name('%s') to the %s command.\n", name, command);
            return PMI_FAIL;
        }
    }

    if (key != NULL)
    {
        result = smpd_add_command_arg(cmd_ptr, "key", (char*)key);
        if (result != SMPD_SUCCESS)
        {
            pmi_err_printf("unable to add the key('%s') to the %s command.\n", key, command);
            return PMI_FAIL;
        }
    }

    if (value != NULL)
    {
        result = smpd_add_command_arg(cmd_ptr, "value", (char*)value);
        if (result != SMPD_SUCCESS)
        {
            pmi_err_printf("unable to add the value('%s') to the %s command.\n", value, command);
            return PMI_FAIL;
        }
    }

    if (add_id)
    {
        result = smpd_add_command_int_arg(cmd_ptr, "node_id", pmi_process.smpd_id);
        if (result != SMPD_SUCCESS)
        {
            pmi_err_printf("unable to add the node_id(%d) to the %s command.\n", pmi_process.smpd_id, command);
            return PMI_FAIL;
        }
    }

    /* post the write of the command */
    /*
    printf("posting write of dbs command to %s context, sock %d: '%s'\n",
        smpd_get_context_str(pmi_process.context), MPIDU_Sock_getid(pmi_process.context->sock), cmd_ptr->cmd);
    fflush(stdout);
    */

    /* If proc_info command add the proc_info args */
    if(strcmp(command, "proc_info") == 0){
        /* FIXME - Send the actual exe name */
        result = smpd_add_command_arg(cmd_ptr, "c", "singleton_client");
        if(result != SMPD_SUCCESS){
            smpd_err_printf("Unable to add executable name to 'proc_info' cmd\n");
        }
        result = smpd_add_command_int_arg(cmd_ptr, "i", pmi_process.iproc);
        if(result != SMPD_SUCCESS){
            smpd_err_printf("Unable to add rank to 'proc_info' cmd\n");
        }
        result = smpd_add_command_int_arg(cmd_ptr, "n", pmi_process.nproc);
        if(result != SMPD_SUCCESS){
            smpd_err_printf("Unable to add nprocs to 'proc_info' cmd\n");
        }
        result = smpd_add_command_int_arg(cmd_ptr, "s", smpd_process.is_singleton_client ? 1 : 0);
        if(result != SMPD_SUCCESS){
            smpd_err_printf("Unable to add 'is_singleton_client' to 'proc_info' cmd\n");
        }
#ifndef HAVE_WINDOWS_H
        /* For non-windows systems send the PID in 'proc_info' */
        /* FIXME: Can we send a pid_t as an int ? */
        result = smpd_add_command_int_arg(cmd_ptr, "p", getpid());
        if(result != SMPD_SUCCESS){
            smpd_err_printf("Unable to add PID to 'proc_info' cmd \n");
        }
#endif
    }

    result = smpd_post_write_command(pmi_process.context, cmd_ptr);
    if (result != SMPD_SUCCESS)
    {
        pmi_err_printf("unable to post a write of the %s command.\n", command);
        return PMI_FAIL;
    }
    if (strcmp(command, "done"))
    {
        /* and post a read for the result if it is not a done command */
        result = smpd_post_read_command(pmi_process.context);
        if (result != SMPD_SUCCESS)
        {
            pmi_err_printf("unable to post a read of the next command on the pmi context.\n");
            return PMI_FAIL;
        }
    }

    /* let the state machine send the command and receive the result */
    result = smpd_enter_at_state(pmi_process.set, SMPD_WRITING_CMD);
    if (result != SMPD_SUCCESS)
    {
        pmi_err_printf("the state machine logic failed to get the result of the %s command.\n", command);
        return PMI_FAIL;
    }

    return PMI_SUCCESS;
}

/*
 * Create a PMI context, post a connect to host:port on pmi_process.set
 * and run the smpd state machine starting at 'state'.
 *
 *   host, port - address to connect to
 *   state      - initial state of the new context and of the state machine
 *
 * On success pmi_process.context and pmi_process.sock refer to the new
 * connection.  When state is SMPD_CONNECTING_RPMI, pmi_process.smpd_key
 * is set from the context's session string.
 *
 * Returns SMPD_SUCCESS on success and PMI_FAIL on failure.
 *
 * NOTE(review): the success value is SMPD_SUCCESS while failures use
 * PMI_FAIL, and the smpd_enter_at_state() result is compared against
 * MPI_SUCCESS here but against SMPD_SUCCESS elsewhere in this file -
 * presumably all three are the same value; confirm before relying on it.
 */
static int uPMI_ConnectToHost(char *host, int port, smpd_state_t state)
{
    int result;
    char error_msg[MPI_MAX_ERROR_STRING];
    int len;

    /*printf("posting a connect to %s:%d\n", host, port);fflush(stdout);*/
    result = smpd_create_context(SMPD_CONTEXT_PMI, pmi_process.set, MPIDU_SOCK_INVALID_SOCK/*pmi_process.sock*/, smpd_process.id, &pmi_process.context);
    if (result != SMPD_SUCCESS)
    {
        pmi_err_printf("PMI_ConnectToHost failed: unable to create a context to connect to %s:%d with.\n", host, port);
        return PMI_FAIL;
    }

    result = MPIDU_Sock_post_connect(pmi_process.set, pmi_process.context, host, port, &pmi_process.sock);
    if (result != MPI_SUCCESS)
    {
        printf("MPIDU_Sock_post_connect failed.\n");fflush(stdout);
        len = MPI_MAX_ERROR_STRING;
        PMPI_Error_string(result, error_msg, &len);
        pmi_err_printf("PMI_ConnectToHost failed: unable to post a connect to %s:%d, error: %s\n", host, port, error_msg);
        printf("uPMI_ConnectToHost returning PMI_FAIL\n");fflush(stdout);
        return PMI_FAIL;
    }

    pmi_process.context->sock = pmi_process.sock;
    pmi_process.context->state = state;

    result = smpd_enter_at_state(pmi_process.set, state);
    if (result != MPI_SUCCESS)
    {
        pmi_mpi_err_printf(result, "PMI_ConnectToHost failed: unable to connect to %s:%d.\n", host, port);
        return PMI_FAIL;
    }

    if (state == SMPD_CONNECTING_RPMI)
    {
        /* remote pmi processes receive their smpd_key when they connect to the smpd pmi server */
        pmi_process.smpd_key = atoi(pmi_process.context->session);
    }

    return SMPD_SUCCESS;
}

/* Launch an instance of mpiexec which will connect to SMPD and start a PMI service.
 * This instance of mpiexec will connect back using the portNo specified in the "-port" option
 * and provide info about the new PMI service.
 *
 *   portNo - port passed to mpiexec via "-port"
 *
 * Returns the process handle (Windows) or pid (elsewhere) of the new
 * mpiexec process, or PMII_PROCESS_INVALID_HANDLE if it could not be
 * created.  On non-Windows systems a failed exec is only detectable in
 * the child, which reports the error and terminates; the parent still
 * receives a valid pid in that case.
 */
static PMII_PROCESS_HANDLE_TYPE launch_mpiexec_process(int portNo)
{
#ifdef HAVE_WINDOWS_H
#define PMII_MAX_MPIEXEC_CMD_STR_LENGTH 100
    char progName[PMII_MAX_MPIEXEC_CMD_STR_LENGTH];
    STARTUPINFO sInfo;
    PROCESS_INFORMATION pInfo = { 0 };

    ZeroMemory(&sInfo, sizeof(sInfo));
    sInfo.cb = sizeof(sInfo);

    snprintf(progName, PMII_MAX_MPIEXEC_CMD_STR_LENGTH,
             "mpiexec -pmiserver 1 -port %d", portNo);

    if(!CreateProcess(NULL, progName, NULL, NULL, TRUE,
                      NORMAL_PRIORITY_CLASS, NULL, NULL, &sInfo, &pInfo)){
        pmi_err_printf("Error creating mpiexec process...%d\n", (int)GetLastError());
        return PMII_PROCESS_INVALID_HANDLE;
    }

    /* Only the process handle is returned to the caller; close the
     * primary-thread handle here so it is not leaked. */
    CloseHandle(pInfo.hThread);

    return pInfo.hProcess;
#else
#define PMII_MPIEXEC_CMDLINE_ARGV_SIZE 6
    int pid;
    char *mpiexecArgv[PMII_MPIEXEC_CMDLINE_ARGV_SIZE];
    char port[16];

    pid = fork();
    if(pid < 0){
        pmi_err_printf("Error creating mpiexec process...\n");
        return PMII_PROCESS_INVALID_HANDLE;
    }

    if(pid == 0){
        /* Child: replace this process image with mpiexec. */
        MPIU_Snprintf(port, sizeof(port), "%d", portNo);
        mpiexecArgv[0] = "mpiexec";
        mpiexecArgv[1] = "-pmiserver";
        mpiexecArgv[2] = "1";
        mpiexecArgv[3] = "-port";
        mpiexecArgv[4] = port;
        mpiexecArgv[5] = NULL;
        execvp(mpiexecArgv[0], mpiexecArgv);

        /* execvp() only returns on failure.  The child must terminate
         * here: returning would let a second copy of the calling program
         * continue running in parallel with the parent.  _exit() skips
         * atexit handlers and stdio flushing inherited from the parent;
         * pmi_err_printf() has already flushed its own output. */
        pmi_err_printf("Error Singinit execv'ing mpiexec failed\n");
        _exit(127);
    }

    /* Parent: hand back the child's pid. */
    return pid;
#endif
}

#define PMII_ERR_SETPRINTANDJUMP(msg, errcode) { pmi_err_printf("%s", msg); retval = errcode; goto fn_fail; }
#define PMII_MAX_ERR_MSG_LENGTH 100
static int PMIi_InitSingleton(void ){
    MPIDU_Sock_set_t singleton_client_set;
    MPIDU_Sock_t singleton_client_sock;
    smpd_context_t *p_singleton_context=NULL;
    char err_msg[PMII_MAX_ERR_MSG_LENGTH];
    int singleton_client_lport;
    int result, retval = PMI_SUCCESS;
    char rank_str[PMI_MAX_STR_VAL_LENGTH], size_str[PMI_MAX_STR_VAL_LENGTH];
    char str[PMI_MAX_STR_VAL_LENGTH];

    /* Enable singleton_init state machine tracing */
    /*
    smpd_process.verbose = SMPD_TRUE;
    smpd_process.dbg_state |= SMPD_DBG_STATE_ERROUT | SMPD_DBG_STATE_STDOUT | SMPD_DBG_STATE_TRACE;
    */
    result = MPIDU_Sock_create_set(&singleton_client_set);
    if(result != MPI_SUCCESS){
        MPIU_Snprintf(err_msg, PMII_MAX_ERR_MSG_LENGTH, "MPIDU_Sock_create_set failed: unable to create a sock set, error: %d\n", result);
        PMII_ERR_SETPRINTANDJUMP(err_msg, result);
    }

    /* Assign an ephemeral port */
    singleton_client_lport = 0;
    result = MPIDU_Sock_listen(singleton_client_set, NULL, &singleton_client_lport, &singleton_client_sock);
    if (result != MPI_SUCCESS){
        MPIU_Snprintf(err_msg, PMII_MAX_ERR_MSG_LENGTH, "MPIDU_Sock_listen failed,\nsock error: %s\n", get_sock_error_string(result));
        PMII_ERR_SETPRINTANDJUMP(err_msg, result);
    }

    result =
smpd_create_context(SMPD_CONTEXT_SINGLETON_INIT_CLIENT, singleton_client_set, singleton_client_sock, -1, &p_singleton_context); if (result != SMPD_SUCCESS){ MPIU_Snprintf(err_msg, PMII_MAX_ERR_MSG_LENGTH, "smpd_create_context failed, error = %d\n", result); PMII_ERR_SETPRINTANDJUMP(err_msg, result); } result = MPIDU_Sock_set_user_ptr(singleton_client_sock, p_singleton_context); if (result != MPI_SUCCESS){ MPIU_Snprintf(err_msg, PMII_MAX_ERR_MSG_LENGTH, "MPIDU_Sock_set_user_ptr failed,\nsock error: %s\n", get_sock_error_string(result)); PMII_ERR_SETPRINTANDJUMP(err_msg, result); } p_singleton_context->state = SMPD_SINGLETON_CLIENT_LISTENING; /* Create an instance of mpiexec that will connect back and give us information about the PM to connect to */ pmi_process.singleton_mpiexec_fd = launch_mpiexec_process(singleton_client_lport); if(pmi_process.singleton_mpiexec_fd == PMII_PROCESS_INVALID_HANDLE){ result = -1; MPIU_Snprintf(err_msg, PMII_MAX_ERR_MSG_LENGTH, "launchMpiexecProcess failed\n"); PMII_ERR_SETPRINTANDJUMP(err_msg, result); } /* SMPD state machine will accept connection from mpiexec & get information about the PM */ result = smpd_enter_at_state(singleton_client_set, SMPD_SINGLETON_CLIENT_LISTENING); if (result != SMPD_SUCCESS) { MPIU_Snprintf(err_msg, PMII_MAX_ERR_MSG_LENGTH, "smpd state machine failed, error = %d\n", result); PMII_ERR_SETPRINTANDJUMP(err_msg, result); } /* SMPD state machine has set the PMI info for smpd_process */ /* Now we have PMI_KVS, PMI_HOST and PMI_PORT info */ if ((smpd_process.port > 0) && (strlen(smpd_process.host) > 0) && (strlen(smpd_process.kvs_name) > 0)){ strncpy(pmi_process.kvs_name, smpd_process.kvs_name, PMI_MAX_KVS_NAME_LENGTH); strncpy(pmi_process.domain_name, smpd_process.kvs_name, PMI_MAX_KVS_NAME_LENGTH); strncpy(smpd_process.domain_name, smpd_process.kvs_name, PMI_MAX_KVS_NAME_LENGTH); strncpy(pmi_process.host, smpd_process.host, PMI_MAX_HOST_NAME_LENGTH); strncpy(pmi_process.root_host, smpd_process.host, 
PMI_MAX_HOST_NAME_LENGTH); pmi_process.root_port = smpd_process.port; pmi_process.port = smpd_process.port; /* printf("Received:\nkvs_name = %s\nhost = %s\nport = %d\n", pmi_process.kvs_name, pmi_process.host, pmi_process.port); fflush(stdout); */ smpd_process.id = 1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -