📄 smpd_launch_process.c
字号:
/*
 * Interior of the process-launch routine (its header lies before this view).
 * NOTE(review): the text up to "exit(result); }" appears to be the child side
 * of a fork() -- confirm against the part of the function not shown here.
 *
 * Visible steps, in order:
 *   1. Export the PMI_* variables, either with setenv() (first preprocessor
 *      branch) or with putenv() on one packed heap buffer (#else branch),
 *      followed by the user-supplied variables in process->env.
 *   2. dup2() the stdin/stdout/stderr pipe ends onto fds 0/1/2 and close the
 *      original pipe descriptors.
 *   3. chdir() to process->dir (or $HOME), clear the umask, execvp(argv[0]).
 *   4. If execvp() returns: format process->err_msg, and when process->pmi is
 *      set, package an "abort" command and write it to pmi_pipe_fds[1]; exit.
 *   5. Parent side: store the pid, close the descriptors the child uses, wrap
 *      the remaining pipe ends with MPIDU_Sock_native_to_sock() and attach
 *      the per-stream contexts with MPIDU_Sock_set_user_ptr().
 */
smpd_dbg_printf("env: PMI_SPAWN=%s\n", str); setenv("PMI_SPAWN", str, 1); sprintf(str, "%d", process->appnum); smpd_dbg_printf("env: PMI_APPNUM=%s\n", str); setenv("PMI_APPNUM", str, 1); sprintf(str, "%s", process->clique); smpd_dbg_printf("env: PMI_CLIQUE=%s\n", str); setenv("PMI_CLIQUE", str, 1); } set_environment_variables(process->env);#else /* putenv variant: every "NAME=value" string is written back-to-back into one buffer (pPutEnv) and pEnvArray[i] records where string i starts. NOTE(review): neither malloc() result is checked, and pEnvArray is not freed in the visible code. */ pLastEnv = pPutEnv; env_count = 0; env_size = get_env_size(process->env, &env_count) + 1024; env_count += 10; pPutEnv = (char*)malloc(env_size * sizeof(char)); pEnvArray = (char**)malloc(env_count * sizeof(char*)); env_iter = pPutEnv; pEnvArray[0] = env_iter; env_iter += sprintf(env_iter, "PMI_RANK=%d", process->rank) + 1; pEnvArray[1] = env_iter; env_iter += sprintf(env_iter, "PMI_SIZE=%d", process->nproc) + 1; pEnvArray[2] = env_iter; env_iter += sprintf(env_iter, "PMI_KVS=%s", process->kvs_name) + 1; pEnvArray[3] = env_iter; env_iter += sprintf(env_iter, "PMI_DOMAIN=%s", process->domain_name) + 1; pEnvArray[4] = env_iter; env_iter += sprintf(env_iter, "PMI_SMPD_FD=%d", pmi_pipe_fds[1]) + 1; pEnvArray[5] = env_iter; env_iter += sprintf(env_iter, "PMI_SMPD_ID=%d", smpd_process.id) + 1; pEnvArray[6] = env_iter; env_iter += sprintf(env_iter, "PMI_SMPD_KEY=%d", process->id) + 1; pEnvArray[7] = env_iter; env_iter += sprintf(env_iter, "PMI_SPAWN=%d", process->spawned) + 1; pEnvArray[8] = env_iter; env_iter += sprintf(env_iter, "PMI_APPNUM=%d", process->appnum) + 1; pEnvArray[9] = env_iter; env_iter += sprintf(env_iter, "PMI_CLIQUE=%s", process->clique) + 1; add_environment_variables(env_iter, &pEnvArray[10], process->env); for (i=0; i<env_count; i++) { result = putenv(pEnvArray[i]); if (result != 0) { smpd_err_printf("putenv failed: %d\n", errno); } } /* the previous buffer is released only after the new strings have been handed to putenv() */ if (pLastEnv != NULL) free(pLastEnv);#endif result = dup2(stdin_pipe_fds[0], 0); /* dup a new stdin */ if (result == -1) { smpd_err_printf("dup2 stdin failed: %d\n", errno); } close(stdin_pipe_fds[0]); close(stdin_pipe_fds[1]); result = dup2(stdout_pipe_fds[1], 1); /* dup a new 
stdout */ if (result == -1) { smpd_err_printf("dup2 stdout failed: %d\n", errno); } close(stdout_pipe_fds[0]); close(stdout_pipe_fds[1]); result = dup2(stderr_pipe_fds[1], 2); /* dup a new stderr */ if (result == -1) { smpd_err_printf("dup2 stderr failed: %d\n", errno); } close(stderr_pipe_fds[0]); close(stderr_pipe_fds[1]); if (process->pmi != NULL) { close(pmi_pipe_fds[0]); /* close the other end */ } /* change the working directory; falls back to $HOME when process->dir is empty or chdir() fails. NOTE(review): getenv("HOME") may return NULL and the result of the fallback chdir() is ignored. */ result = -1; if (process->dir[0] != '\0') result = chdir( process->dir ); if (result < 0) chdir( getenv( "HOME" ) ); /* reset the file mode creation mask */ umask(0); /* execvp() returns only on failure, so everything after it is the exec-failure path; errno is captured immediately for the error message */ result = execvp( argv[0], argv ); result = errno; { char myhostname[SMPD_MAX_HOST_LENGTH]; smpd_get_hostname(myhostname, SMPD_MAX_HOST_LENGTH); snprintf(process->err_msg, SMPD_MAX_ERROR_LEN, "Unable to exec '%s' on %s. Error %d - %s\n", process->exe, myhostname, result, strerror(result)); /*sprintf(process->err_msg, "Error %d - %s", result, strerror(result));*/ } if (process->pmi != NULL) { /* create the abort command */ result = smpd_create_command("abort", smpd_process.id, 0, SMPD_FALSE, &cmd_ptr); if (result != SMPD_SUCCESS) { smpd_err_printf("unable to create an abort command in response to failed launch command: '%s'\n", process->exe); exit(-1); } /* launch process should provide a reason for the error, for now just return FAIL */ result = smpd_add_command_arg(cmd_ptr, "result", SMPD_FAIL_STR); if (result != SMPD_SUCCESS) { smpd_err_printf("unable to add the result field to the result command in response to launch command: '%s'\n", process->exe); exit(-1); } if (process->err_msg[0] != '\0') { result = smpd_add_command_arg(cmd_ptr, "error", process->err_msg); if (result != SMPD_SUCCESS) { smpd_err_printf("unable to add the error field to the abort command in response to failed launch command: '%s'\n", process->exe); exit(-1); } } /* send the result back: fixed-length header first, then the command body */ smpd_package_command(cmd_ptr); result = write(pmi_pipe_fds[1], cmd_ptr->cmd_hdr_str, SMPD_CMD_HDR_LENGTH); if 
(result != SMPD_CMD_HDR_LENGTH) { smpd_err_printf("unable to write the abort command header in response to failed launch command: '%s'\n", process->exe); exit(-1); } result = write(pmi_pipe_fds[1], cmd_ptr->cmd, cmd_ptr->length); if (result != cmd_ptr->length) { smpd_err_printf("unable to write the abort command in response to failed launch command: '%s'\n", process->exe); exit(-1); } /* send a closed message on the pmi socket? */ } /* NOTE(review): when process->pmi != NULL, result now holds the byte count of the last write(), not the execvp errno, so the exit status differs between the two paths */ exit(result); } /* parent process: record the child's pid and close the pipe ends that the child dup2()'d or writes to */ process->pid = pid; close(stdin_pipe_fds[0]); close(stdout_pipe_fds[1]); close(stderr_pipe_fds[1]); if (process->pmi != NULL) { close(pmi_pipe_fds[1]); } /* make sock structures out of the sockets. NOTE(review): a failure here is only logged; execution continues and the sock variable is used below regardless. */ result = MPIDU_Sock_native_to_sock(set, stdin_pipe_fds[1], NULL, &sock_in); if (result != MPI_SUCCESS) { smpd_err_printf("MPIDU_Sock_native_to_sock failed, error %s\n", get_sock_error_string(result)); } result = MPIDU_Sock_native_to_sock(set, stdout_pipe_fds[0], NULL, &sock_out); if (result != MPI_SUCCESS) { smpd_err_printf("MPIDU_Sock_native_to_sock failed, error %s\n", get_sock_error_string(result)); } result = MPIDU_Sock_native_to_sock(set, stderr_pipe_fds[0], NULL, &sock_err); if (result != MPI_SUCCESS) { smpd_err_printf("MPIDU_Sock_native_to_sock failed, error %s\n", get_sock_error_string(result)); } if (process->pmi != NULL) { result = MPIDU_Sock_native_to_sock(set, pmi_pipe_fds[0], NULL, &sock_pmi); if (result != MPI_SUCCESS) { smpd_err_printf("MPIDU_Sock_native_to_sock failed, error %s\n", get_sock_error_string(result)); } } process->in->sock = sock_in; process->out->sock = sock_out; process->err->sock = sock_err; if (process->pmi != NULL) { process->pmi->sock = sock_pmi; } process->pid = process->in->id = process->out->id = process->err->id = pid; /* attach each context to its sock as the user pointer */ result = MPIDU_Sock_set_user_ptr(sock_in, process->in); if (result != MPI_SUCCESS) { smpd_err_printf("MPIDU_Sock_set_user_ptr failed, error %s\n", get_sock_error_string(result)); } result = MPIDU_Sock_set_user_ptr(sock_out, process->out); if (result != 
MPI_SUCCESS) { smpd_err_printf("MPIDU_Sock_set_user_ptr failed, error %s\n", get_sock_error_string(result)); } result = MPIDU_Sock_set_user_ptr(sock_err, process->err); if (result != MPI_SUCCESS) { smpd_err_printf("MPIDU_Sock_set_user_ptr failed, error %s\n", get_sock_error_string(result)); } if (process->pmi != NULL) { result = MPIDU_Sock_set_user_ptr(sock_pmi, process->pmi); if (result != MPI_SUCCESS) { smpd_err_printf("MPIDU_Sock_set_user_ptr failed, error %s\n", get_sock_error_string(result)); } } process->context_refcount = (process->pmi != NULL) ? 3 : 2; process->out->read_state = SMPD_READING_STDOUT; result = MPIDU_Sock_post_read(sock_out, process->out->read_cmd.cmd, 1, 1, NULL); if (result != MPI_SUCCESS) { smpd_err_printf("posting first read from stdout context failed, sock error: %s\n", get_sock_error_string(result)); smpd_exit_fn(FCNAME); return SMPD_FAIL; } process->err->read_state = SMPD_READING_STDERR; result = MPIDU_Sock_post_read(sock_err, process->err->read_cmd.cmd, 1, 1, NULL); if (result != MPI_SUCCESS) { smpd_err_printf("posting first read from stderr context failed, sock error: %s\n", get_sock_error_string(result)); smpd_exit_fn(FCNAME); return SMPD_FAIL; } if (process->pmi != NULL) { result = smpd_post_read_command(process->pmi); if (result != SMPD_SUCCESS) { smpd_err_printf("unable to post a read of the first command on the pmi control context.\n"); smpd_exit_fn(FCNAME); return SMPD_FAIL; } } process->wait = process->in->wait = process->out->wait = process->err->wait = pid; smpd_exit_fn(FCNAME); return SMPD_SUCCESS;}#endif#undef FCNAME#define FCNAME "smpd_wait_process"int smpd_wait_process(smpd_pwait_t wait, int *exit_code_ptr){#ifdef HAVE_WINDOWS_H int result; DWORD exit_code; smpd_enter_fn(FCNAME); if (wait.hProcess == INVALID_HANDLE_VALUE || wait.hProcess == NULL) { smpd_dbg_printf("No process to wait for.\n"); *exit_code_ptr = -1; smpd_exit_fn(FCNAME); return SMPD_SUCCESS; } if (WaitForSingleObject(wait.hProcess, INFINITE) != 
WAIT_OBJECT_0) { smpd_err_printf("WaitForSingleObject failed, error %d\n", GetLastError()); *exit_code_ptr = -1; smpd_exit_fn(FCNAME); return SMPD_FAIL; } result = GetExitCodeProcess(wait.hProcess, &exit_code); if (!result) { smpd_err_printf("GetExitCodeProcess failed, error %d\n", GetLastError()); *exit_code_ptr = -1; smpd_exit_fn(FCNAME); return SMPD_FAIL; } CloseHandle(wait.hProcess); CloseHandle(wait.hThread); *exit_code_ptr = exit_code; smpd_exit_fn(FCNAME); return SMPD_SUCCESS;#else int status; smpd_pwait_t result; smpd_enter_fn(FCNAME); smpd_dbg_printf("waiting for process %d\n", wait); result = -1; while (result == -1) { result = waitpid(wait, &status, WUNTRACED); if (result == -1) { switch (errno) { case EINTR: break; case ECHILD:#ifdef USE_PTHREAD_STDIN_REDIRECTION /* On the Macs where stdout/err redirection hangs a SIGCHLD * handler has been set up so ignore ECHILD errors. */#else smpd_err_printf("waitpid(%d) returned ECHILD\n", wait); *exit_code_ptr = -10;#endif smpd_exit_fn(FCNAME); return SMPD_SUCCESS; break; case EINVAL: smpd_err_printf("waitpid(%d) returned EINVAL\n", wait); *exit_code_ptr = -11; smpd_exit_fn(FCNAME); return SMPD_SUCCESS; break; default: smpd_err_printf("waitpid(%d) returned %d\n", wait, errno); *exit_code_ptr = -12; smpd_exit_fn(FCNAME); return SMPD_SUCCESS; break; } } } if (WIFEXITED(status)) { *exit_code_ptr = WEXITSTATUS(status); } else { smpd_err_printf("WIFEXITED(%d) failed, setting exit code to -1\n", wait); *exit_code_ptr = -1; if (WIFSIGNALED(status)) { *exit_code_ptr = -2; } if (WIFSTOPPED(status)) { *exit_code_ptr = -3; }#ifdef WCOREDUMP if (WCOREDUMP(status)) { *exit_code_ptr = -4; }#endif } smpd_exit_fn(FCNAME); return SMPD_SUCCESS;#endif}#undef FCNAME#define FCNAME "smpd_suspend_process"int smpd_suspend_process(smpd_process_t *process){#ifdef HAVE_WINDOWS_H int result = SMPD_SUCCESS; smpd_enter_fn(FCNAME); if (SuspendThread(process->wait.hThread) == -1) { result = GetLastError(); smpd_err_printf("SuspendThread failed 
with error %d for process %d:%s:'%s'\n", result, process->rank, process->kvs_name, process->exe); } smpd_exit_fn(FCNAME); return result;#else smpd_enter_fn(FCNAME); smpd_dbg_printf("stopping process %d\n", process->wait); kill(process->wait, SIGSTOP); smpd_exit_fn(FCNAME); return SMPD_SUCCESS;#endif}#ifdef HAVE_WINDOWS_Hstatic BOOL SafeTerminateProcess(HANDLE hProcess, UINT uExitCode){ DWORD dwTID, dwCode, dwErr = 0; HANDLE hProcessDup = INVALID_HANDLE_VALUE; HANDLE hRT = NULL; HINSTANCE hKernel = GetModuleHandle("Kernel32"); BOOL bSuccess = FALSE; BOOL bDup = DuplicateHandle(GetCurrentProcess(), hProcess, GetCurrentProcess(), &hProcessDup, PROCESS_ALL_ACCESS, FALSE, 0); if (GetExitCodeProcess((bDup) ? hProcessDup : hProcess, &dwCode) && (dwCode == STILL_ACTIVE)) { FARPROC pfnExitProc; pfnExitProc = GetProcAddress(hKernel, "ExitProcess"); if (pfnExitProc) { hRT = CreateRemoteThread((bDup) ? hProcessDup : hProcess, NULL, 0, /* This relies on the probability that Kernel32.dll is mapped to the same place on all processes If it gets relocated, this function will produce spurious results */ (LPTHREAD_START_ROUTINE)pfnExitProc, UintToPtr(uExitCode)/*(LPVOID)uExitCode*/, 0, &dwTID); } if (hRT == NULL) dwErr = GetLastError(); } else { dwErr = ERROR_PROCESS_ABORTED; } if (hRT) { if (WaitForSingleObject((bDup) ? hProcessDup : hProcess, 30000) == WAIT_OBJECT_0) bSuccess = TRUE; else { dwErr = ERROR_TIMEOUT; bSuccess = FALSE; } CloseHandle(hRT); } if (bDup) CloseHandle(hProcessDup); if (!bSuccess) SetLastError(dwErr); return bSuccess;}#endif#undef FCNAME#define FCNAME "smpd_kill_process"int smpd_kill_process(smpd_process_t *process, int exit_code){#ifdef HAVE_WINDOWS_H smpd_enter_fn(FCNAME); smpd_process_from_registry(process); if (!SafeTerminateProcess(process->wait.hProcess, exit_code)){ if (GetLastError() != ERROR_PROCESS_ABORTED){ TerminateProcess(process->wait.h
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -