📄 mpiexec.c
字号:
fdtable[i].read = 0; fdtable[i].write = 0; fdtable[i].file = NULL; fdtable[i].handler = NOTSET; fdtable[i].name[0] = '\0'; if (i > maxfdentryInUse) maxfdentryInUse = i; return( i );}/* Fork numprocs processes, with the given PMI groupid and kvsname */void forkProcesses( int numprocs, char execname[], char *client_arg[], char *envp[], char wdirname[], int groupid, char kvsname[] ){ int i, j, idx, pid, rc; int client_pipe_fds[2]; for ( i = 0; i < numprocs; i++ ) { /* set up pipe to client for use by pmi */ socketpair( AF_UNIX, SOCK_STREAM, 0, client_pipe_fds ); idx = allocate_fdentry( ); fdtable[idx].fd = client_pipe_fds[0]; fdtable[idx].group = groupid; fdtable[idx].rank = i; /* pid set below, after fork */ fdtable[idx].state = UNKNOWN; fdtable[idx].read = 1; fdtable[idx].write = 0; fdtable[idx].file = NULL; fdtable[idx].handler = CLIENT; strncpy( fdtable[idx].name, "client", MAXNAMELEN ); strncpy( fdtable[idx].kvsname, kvsname, MAXNAMELEN ); pid = fork( ); if ( pid < 0 ) { fprintf( stderr, "mpiexec fork failed\n" ); KillChildren( ); exit( -1 ); } if ( pid == 0 ) { /*********************************** child, client */ char env_pmi_fd[MAXNAMELEN]; char env_pmi_rank[MAXNAMELEN]; char env_pmi_size[MAXNAMELEN]; char env_pmi_debug[MAXNAMELEN]; char *client_env[MAX_CLIENT_ENV]; /* Check to see if we should be traced */ CheckIfTraced( ); close( client_pipe_fds[0] ); /* build environment for client */ for ( j = 0; envp[j] && j < MAX_CLIENT_ENV-5; j++ ) client_env[j] = envp[j]; /* copy mpiexec environment */ if (j == MAX_CLIENT_ENV-5) { fprintf( stderr, "environment is too large (max is %d)\n", MAX_CLIENT_ENV-5); exit(-1); } snprintf( env_pmi_fd, MAXNAMELEN, "PMI_FD=%d" , client_pipe_fds[1] ); client_env[j++] = env_pmi_fd; snprintf( env_pmi_rank, MAXNAMELEN, "PMI_RANK=%d", i ); client_env[j++] = env_pmi_rank; snprintf( env_pmi_size, MAXNAMELEN, "PMI_SIZE=%d", numprocs ); client_env[j++] = env_pmi_size; snprintf( env_pmi_debug, MAXNAMELEN, "PMI_DEBUG=%d", debug ); client_env[j++] = env_pmi_debug; client_env[j] = NULL; for ( j = 0; client_env[j]; j++ ) if (putenv( client_env[j] )) { perror( "Could not set environment" ); exit( -1 ); } /* change working directory if specified, replace argv[0], and exec client */ rc = chdir( wdirname ); if (rc < 0) { /* We need an error message here */ chdir( getenv( "HOME" ) ); } client_arg[0] = execname; /* pathname argument should be used here */ rc = execvp( execname, client_arg ); if ( rc < 0 ) { fprintf( stderr, "mpiexec could not exec %s\n", execname ); perror( "Reason:" ); exit( -1 ); } } else { /********************************** parent, mpiexec */ fdtable[idx].pid = pid; close( client_pipe_fds[1] ); } } /* end of forking loop */}/* * This routine can be called to handle the result of a wait. * This is in a separate routine so that it can be used anywhere * waitpid or wait are called. */void HandleWaitStatus( pid_t pid, int client_stat, exit_state_t sigstate, int has_finalized ) { /* Get the status of the exited process */ if (WIFEXITED(client_stat)) { /* true if the process exited normally */ exitstatus[num_exited].rc = WEXITSTATUS(client_stat); } else { exitstatus[num_exited].rc = -1; /* For unknown, since valid returns in 0-255 */ } if (WIFSIGNALED(client_stat)) { exitstatus[num_exited].sig = WTERMSIG(client_stat); exitstatus[num_exited].exit_state = sigstate; num_aborted++; } else { exitstatus[num_exited].sig= 0; exitstatus[num_exited].exit_state = has_finalized ? NORMAL : NOFINALIZE; }}/* * Wait on the process in fdentry[idx]. Do a blocking wait if * requested. If sigstate is not "NORMAL", set the exit state for * the process to this value if it exits with a signal. This is used * to separate processes that died because mpiexec sent them a signal * from processes that died because they received a signal from a * different source (e.g., SIGFPE or SIGSEGV) */int waitOnProcess( int idx, int blocking, exit_state_t sigstate ){ int client_stat, rc, has_finalized; pid_t pid; /* Careful here: we may want to use WNOHANG; wait a little, then do something like kill the process */ if (debug) { fprintf( stderr, "Waiting on status of process %d\n", fdtable[idx].pid ); fflush( stderr ); } pid = fdtable[idx].pid; if (pid <= 0) return -1; if (blocking) rc = waitpid( pid, &client_stat, 0 ); else { rc = waitpid( pid, &client_stat, WNOHANG ); if (rc == 0) return 0; } if (rc < 0) { perror( "Error waiting for process!" ); return 0; } if (debug) { fprintf( stderr, "Wait on %d completed\n", fdtable[idx].pid ); fflush( stderr ); } has_finalized = fdtable[idx].state == FINALIZED; HandleWaitStatus( pid, client_stat, sigstate, has_finalized ); num_exited++; return 0;}/* * Process input from the socket connecting the mpiexec process to the * child process */int handle_input_fd ( int idx ){ int all_done = 0; int rc; char inbuf[PMIU_MAXLINE], outbuf[PMIU_MAXLINE], cmd[MAXNAMELEN]; if ( fdtable[idx].handler == CLIENT ) { /* printf( "handling client input for rank %d\n", fdtable[idx].rank ); */ if ( ( rc = PMIU_readline( fdtable[idx].fd, inbuf, PMIU_MAXLINE ) ) > 0 ) { PMIU_parse_keyvals( inbuf ); PMIU_getval( "cmd", cmd, MAXNAMELEN ); if ( strncmp( cmd, "barrier_in", MAXNAMELEN ) == 0 ) { fPMI_Handle_barrier( idx ); } else if ( strncmp( cmd, "finalize", MAXNAMELEN ) == 0 ) { fdtable[idx].state = FINALIZED; } else if ( strncmp( cmd, "abort", MAXNAMELEN ) == 0 ) { /* No PMI abort command has yet been implemented! */ KillChildren(); all_done = 1; } else if ( strncmp( cmd, "get_my_kvsname", MAXNAMELEN ) == 0 ) { fPMI_Handle_get_my_kvsname( idx ); } else if ( strncmp( cmd, "get_maxes", MAXNAMELEN ) == 0 ) { fPMI_Handle_get_maxes( idx ); } else if ( strncmp( cmd, "create_kvs", MAXNAMELEN ) == 0 ) { fPMI_Handle_create_kvs( idx ); } else if ( strncmp( cmd, "destroy_kvs", MAXNAMELEN ) == 0 ) { fPMI_Handle_destroy_kvs( idx ); } else if ( strncmp( cmd, "put", MAXNAMELEN ) == 0 ) { fPMI_Handle_put( idx ); } else if ( strncmp( cmd, "get", MAXNAMELEN ) == 0 ) { fPMI_Handle_get( idx ); } else if ( strncmp( cmd, "getbyidx", MAXNAMELEN ) == 0 ) { fPMI_Handle_getbyidx( idx ); } else { PMIU_printf( 1, "unknown cmd %s\n", cmd ); } } else { /* lost contact with client */ close( fdtable[idx].fd ); fdtable[idx].active = 0; rc = waitOnProcess( idx, 1, NORMAL ); if (rc) { fprintf( stderr, "Error waiting on process %d\n", fdtable[idx].pid ); } if (fdtable[idx].state != FINALIZED) { /* Process exited before finalize */ KillChildren(); all_done = 1; } if ( num_exited == numprocs ) { /* Set the global done flag */ all_done = 1; } } } else { fprintf( stderr, "unknown handler %d for fdtable entry %d\n", fdtable[idx].handler, idx); } return all_done;}#ifdef USE_SIGCHLD_HANDLER/* * Signal handler. Detect a SIGCHLD exit. The routines are * * setup_sigchild - Call to install the signal handler * handle_sigchild - This is the signal handler * * If a child exits with a non-zero return code, we may want to kill * the other children. In most cases, we'll want to kill the other * children if a child dies on a signal. * Sometimes we do *not* want to kill the children; particularly when * we are debugging. * */#if defined(USE_SIGNAL) || defined(USE_SIGACTION)#include <signal.h>#else#error no signal choice#endifint handle_sigchild( int sig ){ int prog_stat, pid, rc, sigval, i; if (debug) { fprintf( stderr, "Waiting for any child on signal\n" ); fflush( stderr ); } pid = waitpid( (pid_t)(-1), &prog_stat, WNOHANG ); if (pid > 0) { /* Receives a child failure or exit. If *failure*, kill the others */ if (debug) { fprintf( stderr, "Found process %d\n", pid ); fflush( stderr ); } rc = 0; if (WIFEXITED(prog_stat)) { rc = WEXITSTATUS(prog_stat); } sigval = 0; if (WIFSIGNALED(prog_stat)) { sigval = WTERMSIG(prog_stat); } if (sigval || rc) { /* Look up this pid in the exitstatus */ for (i=0; i<maxfdentryInUse ; i++) { if (fdtable[i].pid == pid) { if (debug) { fprintf( stderr, "Found process %d\n", pid ); fflush( stderr ); } fdtable[i].active = 0; exitstatus[i].rc = rc; exitstatus[i].sig = sigval; break; } } if (i == numprocs) { /* Did not find the matching pid */ ; } if (killOnAbort) KillChildren(); } } else { if (debug) { fprintf( stderr, "Did not find child process!\n" ); fflush( stderr ); } }}#ifdef USE_SIGACTIONvoid setup_sigchild( void ){ struct sigaction oldact; /* Get the old signal action, reset the function and if possible turn off the reset-handler-to-default bit, then set the new handler */ sigaction( SIGCHLD, (struct sigaction *)0, &oldact ); oldact.sa_handler = handle_sigchild;#ifdef SA_RESETHAND /* Note that if this feature is not supported, there is a race condition in the handling of signals, and the OS is fundementally flawed */ oldact.sa_flags = oldact.sa_flags & ~(SA_RESETHAND);#endif sigaddset( &oldact.sa_mask, SIGCHLD ); sigaction( SIGCHLD, &oldact, (struct sigaction *)0 );}#elif defined(USE_SIGNAL)void setup_sigchild( void ){ /* Set new handler; ignore old choice */ (void)signal( SIGCHLD, handle_sigchild );}#elsevoid setup_sigchild( void ){}#endif#endif/* Send a given signal to all processes */void SignalAllProcesses( int sig, const char msg[] ){ int i, rc; pid_t pid; for (i=0; i<=maxfdentryInUse; i++) { if (fdtable[i].active) { pid = fdtable[i].pid; if (pid > 0) { if (debug) { printf( "sig %d to %d\n", sig, pid ); fflush( stdout ); } rc = kill( pid, sig ); if (rc) { /* Check for errors */ if (errno != ESRCH) { perror( msg ); } } } } }}/* * Kill all processes. This is called when (a) a child dies with a non-zero * error code or with a signal *and* (b) the "kill-on-failure" feature is * selected (on by default). */void KillChildren( void ){ int i, pid, rc; /* fprintf( stderr, "Entering kill children\n" ); */ /* Indicate within KillChildren */ if (inKillChildren) return; inKillChildren = 1; /* Loop through the processes and try to kill them; gently first, * then brutally */ KillTracedProcesses( ); SignalAllProcesses( SIGINT, "Could not kill with sigint" ); /* We should wait here to give time for the processes to exit */ sleep( 1 ); SignalAllProcesses( SIGQUIT, "Could not kill with sigquit" ); /* Try to wait for the processes */ for (i=0; i<=maxfdentryInUse; i++) { if (fdtable[i].active) { pid = fdtable[i].pid; if (pid > 0) { if (debug) { printf( "Wait on %d\n", pid ); fflush( stdout ); } /* Nonblocking wait */ rc = waitOnProcess( i, 0, KILLED ); } } }}/* * Determine the exit status to return from mpiexec. The rule is: * * 1. If all processes exited normally (exit_state == NORMAL), then * return the max of the exit statuses * 2. If any process did not exit normally (but was not killed by mpiexec) * return the value */int GetExitStatus( void ){ int i, rc; /* If all exited normally, return the max of exitstatus */ rc = 0; for (i=0; i<num_exited; i++) { if (exitstatus[i].exit_state == NORMAL) { if (exitstatus[i].rc > rc) rc = exitstatus[i].rc; } else { break; } } if (i == num_exited) return rc; /* Abnormal exit. Look for status on any process that died */ for (i=0; i<num_exited; i++) { if (exitstatus[i].exit_state != NORMAL && exitstatus[i].exit_state != KILLED) { rc = exitstatus[i].rc; if (rc > 0) return rc; } } /* All processes gave 0 return codes, but some process exited abnormally. Return a non-zero code */ return -1;}/* * Report on the status of the complete processes. * * We could us an arg to select either a particular process or all processes */void PrintExitStatus( void ){ int i, rc, sig; /* fprintf( stderr, "%d processes aborted\n", num_aborted ); */ for (i=0; i<num_exited; i++) { rc = exitstatus[i].rc; sig = exitstatus[i].sig; if (sig && exitstatus[i].exit_state != KILLED) {#ifdef HAVE_STRSIGNAL fprintf( stderr, "Return code = %d, signaled with %s\n", rc, strsignal(sig) );#else fprintf( stderr, "Return code = %d, signaled with %d\n", rc, sig );#endif } else if (debug) { fprintf( stderr, "Return code = %d\n", rc ); } fflush( stderr ); }}/* ------------------------------------------------------------------------- * The following routines implement the PMI interface. They * manage the key-value space (kvs) and the process groups. * * ------------------------------------------------------------------------- */#define MAXGROUPS 256 /* max number of groups */#define MAXKEYLEN 64 /* max length of key in keyval space */#define MAXVALLEN 128 /* max length of value in keyval space */#define MAXPAIRS 1024 /* max number of pairs in a keyval space */#define MAXKVSS 16 /* max number of keyval spaces *//* * The following structures and arrays are used to implement the PMI
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -