📄 process.c
字号:
found, depending on various race conditions. Thus, we allow this case to happen without generating an error message */ /* Generate a debug message if we enter the handler but do not find a child */ DBG_EPRINTFCOND(MPIE_Debug && !foundChild, (stderr, "Did not find child process!\n") ); /* We need to reset errno since otherwise a system call being used in the main thread might see this errno and mistakenly decide that it suffered an error */ errno = 0; break; } foundChild = 1; /* Receives a child failure or exit. If *failure*, kill the others */ DBG_PRINTF(("Found process %d in sigchld handler\n", pid ) ); pState = MPIE_FindProcessByPid( pid ); if (pState) { ProcessStatus_t pstatus = pState->status; /* Original status */ MPIE_ProcessSetExitStatus( pState, prog_stat ); pState->status = PROCESS_GONE; /* If the exit wasn't NORMAL *AND* it didn't exit without finalize but never called PMI, invoke the OnAbend. */ if (pState->exitStatus.exitReason != EXIT_NORMAL && !(pState->exitStatus.exitReason == EXIT_NOFINALIZE && pstatus == PROCESS_ALIVE)) { /* Not a normal exit. 
We may want to abort all remaining processes */
                DBG_PRINTF(("Calling OnAbend because exitReason was not normal (was %d)\n",
                            pState->exitStatus.exitReason ));
                MPIE_OnAbend( &pUniv );
            }
            /* Let the universe know that there are fewer processes */
            pUniv.nLive--;
            if (pUniv.nLive == 0) {
                DBG_PRINTF(("All children have exited\n"));
                /* Invoke any special code for handling all processes
                   exited (e.g., terminate a listener socket) */
                if (pUniv.OnNone) { (*pUniv.OnNone)(); }
            }
        }
        else {
            /* Remember this process id and exit status for later */
            DBG_PRINTF(("An unknown process (pid = %d) has exited\n", pid ));
            /* NOTE(review): nUnexpected is not advanced in the visible part
               of this handler, so a second unknown exit would overwrite
               this slot - confirm against the rest of the function. */
            unexpectedExit[nUnexpected].pid = pid;
            unexpectedExit[nUnexpected].stat = prog_stat;
        }
    }
#ifndef SA_RESETHAND
    /* If we can't clear the "reset handler bit", we must re-install the
       handler here */
    MPIE_InstallSigHandler( SIGCHLD, handle_sigchild );
#endif
    /* Mark the handler as no longer running */
    inHandler = 0;
}

/*@
  MPIE_ProcessInit - Initialize the support for process creation

  Notes:
  The major chore of this routine is to set the 'SIGCHLD' signal handler
  ('handle_sigchild').  It also resets the file-level universe 'pUniv' to
  an empty state: no worlds, no live processes, no "all processes have
  exited" callback, and not started from a singleton.
  @*/
void MPIE_ProcessInit( void )
{
    MPIE_InstallSigHandler( SIGCHLD, handle_sigchild );
    pUniv.worlds = 0;
    pUniv.nLive = 0;
    /* The SIGCHLD handler only calls OnNone when it is non-null */
    pUniv.OnNone = 0;
    pUniv.fromSingleton = 0;
}

/*
 * Wait up to timeout seconds for all processes to exit.
 * Because we are using a SIGCHLD handler to get the exit reason and
 * status from exiting children, this routine waits for those
 * signal handlers to return.  (POSIX requires a SIGCHLD handler, and leaving
 * the signal handler in charge avoids race conditions and possible loss
 * of information).
*/
/*
 * Parameters:
 *   pUniv   - universe whose worlds/apps/processes are scanned
 *   timeout - passed to TimeoutInit(); the loop ends once
 *             TimeoutGetRemaining() is no longer positive
 *
 * A process counts as still active when it has a positive pid and its
 * status is not PROCESS_GONE.  The loop polls the process tables; it does
 * not block between passes.
 *
 * Returns 0 in all cases (see the FIXME below).
 */
int MPIE_WaitForProcesses( ProcessUniverse *pUniv, int timeout )
{
    ProcessWorld *world;
    ProcessApp   *app;
    ProcessState *pState;
    int          i, nactive;

    DBG_PRINTF(("Waiting for processes\n"));

    TimeoutInit( timeout );
    do {
        /* Recount from scratch on every pass.  Without this reset the
           count only ever grows, so the loop could never finish before
           the timeout once a single active process had been seen. */
        nactive = 0;
        for (world = pUniv->worlds; world; world = world->nextWorld) {
            for (app = world->apps; app; app = app->nextApp) {
                pState = app->pState;
                for (i=0; i<app->nProcess; i++) {
                    if (pState[i].status != PROCESS_GONE &&
                        pState[i].pid > 0) {
                        nactive++;
                    }
                }
            }
        }
    } while (nactive > 0 && TimeoutGetRemaining() > 0);

    DBG_PRINTF(("Done waiting for processes\n"));

    /* FIXME: Indicate whether all processes have exited.  Then
       mpiexec programs can decide (probably based on a debugging flag)
       what to do if they have not all exited. */
    return 0;
}

/*
 * Convert the ProcessList into an array of process states.
 * In the general case,
 * the mpiexec program will use a resource manager to provide this function;
 * the resource manager may use a list of host names or query a sophisticated
 * resource management system.  Since the forker process manager runs all
 * processes on the same host, this function need only expand the
 * process list into a process table.
 *
 *
 * Updates the ProcessTable with the new processes, and updates the
 * number of processes.  All processes are added to the end of the
 * current array.  Only the "spec" part of the state element is initialized
 *
 * Return value is the number of processes added, or a negative value
 * if an error is encountered.
 *
 * We use a state array so that we can convert any plist into an array
 * of states.  This allows us to use spawn during an mpiexec run.
 *
 * This routine could also check for inconsistent arguments, such as
 * a hostname that is not the calling host, or an architecture that does
 * not match the calling host's architecture.
* * TODO: How do we handle the UNIVERSE_SIZE in this assignment (we * need at least one process from each appnum; that is, from each * requested set of processes. * *//*@ MPIE_InitWorldWithSoft - Initialize a process world from any soft specifications Input Parameter:. maxnp - The maximum number of processes to allow. Input/Output Parameter:. world - Process world. On return, the 'ProcessState' fields for any soft specifications have been initialized @*/int MPIE_InitWorldWithSoft( ProcessWorld *world, int maxnp ){ ProcessApp *app; int minNeeded, maxNeeded; int j; /* Compute the number of available processes */ maxnp -= world->nProcess; /* Compute the number of requested processes */ minNeeded = maxNeeded = 0; app = world->apps; while (app) { if (app->soft.nelm > 0 && app->nProcess == 0) { /* Found a soft spec */ for (j=0; j<app->soft.nelm; j++) { int *tuple, start, end, stride; tuple = app->soft.tuples[j]; start = tuple[0]; end = tuple[1]; stride = tuple[2]; if (stride > 0) { minNeeded += start; maxNeeded += start + stride * ( (start-end)/stride ); } else if (stride < 0) { minNeeded += start + stride * ( (end-start)/stride ); maxNeeded += start; } } } app = app->nextApp; } if (minNeeded > maxnp) { /* Requested more than there are available */ return 1; } if (maxNeeded > maxnp) { /* Must take fewer than the maximum. Take the minimum for now */ app = world->apps; while (app) { if (app->soft.nelm > 0 && app->nProcess == 0) { /* Found a soft spec */ for (j=0; j<app->soft.nelm; j++) { int *tuple, start, end, stride; tuple = app->soft.tuples[j]; start = tuple[0]; end = tuple[1]; stride = tuple[2]; if (stride > 0) { app->nProcess = start; } else if (stride < 0) { app->nProcess = start + stride * ( (end-start)/stride ); } } } app = app->nextApp; } /* If we wanted to get closer to the maximum number, we could iterative all stride to each set until we reached the limit. 
But this isn't necessary to conform to the standard */ } else { /* Take the maximum */ app = world->apps; while (app) { if (app->soft.nelm > 0 && app->nProcess == 0) { /* Found a soft spec */ for (j=0; j<app->soft.nelm; j++) { int *tuple, start, end, stride; tuple = app->soft.tuples[j]; start = tuple[0]; end = tuple[1]; stride = tuple[2]; /* Compute the "real" end */ if (stride > 0) { app->nProcess = start + stride * ( (start-end)/stride ); } else if (stride < 0) { app->nProcess = start; } } } app = app->nextApp; } } return 0;}/* ------------------------------------------------------------------------ *//* Routines to deliver signals to every process in a world *//* ------------------------------------------------------------------------ *//*@ MPIE_SignalWorld - Send a signal to every process in a world @*/int MPIE_SignalWorld( ProcessWorld *world, int signum ){ ProcessApp *app; ProcessState *pState; int np, i; app = world->apps; while (app) { pState = app->pState; np = app->nProcess; for (i=0; i<np; i++) { pid_t pid; pid = pState[i].pid; if (pid > 0 && pState[i].status != PROCESS_GONE) { /* Ignore error returns */ DBG_PRINTF(("Sending signal %d to pid %d\n",signum,pid)); kill( pid, signum ); } } app = app->nextApp; } return 0;} /* We use inKillWorld to avoid invoking KillWorld while within KillWorld. 
This could happen if kill world is called outside of the sigchild handler, which (as a result of the kill action) may invoke KillWorld if the */static int inKillWorld = 0;/*@ MPIE_KillWorld - Kill all of the processes in a world @*/int MPIE_KillWorld( ProcessWorld *world ){ if (inKillWorld) return 0; inKillWorld=1; DBG_PRINTF(("Entering KillWorld\n")); MPIE_SignalWorld( world, SIGINT ); /* We should wait here to give time for the processes to exit */ sleep( 1 ); MPIE_SignalWorld( world, SIGQUIT ); inKillWorld=0; return 0;}/*@ MPIE_KillUniverse - Kill all of the processes in a universe @*/int MPIE_KillUniverse( ProcessUniverse *pUniv ){ ProcessWorld *world; world = pUniv->worlds; while (world) { MPIE_KillWorld( world ); world = world->nextWorld; } return 0;}/* Print out the reasons for failure for any processes that did not exit cleanly */void MPIE_PrintFailureReasons( FILE *fp ){ int i; int rc, sig, order; ProcessExitState_t exitReason; ProcessWorld *world; ProcessApp *app; ProcessState *pState; int worldnum, wrank; world = pUniv.worlds; while (world) { worldnum = world->worldNum; app = world->apps; while (app) { pState = app->pState; for (i=0; i<app->nProcess; i++) { wrank = pState[i].wRank; rc = pState[i].exitStatus.exitStatus; sig = pState[i].exitStatus.exitSig; order = pState[i].exitStatus.exitOrder; exitReason = pState[i].exitStatus.exitReason; /* If signalled and we did not send the signal (INT or KILL)*/ if (sig && (exitReason != EXIT_KILLED || (sig != SIGKILL && sig != SIGINT))) {#ifdef HAVE_STRSIGNAL MPIU_Error_printf( "[%d]%d:Return code = %d, signaled with %s\n", worldnum, wrank, rc, strsignal(sig) );#else MPIU_Error_printf( "[%d]%d:Return code = %d, signaled with %d\n", worldnum, wrank, rc, sig );#endif } else if (MPIE_Debug || rc) { MPIU_Error_printf( "[%d]%d:Return code = %d\n", worldnum, wrank, rc ); } } app = app->nextApp; } world = world->nextWorld; }}/* */static void handle_forwardsig( int sig ){ ProcessWorld *world; world = pUniv.worlds; while 
(world) { MPIE_SignalWorld( world, sig ); world = world->nextWorld; } return;}int MPIE_ForwardSignal( int sig ){ MPIE_InstallSigHandler( sig, handle_forwardsig ); return 0;}/* * This routine contains the action to take on an abnormal exit from * a managed process. The normal action is to kill all of the other processes */static volatile int haveAbended = 0; int MPIE_OnAbend( ProcessUniverse *p ){ if (!p) p = &pUniv; /* Remember that we've abended (this allows an easy check outside of the signal handler that may invoke this) */ haveAbended = 1; MPIE_KillUniverse( p ); return 0;}int MPIE_HasAbended( void ){ return haveAbended;}int MPIE_ForwardCommonSignals( void ){ MPIE_ForwardSignal( SIGINT ); MPIE_ForwardSignal( SIGQUIT ); MPIE_ForwardSignal( SIGTERM );#ifdef SIGSTOP MPIE_ForwardSignal( SIGSTOP );#endif#ifdef SIGCONT MPIE_ForwardSignal( SIGCONT );#endif /* Do we want to forward usr1 and usr2? */ return 0;}/* Install a signal handler*/static void MPIE_InstallSigHandler( int sig, void (*handler)(int) ){#ifdef USE_SIGACTION struct sigaction oldact; /* Get the old signal action, reset the function and if possible turn off the reset-handler-to-default bit, then set the new handler */ sigaction( sig, (struct sigaction *)0, &oldact ); oldact.sa_handler = (void (*)(int))handler;#ifdef SA_RESETHAND /* Note that if this feature is not supported, there is a race condition in the handling of signals, and the OS is fundementally flawed */ oldact.sa_flags = oldact.sa_flags & ~(SA_RESETHAND);#endif sigaddset( &oldact.sa_mask, sig ); sigaction( sig, &oldact, (struct sigaction *)0 );#elif defined(USE_SIGNAL) /* Set new handler; ignore old choice */ (void)signal( sig, handler );#else /* No way to set up sigchld */#error "Unknown signal handling!"#endif}void MPIE_IgnoreSigPipe( void ){ MPIE_InstallSigHandler( SIGPIPE, SIG_IGN );}/* * Setup pUniv for a singleton init. That is a single pWorld with a * single app containing a single process. 
* * Note that MPIE_Args already allocated a pWorld. */int MPIE_SetupSingleton( ProcessUniverse *pUniv ){ ProcessApp *pApp; ProcessWorld *pWorld; ProcessState *pState; pWorld = &pUniv->worlds[0]; pWorld->nProcess = 1; pApp = (ProcessApp *) MPIU_Malloc( sizeof(ProcessApp) ); pApp->nextApp = 0; pWorld->nApps = 1; pWorld->apps = pApp; pApp->nProcess = 1; pApp->env = 0; pApp->exename = 0; pApp->arch = 0; pApp->path = 0; pApp->wdir = 0; pApp->hostname = 0; pApp->args = 0; pApp->nArgs = 0; pApp->myAppNum = 0; pState = (ProcessState *) MPIU_Malloc( sizeof(ProcessState) ); pApp->pState = pState; pState[0].app = pApp; pState[0].wRank = 0; pState[0].id = UniqId++; pState[0].initWithEnv = 0; pState[0].status = PROCESS_ALIVE; /* The process is already running */ pState[0].pid = pUniv->singletonPID; pState[0].exitStatus.exitReason = EXIT_NOTYET; return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -