📄 mpiexec.c

📁 之前上传的是说明文档,这次是安装程序:在 Linux 环境下进行编程所用的 MPICH 安装文件
💻 C
📖 第 1 页 / 共 3 页
字号:
    fdtable[i].read	= 0;    fdtable[i].write	= 0;    fdtable[i].file	= NULL;    fdtable[i].handler	= NOTSET;    fdtable[i].name[0]  = '\0';    if (i > maxfdentryInUse)	maxfdentryInUse = i;    return( i );}/* Fork numprocs processes, with the given PMI groupid and kvsname */void forkProcesses( int numprocs, char execname[], char *client_arg[], 		    char *envp[], char wdirname[], 		    int groupid, char kvsname[] ){    int i, j, idx, pid, rc;    int  client_pipe_fds[2];    for ( i = 0; i < numprocs; i++ ) {	/* set up pipe to client for use by pmi */	socketpair( AF_UNIX, SOCK_STREAM, 0, client_pipe_fds );	idx		     = allocate_fdentry( );	fdtable[idx].fd	     = client_pipe_fds[0];	fdtable[idx].group   = groupid;	fdtable[idx].rank    = i; /* pid set below, after fork */	fdtable[idx].state   = UNKNOWN;	fdtable[idx].read    = 1;	fdtable[idx].write   = 0;	fdtable[idx].file    = NULL;	fdtable[idx].handler = CLIENT;	strncpy( fdtable[idx].name, "client", MAXNAMELEN ); 	strncpy( fdtable[idx].kvsname, kvsname, MAXNAMELEN );	pid = fork( );	if ( pid < 0 ) {	    fprintf( stderr, "mpiexec fork failed\n" );	    KillChildren( );	    exit( -1 );	}	if ( pid == 0 ) {	/***********************************  child, client */	    char env_pmi_fd[MAXNAMELEN];	    char env_pmi_rank[MAXNAMELEN];	    char env_pmi_size[MAXNAMELEN];	    char env_pmi_debug[MAXNAMELEN];	    char *client_env[MAX_CLIENT_ENV];	    /* Check to see if we should be traced */	    CheckIfTraced( );	    close( client_pipe_fds[0] );	    /* build environment for client */	    for ( j = 0; envp[j] && j < MAX_CLIENT_ENV-5; j++ )		client_env[j] = envp[j]; /* copy mpiexec environment */	    if (j == MAX_CLIENT_ENV-5) {		fprintf( stderr, "environment is too large (max is %d)\n",			 MAX_CLIENT_ENV-5);		exit(-1);	    }	    snprintf( env_pmi_fd, MAXNAMELEN, "PMI_FD=%d" , client_pipe_fds[1] );	    client_env[j++] = env_pmi_fd;	    snprintf( env_pmi_rank, MAXNAMELEN, "PMI_RANK=%d", i );	    client_env[j++] = env_pmi_rank;	    snprintf( 
env_pmi_size, MAXNAMELEN, "PMI_SIZE=%d", numprocs );	    client_env[j++] = env_pmi_size;	    snprintf( env_pmi_debug, MAXNAMELEN, "PMI_DEBUG=%d", debug );	    client_env[j++] = env_pmi_debug;	    client_env[j] = NULL;	    for ( j = 0; client_env[j]; j++ )		if (putenv( client_env[j] )) {		    perror( "Could not set environment" );		    exit( -1 );		}	    /* change working directory if specified, replace argv[0], and exec client */	    rc = chdir( wdirname );	    if (rc < 0) {		/* We need an error message here */		chdir( getenv( "HOME" ) );	    }	    client_arg[0] = execname;	    /* pathname argument should be used here */	    rc = execvp( execname, client_arg );	    if ( rc < 0 ) {		fprintf( stderr, "mpiexec could not exec %s\n", execname );		perror( "Reason:" );		exit( -1 );	    }	}	else {			/********************************** parent, mpiexec */	    fdtable[idx].pid = pid;	    close( client_pipe_fds[1] );	}    } /* end of forking loop */}/*  * This routine can be called to handle the result of a wait.   * This is in a separate routine so that it can be used anywhere * waitpid or wait are called. */void HandleWaitStatus( pid_t pid, int client_stat, exit_state_t sigstate,		       int has_finalized ) {    /* Get the status of the exited process */    if (WIFEXITED(client_stat)) {	/* true if the process exited normally */	exitstatus[num_exited].rc = WEXITSTATUS(client_stat);    }    else {	exitstatus[num_exited].rc = -1; /* For unknown, since valid					   returns in 0-255 */    }	    if (WIFSIGNALED(client_stat)) {	exitstatus[num_exited].sig        = WTERMSIG(client_stat);	exitstatus[num_exited].exit_state = sigstate;	num_aborted++;    }    else {	exitstatus[num_exited].sig= 0;	exitstatus[num_exited].exit_state = 	    has_finalized ? NORMAL : NOFINALIZE;    }}/* * Wait on the process in fdentry[idx].  Do a blocking wait if  * requested.  If sigstate is not "NORMAL", set the exit state for  * the process to this value if it exits with a signal.  
This is used * to separate processes that died because mpiexec sent them a signal * from processes that died because they received a signal from a  * different source (e.g., SIGFPE or SIGSEGV) */int waitOnProcess( int idx, int blocking, exit_state_t sigstate ){    int client_stat, rc, has_finalized;    pid_t pid;    /* Careful here: we may want to use WNOHANG; wait a little, then       do something like kill the process */    if (debug) {	fprintf( stderr, "Waiting on status of process %d\n",		 fdtable[idx].pid );	fflush( stderr );    }    pid = fdtable[idx].pid;    if (pid <= 0) return -1;    if (blocking)	rc = waitpid( pid, &client_stat, 0 );    else {	rc = waitpid( pid, &client_stat, WNOHANG );	if (rc == 0) return 0;    }    if (rc < 0) {	perror( "Error waiting for process!" );	return 0;    }    if (debug) {	fprintf( stderr, "Wait on %d completed\n", fdtable[idx].pid );	fflush( stderr );    }    has_finalized = fdtable[idx].state == FINALIZED;    HandleWaitStatus( pid, client_stat, sigstate, has_finalized );    num_exited++;    return 0;}/*  * Process input from the socket connecting the mpiexec process to the * child process  */int handle_input_fd ( int idx ){    int all_done = 0;    int  rc;    char inbuf[PMIU_MAXLINE], outbuf[PMIU_MAXLINE], cmd[MAXNAMELEN];    if ( fdtable[idx].handler == CLIENT ) {	/* printf( "handling client input for rank %d\n", fdtable[idx].rank ); */	if ( ( rc = PMIU_readline( fdtable[idx].fd, inbuf, PMIU_MAXLINE ) ) > 0 ) {	    PMIU_parse_keyvals( inbuf );	    PMIU_getval( "cmd", cmd, MAXNAMELEN );	    if ( strncmp( cmd, "barrier_in", MAXNAMELEN ) == 0 ) {		fPMI_Handle_barrier( idx );	    }	    else if ( strncmp( cmd, "finalize", MAXNAMELEN ) == 0 ) {		fdtable[idx].state = FINALIZED;	    }	    else if ( strncmp( cmd, "abort", MAXNAMELEN ) == 0 ) {		/* No PMI abort command has yet been implemented! 
*/		KillChildren();		all_done = 1;	    }	    else if ( strncmp( cmd, "get_my_kvsname", MAXNAMELEN ) == 0 ) {		fPMI_Handle_get_my_kvsname( idx );	    }	    else if ( strncmp( cmd, "get_maxes", MAXNAMELEN ) == 0 ) {		fPMI_Handle_get_maxes( idx );	    }	    else if ( strncmp( cmd, "create_kvs", MAXNAMELEN ) == 0 ) {		fPMI_Handle_create_kvs( idx );	    }	    else if ( strncmp( cmd, "destroy_kvs", MAXNAMELEN ) == 0 ) {		fPMI_Handle_destroy_kvs( idx );	    }	    else if ( strncmp( cmd, "put", MAXNAMELEN ) == 0 ) {		fPMI_Handle_put( idx );	    }	    else if ( strncmp( cmd, "get", MAXNAMELEN ) == 0 ) {		fPMI_Handle_get( idx );	    }	    else if ( strncmp( cmd, "getbyidx", MAXNAMELEN ) == 0 ) {		fPMI_Handle_getbyidx( idx );	    }	    else {		PMIU_printf( 1, "unknown cmd %s\n", cmd );	    }	}	else {                        /* lost contact with client */	    close( fdtable[idx].fd ); 	    fdtable[idx].active = 0;	    rc = waitOnProcess( idx, 1, NORMAL );	    if (rc) {		fprintf( stderr, "Error waiting on process %d\n",			 fdtable[idx].pid );	    }	    if (fdtable[idx].state != FINALIZED) {		/* Process exited before finalize */		KillChildren();		all_done = 1;	    }	    if ( num_exited  == numprocs ) {		/* Set the global done flag */		all_done = 1;	    }	}    }    else {	fprintf( stderr, "unknown handler %d for fdtable entry %d\n",		 fdtable[idx].handler, idx);    }    return all_done;}#ifdef USE_SIGCHLD_HANDLER/*  * Signal handler.  Detect a SIGCHLD exit.  The routines are * * setup_sigchild - Call to install the signal handler * handle_sigchild - This is the signal handler * * If a child exits with a non-zero return code, we may want to kill  * the other children.  In most cases, we'll want to kill the other  * children if a child dies on a signal. * Sometimes we do *not* want to kill the children; particularly when * we are debugging. 
* */#if defined(USE_SIGNAL) || defined(USE_SIGACTION)#include <signal.h>#else#error no signal choice#endifint handle_sigchild( int sig ){    int prog_stat, pid, rc, sigval, i;    if (debug) {	fprintf( stderr, "Waiting for any child on signal\n" );	fflush( stderr );    }    pid = waitpid( (pid_t)(-1), &prog_stat, WNOHANG );        if (pid > 0) {	/* Receives a child failure or exit.  If *failure*, kill the others */	if (debug) {	    fprintf( stderr, "Found process %d\n", pid );	    fflush( stderr );	}	rc = 0;	if (WIFEXITED(prog_stat)) {	    rc = WEXITSTATUS(prog_stat);	}	sigval = 0;	if (WIFSIGNALED(prog_stat)) {	    sigval = WTERMSIG(prog_stat);	}	if (sigval || rc) {	    /* Look up this pid in the exitstatus */	    for (i=0; i<maxfdentryInUse ; i++) {		if (fdtable[i].pid == pid) {		    if (debug) {			fprintf( stderr, "Found process %d\n", pid );			fflush( stderr );		    }		    fdtable[i].active   = 0;		    exitstatus[i].rc  = rc;		    exitstatus[i].sig = sigval;		    break;		}	    }	    if (i == numprocs) {		/* Did not find the matching pid */		;	    }	    if (killOnAbort) 		KillChildren();	}    }    else {	if (debug) {	    fprintf( stderr, "Did not find child process!\n" );	    fflush( stderr );	}    }}#ifdef USE_SIGACTIONvoid setup_sigchild( void ){    struct sigaction oldact;    /* Get the old signal action, reset the function and        if possible turn off the reset-handler-to-default bit, then       set the new handler */    sigaction( SIGCHLD, (struct sigaction *)0, &oldact );    oldact.sa_handler = handle_sigchild;#ifdef SA_RESETHAND    /* Note that if this feature is not supported, there is a race       condition in the handling of signals, and the OS is fundementally       flawed */    oldact.sa_flags   = oldact.sa_flags & ~(SA_RESETHAND);#endif    sigaddset( &oldact.sa_mask, SIGCHLD );    sigaction( SIGCHLD, &oldact, (struct sigaction *)0 );}#elif defined(USE_SIGNAL)void setup_sigchild( void ){    /* Set new handler; ignore old choice */    (void)signal( 
SIGCHLD, handle_sigchild );}#elsevoid setup_sigchild( void ){}#endif#endif/* Send a given signal to all processes */void SignalAllProcesses( int sig, const char msg[] ){    int   i, rc;    pid_t pid;    for (i=0; i<=maxfdentryInUse; i++) {	if (fdtable[i].active) {	    pid = fdtable[i].pid;	    if (pid > 0) {		if (debug) {		    printf( "sig %d to %d\n", sig, pid ); fflush( stdout );		}		rc = kill( pid, sig );		if (rc) {		    /* Check for errors */		    if (errno != ESRCH) {			perror( msg );		    }		}	    }	}    }}/* * Kill all processes.  This is called when (a) a child dies with a non-zero  * error code or with a signal *and* (b) the "kill-on-failure" feature is * selected (on by default). */void KillChildren( void ){    int i, pid, rc;    /* fprintf( stderr, "Entering kill children\n" ); */    /* Indicate within KillChildren */    if (inKillChildren) return;    inKillChildren = 1;    /* Loop through the processes and try to kill them; gently first,     * then brutally      */        KillTracedProcesses( );    SignalAllProcesses( SIGINT, "Could not kill with sigint" );    /* We should wait here to give time for the processes to exit */        sleep( 1 );    SignalAllProcesses( SIGQUIT, "Could not kill with sigquit" );        /* Try to wait for the processes */    for (i=0; i<=maxfdentryInUse; i++) {	if (fdtable[i].active) {	    pid = fdtable[i].pid;	    if (pid > 0) {		if (debug) {		    printf( "Wait on %d\n", pid ); fflush( stdout );		}		/* Nonblocking wait */		rc = waitOnProcess( i, 0, KILLED );			    }	}    }}/*  * Determine the exit status to return from mpiexec.  The rule is: * * 1. If all processes exited normally (exit_state == NORMAL), then * return the max of the exit statuses * 2. 
If any process did not exit normally (but was not killed by mpiexec) * return the value  */int GetExitStatus( void ){    int i, rc;        /* If all exited normally, return the max of exitstatus */    rc = 0;    for (i=0; i<num_exited; i++) {	if (exitstatus[i].exit_state == NORMAL) {	    if (exitstatus[i].rc > rc) 		rc = exitstatus[i].rc;	}	else {	    break;	}    }    if (i == num_exited) return rc;    /* Abnormal exit.  Look for status on any process that died */    for (i=0; i<num_exited; i++) {	if (exitstatus[i].exit_state != NORMAL &&	    exitstatus[i].exit_state != KILLED) {	    rc = exitstatus[i].rc;	    if (rc > 0) return rc;	}    }    /* All processes gave 0 return codes, but some process exited        abnormally.  Return a non-zero code */    return -1;}/* * Report on the status of the complete processes. * * We could us an arg to select either a particular process or all processes */void PrintExitStatus( void ){    int i, rc, sig;    /* fprintf( stderr, "%d processes aborted\n", num_aborted ); */    for (i=0; i<num_exited; i++) {	rc  = exitstatus[i].rc;	sig = exitstatus[i].sig;	if (sig && exitstatus[i].exit_state != KILLED) {#ifdef HAVE_STRSIGNAL	    fprintf( stderr, 		     "Return code = %d, signaled with %s\n", rc, 		     strsignal(sig) );#else	    fprintf( stderr, 		     "Return code = %d, signaled with %d\n", rc, sig );#endif	}	else if (debug) {	    fprintf( stderr, "Return code = %d\n", rc );	}	fflush( stderr );    }}/* ------------------------------------------------------------------------- * The following routines implement the PMI interface.  They * manage the key-value space (kvs) and the process groups. 
* * ------------------------------------------------------------------------- */#define MAXGROUPS   256		/* max number of groups */#define MAXKEYLEN    64		/* max length of key in keyval space */#define MAXVALLEN   128		/* max length of value in keyval space */#define MAXPAIRS   1024		/* max number of pairs in a keyval space */#define MAXKVSS      16		/* max number of keyval spaces *//* * The following structures and arrays are used to implement the PMI
💿 文件大小 2695 K
👤 上传用户 smashup
📂 所属分类并行计算
🏷️ 相关标签

#LINUX #MPICH #安装程序 #环境
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -