⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pls_rsh_module.c

📁 MPI stands for the Message Passing Interface. Written by the MPI Forum (a large committee comprising …)
💻 C
📖 第 1 页 / 共 4 页
字号:
    /* setup ns contact info */    opal_argv_append(&argc, &argv, "--nsreplica");    if (NULL != orte_process_info.ns_replica_uri) {        uri = strdup(orte_process_info.ns_replica_uri);    } else {        uri = orte_rml.get_uri();    }    asprintf(&param, "\"%s\"", uri);    opal_argv_append(&argc, &argv, param);    free(uri);    free(param);    /* setup gpr contact info */    opal_argv_append(&argc, &argv, "--gprreplica");    if (NULL != orte_process_info.gpr_replica_uri) {        uri = strdup(orte_process_info.gpr_replica_uri);    } else {        uri = orte_rml.get_uri();    }    asprintf(&param, "\"%s\"", uri);    opal_argv_append(&argc, &argv, param);    free(uri);    free(param);    local_exec_index_end = argc;    if (!(remote_csh || remote_sh)) {        opal_argv_append(&argc, &argv, ")");    }    if (mca_pls_rsh_component.debug) {        param = opal_argv_join(argv, ' ');        if (NULL != param) {            opal_output(0, "pls:rsh: final template argv:");            opal_output(0, "pls:rsh:     %s", param);            free(param);        }    }    /* Figure out the basenames for the libdir and bindir.  This       requires some explanation:       - Use opal_install_dirs.libdir and opal_install_dirs.bindir instead of -D'ing some macros         in this directory's Makefile.am because it makes all the         dependencies work out correctly.  These are defined in         opal/install_dirs.h.       - After a discussion on the devel-core mailing list, the         developers decided that we should use the local directory         basenames as the basis for the prefix on the remote note.         This does not handle a few notable cases (e.g., f the         libdir/bindir is not simply a subdir under the prefix, if the         libdir/bindir basename is not the same on the remote node as         it is here in the local node, etc.), but we decided that         --prefix was meant to handle "the common case".  
If you need         something more complex than this, a) edit your shell startup         files to set PATH/LD_LIBRARY_PATH properly on the remove         node, or b) use some new/to-be-defined options that         explicitly allow setting the bindir/libdir on the remote         node.  We decided to implement these options (e.g.,         --remote-bindir and --remote-libdir) to orterun when it         actually becomes a problem for someone (vs. a hypothetical         situation).       Hence, for now, we simply take the basename of this install's       libdir and bindir and use it to append this install's prefix       and use that on the remote node.    */    lib_base = opal_basename(opal_install_dirs.libdir);    bin_base = opal_basename(opal_install_dirs.bindir);    /*     * Iterate through each of the nodes     */    if (mca_pls_rsh_component.timing) {        /* allocate space to track the start times */        launchstart = (struct timeval*)malloc((num_nodes+vpid) * sizeof(struct timeval));    }        for(n_item =  opal_list_get_first(&map->nodes);        n_item != opal_list_get_end(&map->nodes);        n_item =  opal_list_get_next(n_item)) {        orte_process_name_t* name;        pid_t pid;        char *exec_path;        char **exec_argv;                rmaps_node = (orte_mapped_node_t*)n_item;                if (mca_pls_rsh_component.timing) {            if (0 != gettimeofday(&launchstart[vpid], NULL)) {                opal_output(0, "pls_rsh: could not obtain start time");            }        }                /* new daemon - setup to record its info */        dmn = OBJ_NEW(orte_pls_daemon_info_t);        dmn->active_job = jobid;        opal_list_append(&active_daemons, &dmn->super);                /* setup node name */        free(argv[node_name_index1]);        if (NULL != rmaps_node->username &&            0 != strlen (rmaps_node->username)) {            asprintf (&argv[node_name_index1], "%s@%s",                      rmaps_node->username, 
rmaps_node->nodename);        } else {            argv[node_name_index1] = strdup(rmaps_node->nodename);        }        free(argv[node_name_index2]);        argv[node_name_index2] = strdup(rmaps_node->nodename);                /* save it in the daemon info */        dmn->nodename = strdup(rmaps_node->nodename);        /* initialize daemons process name */        rc = orte_ns.create_process_name(&name, rmaps_node->cell, 0, vpid);        if (ORTE_SUCCESS != rc) {            ORTE_ERROR_LOG(rc);            goto cleanup;        }                /* save it in the daemon info */        dmn->cell = rmaps_node->cell;        if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {            ORTE_ERROR_LOG(rc);            goto cleanup;        }        /* fork a child to exec the rsh/ssh session */                /* set the process state to "launched" */        if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(name, ORTE_PROC_STATE_LAUNCHED, 0))) {            ORTE_ERROR_LOG(rc);            goto cleanup;        }        pid = fork();        if (pid < 0) {            rc = ORTE_ERR_OUT_OF_RESOURCE;            goto cleanup;        }        /* child */        if (pid == 0) {            char* name_string;            char** env;            char* var;            long fd, fdmax = sysconf(_SC_OPEN_MAX);            if (mca_pls_rsh_component.debug) {                opal_output(0, "pls:rsh: launching on node %s\n",                            rmaps_node->nodename);            }            /* We don't need to sense an oversubscribed condition and set the sched_yield             * for the node as we are only launching the daemons at this time. The daemons             * are now smart enough to set the oversubscribed condition themselves when             * they launch the local procs.             */            /* Is this a local launch?             *             * Not all node names may be resolvable (if we found             * localhost in the hostfile, for example). 
 So first             * check trivial case of node_name being same as the             * current nodename, which must be local.  If that doesn't             * match, check using ifislocal().             */            if (!mca_pls_rsh_component.force_rsh &&                (0 == strcmp(rmaps_node->nodename, orte_system_info.nodename) ||                opal_ifislocal(rmaps_node->nodename))) {                if (mca_pls_rsh_component.debug) {                    opal_output(0, "pls:rsh: %s is a LOCAL node\n",                                rmaps_node->nodename);                }                if (mca_pls_rsh_component.timing) {                    /* since this is a local launch, the daemon will never reach                     * the waitpid callback - so set the start value to                     * something nonsensical                     */                    launchstart[vpid].tv_sec = 0;                    launchstart[vpid].tv_usec = 0;                }                                exec_path = opal_path_findv(argv[local_exec_index], 0, environ, NULL);                if (NULL == exec_path && NULL == prefix_dir) {                    rc = orte_pls_rsh_fill_exec_path (&exec_path);                    if (ORTE_SUCCESS != rc) {                        exit(-1);  /* the forked process MUST exit */                    }                } else {                    if (NULL != prefix_dir) {                        exec_path = opal_os_path( false, prefix_dir, bin_base, "orted", NULL );                    }                    /* If we yet did not fill up the execpath, do so now */                    if (NULL == exec_path) {                        rc = orte_pls_rsh_fill_exec_path (&exec_path);                        if (ORTE_SUCCESS != rc) {                            exit(-1);  /* the forked process MUST exit */                        }                    }                }                /* If we have a prefix, then modify the PATH and                   LD_LIBRARY_PATH environment 
variables.  We're                   already in the child process, so it's ok to modify                   environ. */                if (NULL != prefix_dir) {                    char *oldenv, *newenv;                    /* Reset PATH */                    newenv = opal_os_path( false, prefix_dir, bin_base, NULL );                    oldenv = getenv("PATH");                    if (NULL != oldenv) {                        char *temp;                        asprintf(&temp, "%s:%s", newenv, oldenv );                        free( newenv );                        newenv = temp;                    }                    opal_setenv("PATH", newenv, true, &environ);                    if (mca_pls_rsh_component.debug) {                        opal_output(0, "pls:rsh: reset PATH: %s", newenv);                    }                    free(newenv);                    /* Reset LD_LIBRARY_PATH */                    newenv = opal_os_path( false, prefix_dir, lib_base, NULL );                    oldenv = getenv("LD_LIBRARY_PATH");                    if (NULL != oldenv) {                        char* temp;                        asprintf(&temp, "%s:%s", newenv, oldenv);                        free(newenv);                        newenv = temp;                    }                    opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ);                    if (mca_pls_rsh_component.debug) {                        opal_output(0, "pls:rsh: reset LD_LIBRARY_PATH: %s",                                    newenv);                    }                    free(newenv);                }                /* Since this is a local execution, we need to                   potentially whack the final ")" in the argv (if                   sh/csh conditionals, from above).  Note that we're                   modifying the argv[] in the child process, so                   there's no need to save this and restore it                   afterward -- the parent's argv[] is unmodified. 
*/                if (NULL != argv[local_exec_index_end]) {                    free(argv[local_exec_index_end]);                    argv[local_exec_index_end] = NULL;                }                /* tell the daemon to setup its own process session/group */                opal_argv_append(&argc, &argv, "--set-sid");                exec_argv = &argv[local_exec_index];                                /* Finally, chdir($HOME) because we're making the                   assumption that this is what will happen on                   remote nodes (via rsh/ssh).  This allows a user                   to specify a path that is relative to $HOME for                   both the cwd and argv[0] and it will work on                   all nodes -- including the local nost.                   Otherwise, it would work on remote nodes and                   not the local node.  If the user does not start                   in $HOME on the remote nodes... well... let's                   hope they start in $HOME.  :-) */                var = getenv("HOME");                if (NULL != var) {                    if (mca_pls_rsh_component.debug) {                        opal_output(0, "pls:rsh: changing to directory %s", var);                    }                    /* Ignore errors -- what are we going to do?                       
(and we ignore errors on the remote nodes                       in the fork pls, so this is consistent) */                    chdir(var);                }            } else {                if (mca_pls_rsh_component.debug) {                    opal_output(0, "pls:rsh: %s is a REMOTE node\n",                                rmaps_node->nodename);                }                exec_argv = argv;                exec_path = strdup(mca_pls_rsh_component.agent_path);                if (NULL != prefix_dir) {                    char *opal_prefix = getenv("OPAL_PREFIX");                    if (remote_sh) {                        asprintf (&argv[local_exec_index],                                  "%s%s%s PATH=%s/%s:$PATH ; export PATH ; "                                  "LD_LIBRARY_PATH=%s/%s:$LD_LIBRARY_PATH ; export LD_LIBRARY_PATH ; "                                  "%s/%s/%s",                                  (opal_prefix != NULL ? "OPAL_PREFIX=" : ""),                                  (opal_prefix != NULL ? opal_prefix : ""),                                  (opal_prefix != NULL ? " ;" : ""),                                  prefix_dir, bin_base,                                  prefix_dir, lib_base,                                  prefix_dir, bin_base,                                  mca_pls_rsh_component.orted);                    }                    if (remote_csh) {                        /* [t]csh is a bit more challenging -- we                           have to check whether LD_LIBRARY_PATH                           is already set before we try to set it.                           Must be very careful about obeying                           [t]csh's order of evaluation and not                           using a variable before it is defined.                           See this thread for more details:                           http://www.open-mpi.org/community/lists/users/2006/01/0517.php. 
*/                        asprintf (&argv[local_exec_index],                                  "%s%s%s set path = ( %s/%s $path ) ; "                                  "if ( $?LD_LIBRARY_PATH == 1 ) "                                  "set OMPI_have_llp ; "                                  "if ( $?LD_LIBRARY_PATH == 0 ) "                                  "setenv LD_LIBRARY_PATH %s/%s ; "                                  "if ( $?OMPI_have_llp == 1 ) "                                  "setenv LD_LIBRARY_PATH %s/%s:$LD_LIBRARY_PATH ; "                                  "%s/%s/%s",                                  (opal_prefix != NULL ? "setenv OPAL_PREFIX " : ""),                                  (opal_prefix != NULL ? opal_prefix : ""),                                  (opal_prefix != NULL ? " ;" : ""),                                  prefix_dir, bin_base,                                  prefix_dir, lib_base,                                  prefix_dir, lib_base,                                  prefix_dir, bin_base,                                  mca_pls_rsh_component.orted);                    }                }            }            /* setup process name */            rc = orte_ns.get_proc_name_string(&name_string, name);            if (ORTE_SUCCESS != rc) {                opal_output(0, "orte_pls_rsh: unable to create process name");                exit(-1);            }            free(argv[proc_name_index]);            argv[proc_name_index] = strdup(name_string);            if (!mca_pls_rsh_component.debug) {                 /* setup stdin */                int fd = open("/dev/null", O_RDWR);                dup2(fd, 0);                close(fd);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -