⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 orte_setup_hnp.c

📁 MPI stands for the Message Passing Interface. Written by the MPI Forum (a large committee comprising …)
💻 C
📖 第 1 页 / 共 2 页
字号:
/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana *                         University Research and Technology *                         Corporation.  All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University *                         of Tennessee Research Foundation.  All rights *                         reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, *                         University of Stuttgart.  All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. *                         All rights reserved. * Copyright (c) 2007      Los Alamos National Security, LLC.  All rights *                         reserved.  * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ *//** * @file * * Establish a Head Node Process on a cluster's front end */#include "orte_config.h"#include <stdlib.h>#ifdef HAVE_UNISTD_H#include <unistd.h>#endif#include <errno.h>#include <string.h>#include <sys/types.h>#include <sys/stat.h>#ifdef HAVE_SYS_WAIT_H#include <sys/wait.h>#endif#include <fcntl.h>#include "orte/orte_constants.h"#include "opal/event/event.h"#include "opal/threads/mutex.h"#include "opal/threads/condition.h"#include "opal/util/argv.h"#include "opal/util/opal_environ.h"#include "opal/util/output.h"#include "opal/util/path.h"#include "opal/util/os_path.h"#include "opal/mca/base/mca_base_param.h"#include "orte/dss/dss.h"#include "orte/runtime/orte_wait.h"#include "orte/util/univ_info.h"#include "orte/util/sys_info.h"#include "orte/util/proc_info.h"#include "orte/util/session_dir.h"#include "orte/util/universe_setup_file_io.h"#include "orte/mca/smr/smr.h"#include "orte/mca/rml/rml.h"#include "orte/mca/rds/rds_types.h"#include "orte/mca/ns/ns.h"#include "orte/mca/gpr/gpr.h"#include "orte/mca/errmgr/errmgr.h"#include "orte/runtime/runtime.h"#include "orte/runtime/orte_setup_hnp.h"/* Local condition variables and mutex */static opal_mutex_t 
orte_setup_hnp_mutex;static opal_condition_t orte_setup_hnp_condition;/* Local return code */static int orte_setup_hnp_rc;/* Local uri storage */static char *orte_setup_hnp_orted_uri;static orte_setup_hnp_cb_data_t orte_setup_hnp_cbdata;/* * NON-BLOCKING RECEIVER */static void orte_setup_hnp_recv(int status, orte_process_name_t* sender,                                orte_buffer_t* buffer, orte_rml_tag_t tag,                                void* cbdata);/* * PID WAIT CALLBACK */static void orte_setup_hnp_wait(pid_t wpid, int status, void *data);/* * ORTE_SETUP_HNP */int orte_setup_hnp(char *target_cluster, char *headnode, char *username){    char **argv, *param, *uri, *uid, *hn=NULL;    char *path, *name_string, *orteprobe;    int argc, rc=ORTE_SUCCESS, id, intparam;    pid_t pid;    bool can_launch=false, on_gpr=false;    orte_cellid_t cellid=ORTE_CELLID_MAX, *cptr;    orte_jobid_t jobid;    orte_vpid_t vpid;    orte_std_cntr_t i, j, k, cnt=0;    orte_gpr_value_t **values=NULL, *value;    orte_gpr_keyval_t **keyvals;    char *keys[4], *tokens[3], *cellname;    struct timeval tv;    struct timespec ts;    bool infrastructure = true, *bptr, tf_flag;    /* get the nodename for the headnode of the target cluster */    if (NULL == headnode) {  /* not provided, so try to look it up */        tokens[0] = target_cluster;        tokens[1] = NULL;        keys[0] = ORTE_RDS_FE_NAME;        keys[1] = ORTE_RDS_FE_SSH;        keys[2] = ORTE_CELLID_KEY;        keys[3] = NULL;        if (ORTE_SUCCESS != (rc = orte_gpr.get(ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR,                                    ORTE_RESOURCE_SEGMENT,                                    tokens, keys, &cnt, &values))) {           ORTE_ERROR_LOG(rc);           return rc;        }        if (0 == cnt || 0 == values[0]->cnt) {  /* nothing found */            goto MOVEON;        }        on_gpr = true;        /* need to decide what to do if more than value found. 
Some         * clusters have more than one head node, so which one do         * we choose? For now, just take the first one returned.         */        keyvals = values[0]->keyvals;        for (i=0; i < values[0]->cnt; i++) {            if (0 == strcmp(keyvals[i]->key, ORTE_RDS_FE_NAME)) {                hn = strdup((const char*)keyvals[i]->value->data);                continue;            }            if (0 == strcmp(keyvals[i]->key, ORTE_RDS_FE_SSH)) {                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&bptr, keyvals[i]->value, ORTE_BOOL))) {                    ORTE_ERROR_LOG(rc);                    return rc;                }                can_launch = *bptr;                continue;            }            if (0 == strcmp(keyvals[i]->key, ORTE_CELLID_KEY)) {                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cptr, keyvals[i]->value, ORTE_CELLID))) {                    ORTE_ERROR_LOG(rc);                    return rc;                }                cellid = *cptr;                continue;            }        }        goto MOVEON;    } else {  /* lookup the headnode's cellid */        hn      = strdup(headnode);        keys[0] = ORTE_RDS_FE_NAME;        keys[1] = ORTE_RDS_FE_SSH;        keys[2] = ORTE_CELLID_KEY;        keys[3] = NULL;        rc = orte_gpr.get(ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR,                          ORTE_RESOURCE_SEGMENT,                          NULL, keys, &cnt, &values);        if (ORTE_SUCCESS != rc) {            ORTE_ERROR_LOG(rc);            return rc;        }        /* Nothing found */        if (0 == cnt || 0 == values[0]->cnt) {            goto MOVEON;        }        on_gpr = true;        for (i=0; i < cnt; i++) {            keyvals = values[i]->keyvals;            for (j=0; j < values[i]->cnt; j++) {                if ((0 == strcmp(keyvals[j]->key, ORTE_RDS_FE_NAME)) &&                     0 == strcmp((const char*)keyvals[j]->value->data, headnode)) {                    /* okay, this is the right cell - 
now need to find                     * the ssh flag (if provided) and cellid                     */                    for (k=0; k < values[i]->cnt; k++) {                        if (0 == strcmp(keyvals[k]->key, ORTE_RDS_FE_SSH)) {                            if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&bptr, keyvals[i]->value, ORTE_BOOL))) {                                ORTE_ERROR_LOG(rc);                                return rc;                            }                            can_launch = *bptr;                            continue;                        }                        if (0 == strcmp(keyvals[k]->key, ORTE_CELLID_KEY)) {                            if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cptr, keyvals[i]->value, ORTE_CELLID))) {                                ORTE_ERROR_LOG(rc);                                return rc;                            }                            cellid = *cptr;                            continue;                        }                    }                    goto MOVEON;                }            }        }    }MOVEON:    if (NULL != values) {        for (i=0; i < cnt; i++)            OBJ_RELEASE(values[i]);        free(values);    }    if (!on_gpr && (NULL != target_cluster || NULL != headnode)) {        /* if we couldn't find anything about this cell on the gpr, then         * we need to put the required headnode data on the registry. We need         * it to be there so other functions/processes can find it, if needed.         * User must provide either a target_cluster name (which then must be         * synonymous with the headnode name), a headnode name (on a named or         * unnamed target_cluster), or both.         
*/        /* get new cellid for this site/resource */        if (NULL != target_cluster) {            cellname = strdup(target_cluster);        } else {            /* if the target_cluster was NULL, then headnode CAN'T be NULL             * or else we wouldn't get here             */            cellname = strdup(headnode);        }        /* can't know the site name, so it becomes "unknown" */        rc = orte_ns.create_cellid(&cellid, "unknown", cellname);        if (ORTE_SUCCESS != rc ) {            ORTE_ERROR_LOG(rc);            free(cellname);            return rc;        }        /*         * Store the cell info on the resource segment of the registry         */        if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR,                                                        ORTE_RESOURCE_SEGMENT, 4, 0))) {            ORTE_ERROR_LOG(rc);            return rc;        }                rc = orte_schema.get_node_tokens(&(value->tokens), &(value->num_tokens), cellid, cellname);        if (ORTE_SUCCESS != rc) {            ORTE_ERROR_LOG(rc);            OBJ_RELEASE(value);            return rc;        }        /* Set Cell Name */        if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_RDS_NAME, ORTE_STRING, cellname))) {            ORTE_ERROR_LOG(rc);            OBJ_RELEASE(value);            return rc;        }        /* Set Cell ID */        if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), ORTE_CELLID_KEY, ORTE_CELLID, &cellid))) {            ORTE_ERROR_LOG(rc);            OBJ_RELEASE(value);            return rc;        }        /* Set Front End Name */        if (NULL == headnode) {            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), ORTE_RDS_FE_NAME, ORTE_STRING, cellname))) {                ORTE_ERROR_LOG(rc);                OBJ_RELEASE(value);                return rc;            }        } else {            if (ORTE_SUCCESS != (rc = 
orte_gpr.create_keyval(&(value->keyvals[2]), ORTE_RDS_FE_NAME, ORTE_STRING, headnode))) {                ORTE_ERROR_LOG(rc);                OBJ_RELEASE(value);                return rc;            }        }        /* Asssume ability to ssh to front end node*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -