⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pls_base_dmn_registry_fns.c

📁 MPI stands for the Message Passing Interface. Written by the MPI Forum (a large committee comprising
💻 C
字号:
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 */

#include "orte_config.h"
#include "orte/orte_constants.h"

#include <string.h>

#include "opal/util/output.h"
#include "opal/util/argv.h"

#include "orte/mca/ns/ns.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/rmgr.h"

#include "orte/mca/pls/base/pls_private.h"

/* constructor - put every field of a new daemon info object into a
 * known "empty" state
 */
static void orte_pls_daemon_info_construct(orte_pls_daemon_info_t* ptr)
{
    ptr->cell = ORTE_CELLID_INVALID;
    ptr->nodename = NULL;
    ptr->name = NULL;
    ptr->active_job = ORTE_JOBID_INVALID;
}

/* destructor - used to free any resources held by instance */
static void orte_pls_daemon_info_destructor(orte_pls_daemon_info_t* ptr)
{
    if (NULL != ptr->nodename) free(ptr->nodename);
    if (NULL != ptr->name) free(ptr->name);
}

OBJ_CLASS_INSTANCE(orte_pls_daemon_info_t,  /* type name */
                   opal_list_item_t, /* parent "class" name */
                   orte_pls_daemon_info_construct, /* constructor */
                   orte_pls_daemon_info_destructor); /* destructor */

/*
 * Store the active daemons for a job
 *
 * For every orte_pls_daemon_info_t on the list, one value object is
 * created on the node segment (tokens obtained from the daemon's cell
 * and nodename) holding a single keyval:
 *     "<ORTE_NODE_BOOTPROXY_KEY>-<jobid string>" -> daemon name
 * All value objects are then handed to the registry in one put.
 *
 * @param daemons  list of orte_pls_daemon_info_t objects to record
 * @return ORTE_SUCCESS (also for an empty list), otherwise the error
 *         code of the first step that failed. Everything allocated
 *         here is released on every exit path.
 */
int orte_pls_base_store_active_daemons(opal_list_t *daemons)
{
    orte_pls_daemon_info_t *dmn;
    opal_list_item_t *item;
    orte_gpr_value_t **values;
    char *jobid_string, *key;
    int rc, i, num_daemons;

    /* determine the number of daemons */
    num_daemons = (int)opal_list_get_size(daemons);
    if (0 == num_daemons) {
        return ORTE_SUCCESS;
    }

    /* since each daemon gets recorded in a separate node's container,
     * we need to allocate space for num_daemons value objects. calloc
     * gives us a NULL'd array so that CLEANUP can tell which entries
     * were actually created
     */
    values = (orte_gpr_value_t**)calloc(num_daemons, sizeof(orte_gpr_value_t*));
    if (NULL == values) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* loop through the values and the list and create all the value objects */
    item = opal_list_get_first(daemons);
    for (i=0; i < num_daemons; i++) {
        dmn = (orte_pls_daemon_info_t*)item;

        if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&values[i],
                                                        ORTE_GPR_OVERWRITE,
                                                        ORTE_NODE_SEGMENT,
                                                        1, 0))) {
            /* report the error that actually occurred */
            ORTE_ERROR_LOG(rc);
            goto CLEANUP;
        }

        if (ORTE_SUCCESS != (rc = orte_schema.get_node_tokens(&(values[i]->tokens), &(values[i]->num_tokens),
                                                              dmn->cell, dmn->nodename))) {
            ORTE_ERROR_LOG(rc);
            goto CLEANUP;
        }

        /* setup the key */
        if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, dmn->active_job))) {
            ORTE_ERROR_LOG(rc);
            /* go through the common exit so that the array and ALL of
             * the value objects created so far are released
             */
            goto CLEANUP;
        }
        key = NULL;
        if (0 > asprintf(&key, "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string)) {
            free(jobid_string);
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(rc);
            goto CLEANUP;
        }
        free(jobid_string);

        /* the key string is ours to free whether or not the keyval
         * could be created
         */
        rc = orte_gpr.create_keyval(&(values[i]->keyvals[0]), key, ORTE_NAME, dmn->name);
        free(key);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            goto CLEANUP;
        }

        item = opal_list_get_next(item);
    }

    rc = orte_gpr.put(num_daemons, values);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

CLEANUP:
    for (i=0; i < num_daemons; i++) {
        if (NULL != values[i]) OBJ_RELEASE(values[i]);
    }
    free(values);

    return rc;
}

/*
 * Query the node segment for the daemons recorded for one job and
 * append them to the caller's list.
 *
 * A daemon is appended only if its container returned all three of
 * the bootproxy key for this job, the node name key and the cellid
 * key, it is not this process itself, and (when the list was not
 * empty on entry) no entry with the same name is already on the list.
 *
 * @param daemons  list to which orte_pls_daemon_info_t objects are appended
 * @param job      jobid used to build the bootproxy key
 * @return ORTE_SUCCESS, or the error code of the step that failed
 */
static int get_daemons(opal_list_t *daemons, orte_jobid_t job)
{
    orte_gpr_value_t **values;
    orte_gpr_keyval_t *kv;
    orte_std_cntr_t cnt, i, j;
    char* jobid_string;
    char *keys[] = {
        NULL, /* placeholder */
        ORTE_NODE_NAME_KEY,
        ORTE_CELLID_KEY,
        NULL
    };
    orte_cellid_t *cell = NULL;
    char *nodename = NULL;
    orte_process_name_t *name = NULL;
    orte_pls_daemon_info_t *dmn, *dmn2;
    bool found_name, found_node, found_cell;
    opal_list_item_t *item;
    bool check_dups;
    int rc;

    /* check the list to see if there is anything already on it. If there is, then
     * we will need to check for duplicate entries before we add something. If not,
     * then this can go a lot faster
     */
    if (0 < opal_list_get_size(daemons)) {
        check_dups = true;
    } else {
        check_dups = false;
    }

    /* setup the key */
    if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, job))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (0 > asprintf(&keys[0], "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string)) {
        /* contents of keys[0] are undefined on failure - do not use or free it */
        free(jobid_string);
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    free(jobid_string);

    /* query the daemon info */
    if (ORTE_SUCCESS != (rc = orte_gpr.get(ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
                                           ORTE_NODE_SEGMENT,
                                           NULL, /* all containers */
                                           keys,
                                           &cnt, &values))) {
        ORTE_ERROR_LOG(rc);
        free(keys[0]);
        return rc;
    }

    /* loop through the answers and construct the list */
    for (i=0; i < cnt; i++) {
        /* for systems such as bproc, the node segment holds containers
         * for nodes that we may not have launched upon. Each container
         * will send us back a value object, so we have to ensure here
         * that we only create daemon objects on the list for those nodes
         * that DO provide a valid object
         */
        found_name = found_node = found_cell = false;
        for (j=0; j < values[i]->cnt; j++) {
            kv = values[i]->keyvals[j];
            if (0 == strcmp(kv->key, keys[0])) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&name, kv->value, ORTE_NAME))) {
                    ORTE_ERROR_LOG(rc);
                    goto CLEANUP;
                }
                found_name = true;
                continue;
            }
            if (0 == strcmp(kv->key, ORTE_NODE_NAME_KEY)) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&nodename, kv->value, ORTE_STRING))) {
                    ORTE_ERROR_LOG(rc);
                    goto CLEANUP;
                }
                found_node = true;
                continue;
            }
            if (0 == strcmp(kv->key, ORTE_CELLID_KEY)) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cell, kv->value, ORTE_CELLID))) {
                    ORTE_ERROR_LOG(rc);
                    goto CLEANUP;
                }
                found_cell = true;
                continue;
            }
        }

        /* if we found everything, then this is a valid entry */
        if (found_name && found_node && found_cell) {
            /* first check if this name is ourself - if so, ignore it */
            if (ORTE_EQUAL == orte_dss.compare(name, ORTE_PROC_MY_NAME, ORTE_NAME)) {
                goto MOVEON;
            }

            if (check_dups) {
                /* see if this daemon is already on the list - if so, then we don't add it */
                for (item = opal_list_get_first(daemons);
                     item != opal_list_get_end(daemons);
                     item = opal_list_get_next(item)) {
                    dmn2 = (orte_pls_daemon_info_t*)item;

                    if (ORTE_EQUAL == orte_dss.compare(dmn2->name, name, ORTE_NAME)) {
                        /* already on list - ignore it */
                        goto MOVEON;
                    }
                }
            }

            dmn = OBJ_NEW(orte_pls_daemon_info_t);
            if (NULL == dmn) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto CLEANUP;
            }
            if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
                ORTE_ERROR_LOG(rc);
                OBJ_RELEASE(dmn);
                goto CLEANUP;
            }
            dmn->cell = *cell;
            if (NULL != nodename) {
                dmn->nodename = strdup(nodename);
                if (NULL == dmn->nodename) {
                    /* don't put a half-filled entry on the list */
                    ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                    rc = ORTE_ERR_OUT_OF_RESOURCE;
                    OBJ_RELEASE(dmn);
                    goto CLEANUP;
                }
            }

            /* add this daemon to the list */
            opal_list_append(daemons, &dmn->super);
        }

MOVEON:
        OBJ_RELEASE(values[i]);
        /* CLEANUP below walks the whole array again - explicitly mark
         * this slot as done so it cannot be released a second time
         */
        values[i] = NULL;
    }

CLEANUP:
    /* release whatever was not consumed by the loop above (only
     * non-empty when we jumped here on an error)
     */
    for (i=0; i < cnt; i++) {
        if (NULL != values[i]) OBJ_RELEASE(values[i]);
    }
    if (NULL != values) free(values);
    free(keys[0]);

    return rc;
}

/*
 * Retrieve a list of the active daemons for a job
 *
 * Depending on the attributes, the set of jobs that is searched is:
 *   - ORTE_NS_INCLUDE_DESCENDANTS: whatever orte_ns.get_job_descendants
 *     returns for the job
 *   - ORTE_NS_INCLUDE_CHILDREN: whatever orte_ns.get_job_children
 *     returns for the job
 *   - otherwise: just the given job
 *
 * @param daemons  list to which the daemons found are appended
 * @param job      job of interest
 * @param attrs    attribute list that is searched for the two
 *                 attributes above
 * @return ORTE_SUCCESS, or the error code of the first lookup that
 *         failed. Daemons appended before the failure stay on the list.
 */
int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job, opal_list_t *attrs)
{
    orte_jobid_t *jobs;
    orte_std_cntr_t njobs, i;
    bool allocated;
    int rc = ORTE_SUCCESS;

    if (NULL != orte_rmgr.find_attribute(attrs, ORTE_NS_INCLUDE_DESCENDANTS)) {
        /* need to include all descendants in list */
        if (ORTE_SUCCESS != (rc = orte_ns.get_job_descendants(&jobs, &njobs, job))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        allocated = true;
    } else if (NULL != orte_rmgr.find_attribute(attrs, ORTE_NS_INCLUDE_CHILDREN)) {
        /* just include the direct children of the job */
        if (ORTE_SUCCESS != (rc = orte_ns.get_job_children(&jobs, &njobs, job))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        allocated = true;
    } else {
        /* just want daemons for this one job */
        jobs = &job;
        njobs = 1;
        allocated = false;
    }

    /* loop through all the jobs and get their info */
    for (i=0; i < njobs; i++) {
        if (ORTE_SUCCESS != (rc = get_daemons(daemons, jobs[i]))) {
            ORTE_ERROR_LOG(rc);
            goto CLEANUP;
        }
    }

CLEANUP:
    /* the jobs array is only ours to free when the name service
     * handed it to us
     */
    if (allocated) free(jobs);

    /* pass a lookup failure on to the caller instead of hiding it */
    return rc;
}

/*
 * Remove a daemon from the world of active daemons
 *
 * Not implemented: nothing is done with info and ORTE_SUCCESS is
 * always returned.
 */
int orte_pls_base_remove_daemon(orte_pls_daemon_info_t *info)
{
    /* We need to do a registry
     * delete function call targeting the entry
     */
    return ORTE_SUCCESS;
}

/*
 * Check for available daemons we can re-use
 *
 * @param daemons  list to which the re-usable daemons are appended
 * @param job      job whose family is to be searched
 * @return ORTE_SUCCESS, or the error code of the first name service
 *         or daemon lookup that failed
 */
int orte_pls_base_check_avail_daemons(opal_list_t *daemons,
                                      orte_jobid_t job)
{
    orte_jobid_t root, *descendants;
    orte_std_cntr_t i, ndesc;
    int rc;

    /* check for daemons belonging to any job in this job's family.
     * Since the jobs in any family must exit together, it is reasonable
     * for us to reuse any daemons that were spawned by any member
     * of our extended family. We can find all of our family members
     * by first finding our root job, and then getting all of its
     * descendants
     */
    if (ORTE_SUCCESS != (rc = orte_ns.get_root_job(&root, job))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    if (ORTE_SUCCESS != (rc = orte_ns.get_job_descendants(&descendants, &ndesc, root))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* loop through the descendants, adding to the daemon list as we go */
    for (i=0; i < ndesc; i++) {
        if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(daemons, descendants[i], NULL))) {
            ORTE_ERROR_LOG(rc);
            free(descendants);
            return rc;
        }
    }
    free(descendants);  /* all done with these */

    /* now add in any persistent daemons - they are tagged as bootproxies
     * for jobid = 0
     */
    if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(daemons, 0, NULL))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    return ORTE_SUCCESS;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -