⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ras_slurm_module.c

📁 MPI stands for the Message Passing Interface. Written by the MPI Forum (a large committee comprising
💻 C
📖 第 1 页 / 共 2 页
字号:
/*
 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

/*
 * Each preprocessing directive must sit on its own line: a directive
 * extends to the end of the physical line, so anything following it on
 * the same line is not compiled as ordinary code.
 */
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"

#include <unistd.h>
#include <string.h>
#include <ctype.h>

#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "orte/dss/dss.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/base/ras_private.h"
#include "ras_slurm.h"


/*
 * Local functions
 *
 * All of these have internal linkage. allocate/deallocate/finalize are
 * installed in the module table below; discover and the two range
 * parsers are helpers used while building the node list.
 */
static int orte_ras_slurm_allocate(orte_jobid_t jobid, opal_list_t *attributes);
static int orte_ras_slurm_deallocate(orte_jobid_t jobid);
static int orte_ras_slurm_finalize(void);

static int orte_ras_slurm_discover(char *regexp, char* tasks_per_node,
                                   opal_list_t *nodelist);
static int orte_ras_slurm_parse_ranges(char *base, char *ranges, char ***nodelist);
static int orte_ras_slurm_parse_range(char *base, char *range, char ***nodelist);


/*
 * Global variable
 *
 * Function table exported by this component. The initializer is
 * positional, so the entries must stay in the member order of
 * orte_ras_base_module_t (declared outside this file -- confirm against
 * that declaration before reordering). The allocate, deallocate and
 * finalize entries are the SLURM-specific functions from this file; the
 * node insert/query/query_alloc/lookup entries are the orte_ras_base_*
 * functions.
 */
orte_ras_base_module_t orte_ras_slurm_module = {
    orte_ras_slurm_allocate,
    orte_ras_base_node_insert,
    orte_ras_base_node_query,
    orte_ras_base_node_query_alloc,
    orte_ras_base_node_lookup,
    orte_ras_slurm_deallocate,
    orte_ras_slurm_finalize
};

/**
 * Discover available (pre-allocated) nodes.  Allocate the
 * requested number of nodes/process slots to the job.
*   */
/*
 * orte_ras_slurm_allocate() in detail:
 *
 * Reads SLURM_NODELIST and SLURM_TASKS_PER_NODE from the environment,
 * hands private copies of both strings to orte_ras_slurm_discover() so it
 * can fill a local node list, and then passes that list to
 * orte_ras_base_allocate_nodes() for the given job. Both string copies
 * and the local list are released on every path out of the function.
 *
 * @param jobid      Job that the discovered nodes are allocated to.
 * @param attributes Not used by this function.
 *
 * @return ORTE_ERR_NOT_FOUND if either environment variable is unset,
 *         ORTE_ERR_OUT_OF_RESOURCE if a string copy cannot be allocated,
 *         the status of orte_ras_slurm_discover() if it fails, otherwise
 *         the status of orte_ras_base_allocate_nodes().
 */
static int orte_ras_slurm_allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
    int ret;
    char *slurm_node_str, *regexp;
    char *tasks_per_node, *node_tasks;
    opal_list_t nodes;
    opal_list_item_t *item;

    slurm_node_str = getenv("SLURM_NODELIST");
    if (NULL == slurm_node_str) {
        opal_show_help("help-ras-slurm.txt", "slurm-env-var-not-found", 1,
                       "SLURM_NODELIST");
        return ORTE_ERR_NOT_FOUND;
    }
    /* Copy before calling getenv() again: a later call is allowed to
     * overwrite the string returned by an earlier one. */
    regexp = strdup(slurm_node_str);

    tasks_per_node = getenv("SLURM_TASKS_PER_NODE");
    if (NULL == tasks_per_node) {
        opal_show_help("help-ras-slurm.txt", "slurm-env-var-not-found", 1,
                       "SLURM_TASKS_PER_NODE");
        /* Do not leak the node list copy made above (free(NULL) is fine) */
        free(regexp);
        return ORTE_ERR_NOT_FOUND;
    }
    node_tasks = strdup(tasks_per_node);

    if (NULL == regexp || NULL == node_tasks) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        /* At most one of the two copies succeeded; release whichever did */
        free(regexp);
        free(node_tasks);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    OBJ_CONSTRUCT(&nodes, opal_list_t);

    ret = orte_ras_slurm_discover(regexp, node_tasks, &nodes);
    free(regexp);
    free(node_tasks);
    if (ORTE_SUCCESS != ret) {
        opal_output(orte_ras_base.ras_output,
                    "ras:slurm:allocate: discover failed!");
        /* The list was constructed above, so it still has to be torn down */
        goto cleanup;
    }

    ret = orte_ras_base_allocate_nodes(jobid, &nodes);

    /* All done */
    if (ORTE_SUCCESS == ret) {
        opal_output(orte_ras_base.ras_output,
                    "ras:slurm:allocate: success");
    } else {
        opal_output(orte_ras_base.ras_output,
                    "ras:slurm:allocate: failure (base_allocate_nodes=%d)", ret);
    }

cleanup:
    /*
     * Release whatever is on the list and destruct it.
     * NOTE(review): assumes orte_ras_slurm_discover() leaves the
     * caller-constructed list valid when it fails -- confirm against the
     * full body of that function.
     */
    while (NULL != (item = opal_list_remove_first(&nodes))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&nodes);

    return ret;
}

/*
 * Deallocate the resources of a job.
 *
 * There's really nothing to do here: the function only writes a message
 * to the RAS output stream.
 *
 * @param jobid Not used by this function.
 *
 * @return ORTE_SUCCESS, always.
 */
static int orte_ras_slurm_deallocate(orte_jobid_t jobid)
{
    opal_output(orte_ras_base.ras_output,
                "ras:slurm:deallocate: success (nothing to do)");
    return ORTE_SUCCESS;
}

/*
 * Finalize the module.
 *
 * There's really nothing to do here
 */
static int
orte_ras_slurm_finalize(void)
{
    /* Writes a message to orte_ras_base.ras_output and reports success;
     * nothing else is done here. */
    opal_output(orte_ras_base.ras_output,
                 "ras:slurm:finalize: success (nothing to do)");
    return ORTE_SUCCESS;
}

/**
 * Discover the available resources.
 *
 * In order to fully support slurm, we need to be able to handle
 * node regexp/task_per_node strings such as:
 * foo,bar    5,3
 * foo        5
 * foo[2-10,12,99-105],bar,foobar[3-11] 2(x10),5,100(x16)
 *
 * @param *regexp A node regular expression from SLURM (i.e. SLURM_NODELIST)
 * @param *tasks_per_node A tasks per node expression from SLURM
 *                        (i.e. SLURM_TASKS_PER_NODE)
 * @param *nodelist A list which has already been constructed to return
 *                  the found nodes in
 */
static int orte_ras_slurm_discover(char *regexp, char *tasks_per_node,                                   opal_list_t* nodelist){    int i, j, len, ret, count, reps, num_nodes;    char *base, **names = NULL;    char *begptr, *endptr, *orig;    int *slots;    bool found_range = false;    bool more_to_come = false;        orig = base = strdup(regexp);    if (NULL == base) {        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);        return ORTE_ERR_OUT_OF_RESOURCE;    }    opal_output(orte_ras_base.ras_output,                 "ras:slurm:allocate:discover: checking nodelist: %s", regexp);        do {        /* Find the base */        len = strlen(base);        for (i = 0; i <= len; ++i) {            if (base[i] == '[') {                /* we found a range. 
this gets dealt with below */                base[i] = '\0';                found_range = true;                break;            }            if (base[i] == ',') {                /* we found a singleton node, and there are more to come */                base[i] = '\0';                found_range = false;                more_to_come = true;                break;            }            if (base[i] == '\0') {                /* we found a singleton node */                found_range = false;                more_to_come = false;                break;            }        }        if(i == 0) {            /* we found a special character at the beginning of the string */            opal_show_help("help-ras-slurm.txt", "slurm-env-var-bad-value",                           1, regexp, tasks_per_node, "SLURM_NODELIST");            ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);            free(orig);            return ORTE_ERR_BAD_PARAM;        }                if (found_range) {            /* If we found a range, now find the end of the range */            for (j = i; j < len; ++j) {                if (base[j] == ']') {                    base[j] = '\0';                    break;                }            }            if (j >= len) {                /* we didn't find the end of the range */                opal_show_help("help-ras-slurm.txt", "slurm-env-var-bad-value",                               1, regexp, tasks_per_node, "SLURM_NODELIST");                ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);                free(orig);                return ORTE_ERR_BAD_PARAM;            }                        ret = orte_ras_slurm_parse_ranges(base, base + i + 1, &names);            if(ORTE_SUCCESS != ret) {                opal_show_help("help-ras-slurm.txt", "slurm-env-var-bad-value",                               1, regexp, tasks_per_node, "SLURM_NODELIST");                ORTE_ERROR_LOG(ret);                free(orig);                return ret;            }                if(base[j + 1] == ',') {   
             more_to_come = true;                base = &base[j + 2];            } else {                more_to_come = false;            }        } else {            /* If we didn't find a range, just add the node */            opal_output(orte_ras_base.ras_output,                         "ras:slurm:allocate:discover: found node %s", base);            if(ORTE_SUCCESS != (ret = opal_argv_append_nosize(&names, base))) {                ORTE_ERROR_LOG(ret);                free(orig);                return ret;            }            /* set base equal to the (possible) next base to look at */            base = &base[i + 1];        }    } while(more_to_come);       free(orig);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -