⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rmaps_rr.c

📁 MPI stands for the Message Passing Interface. Written by the MPI Forum (a large committee comprising
💻 C
📖 第 1 页 / 共 3 页
字号:
        }        cur_node_item = next;    }    return ORTE_SUCCESS;}   /* * Process the attributes and push them into our local "global" */static int orte_rmaps_rr_process_attrs(opal_list_t *attributes){    int rc;    char *policy;    orte_attribute_t *attr;    orte_std_cntr_t *scptr;    bool policy_override;        mca_rmaps_round_robin_component.bynode = false;  /* set default mapping policy to byslot*/    policy_override = false;    mca_rmaps_round_robin_component.per_node = false;    if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_PERNODE))) {        /* was provided - set boolean accordingly */         mca_rmaps_round_robin_component.per_node = true;        /* indicate that we are going to map this job bynode */        mca_rmaps_round_robin_component.bynode = true;        /* indicate that this is to *be* the policy no matter what */        policy_override = true;    }        mca_rmaps_round_robin_component.n_per_node = false;    if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_N_PERNODE))) {        /* was provided - set boolean accordingly */        mca_rmaps_round_robin_component.n_per_node = true;        /* get the number of procs per node to launch */        if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&scptr, attr->value, ORTE_STD_CNTR))) {            ORTE_ERROR_LOG(rc);            return rc;        }        num_per_node = *scptr;        /* default to byslot mapping */        mca_rmaps_round_robin_component.bynode = false;    }        /* define the mapping policy. This *must* come after we process the pernode     * options since those set a default mapping policy - we want to be able     * to override that setting if requested     *     * NOTE: we don't do this step if the policy_override has been set!     */    if (!policy_override &&        NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_MAP_POLICY))) {        /* they specified a mapping policy - extract its name */        if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&policy, attr->value, ORTE_STRING))) {            ORTE_ERROR_LOG(rc);            return rc;        }        if (0 == strcmp(policy, "bynode")) {            mca_rmaps_round_robin_component.bynode = true;        } else {            mca_rmaps_round_robin_component.bynode = false;        }    }        mca_rmaps_round_robin_component.no_use_local = false;    if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_NO_USE_LOCAL))) {        /* was provided - set boolean accordingly */        mca_rmaps_round_robin_component.no_use_local = true;    }        mca_rmaps_round_robin_component.oversubscribe = true;    if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_NO_OVERSUB))) {        /* was provided - set boolean accordingly */        mca_rmaps_round_robin_component.oversubscribe = false;    }        mca_rmaps_round_robin_component.no_allocate_range = false;    if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_NO_ALLOC_RANGE))) {        /* was provided - set boolean accordingly */        mca_rmaps_round_robin_component.no_allocate_range = true;    }        return ORTE_SUCCESS;}/* * Create a round-robin mapping for the job. */static int orte_rmaps_rr_map(orte_jobid_t jobid, opal_list_t *attributes){    orte_app_context_t *app;    orte_job_map_t* map;    orte_std_cntr_t i;    opal_list_t master_node_list, mapped_node_list, max_used_nodes, *working_node_list;    opal_list_item_t *item, *item2;    orte_ras_node_t *node, *node2;    orte_mapped_node_t *mnode;    char *save_bookmark;    orte_vpid_t vpid_start;    orte_std_cntr_t num_procs = 0, total_num_slots, mapped_num_slots, num_nodes, num_slots;    int rc;    bool modify_app_context = false;    char *sptr;    orte_attribute_t *attr;    orte_std_cntr_t slots_per_node;    OPAL_TRACE(1);        /* setup the local environment from the attributes */    if (ORTE_SUCCESS != (rc = orte_rmaps_rr_process_attrs(attributes))) {        ORTE_ERROR_LOG(rc);        return rc;    }        /* create the map object */    map = OBJ_NEW(orte_job_map_t);    if (NULL == map) {        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);        return ORTE_ERR_OUT_OF_RESOURCE;    }        /* set the jobid */    map->job = jobid;        /* query for the application context and allocated nodes */    if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &(map->apps), &(map->num_apps)))) {        ORTE_ERROR_LOG(rc);        return rc;    }    /* query for all nodes allocated to this job - this will become our master list of     * nodes. From this, we will construct a working list of nodes based on any specified     * mappings from the user     */    OBJ_CONSTRUCT(&master_node_list, opal_list_t);    if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&master_node_list, jobid,                                                              &total_num_slots,                                                              mca_rmaps_round_robin_component.no_use_local))) {        ORTE_ERROR_LOG(rc);        OBJ_DESTRUCT(&master_node_list);        return rc;    }    /* if a bookmark exists from some prior mapping, set us to start there */    if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_BOOKMARK))) {        cur_node_item = NULL;        if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, attr->value, ORTE_STRING))) {            ORTE_ERROR_LOG(rc);            return rc;        }        /* find this node on the master list */        for (item = opal_list_get_first(&master_node_list);             item != opal_list_get_end(&master_node_list);             item = opal_list_get_next(item)) {            node = (orte_ras_node_t*)item;                        if (0 == strcmp(sptr, node->node_name)) {                cur_node_item = item;                break;            }        }        /* see if we found it - if not, just start at the beginning */        if (NULL == cur_node_item) {            cur_node_item = opal_list_get_first(&master_node_list);         }    } else {        /* if no bookmark, then just start at the beginning of the list */        cur_node_item = opal_list_get_first(&master_node_list);    }        /* save the node name for the bookmark just in case we don't do anything     * useful down below     */    save_bookmark = strdup(((orte_ras_node_t*)cur_node_item)->node_name);        /** construct the list to hold any nodes that get fully used during this     * mapping. We need to keep a record of these so we can update their     * information on the registry when we are done, but we want to remove     * them from our master_node_list as we go so we don't keep checking to     * see if we can still map something onto them.     */    OBJ_CONSTRUCT(&fully_used_nodes, opal_list_t);    /** construct an intermediate list that will hold the nodes that are fully     * used during any one pass through the mapper (i.e., for each app_context).     * we will join the results together to form the fully_used_nodes list. This     * allows us to more efficiently handle the cases where users specify     * the proc-to-node mapping themselves.     */    OBJ_CONSTRUCT(&max_used_nodes, opal_list_t);    /** construct a list to hold any nodes involved in a user-specified mapping */    OBJ_CONSTRUCT(&mapped_node_list, opal_list_t);        for(i=0; i < map->num_apps; i++) {        app = map->apps[i];        /** if the number of processes wasn't specified, then we know there can be only        * one app_context allowed in the launch, and that we are to launch it across        * all available slots. We'll double-check the single app_context rule first        */        if (0 == app->num_procs && 1 < map->num_apps) {            opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:multi-apps-and-zero-np",                           true, map->num_apps, NULL);            ORTE_ERROR_LOG(ORTE_ERR_INVALID_NUM_PROCS);            return ORTE_ERR_INVALID_NUM_PROCS;        }        if ( 0 < app->num_map ) {            /** If the user has specified a mapping for this app_context, then we            * create a working node list that contains only those nodes.            */            if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_mapped_targets(&mapped_node_list, app,                                                                         &master_node_list, &mapped_num_slots))) {                ORTE_ERROR_LOG(rc);                goto cleanup;            }            working_node_list = &mapped_node_list;           /* Set cur_node_item to point to the first node in the specified list to be used */            cur_node_item = opal_list_get_first(working_node_list);                        num_nodes = (orte_std_cntr_t)opal_list_get_size(&mapped_node_list);            num_slots = (orte_std_cntr_t)mapped_num_slots;        }        else {            /** no mapping was specified, so we are going to just use everything that was             * allocated to us. We don't need to update cur_node_item in this case since it             * is always pointing to something in the master_node_list - we'll just pick up             * from wherever we last stopped.             */            working_node_list = &master_node_list;                        num_nodes = (orte_std_cntr_t)opal_list_get_size(&master_node_list);            num_slots = total_num_slots;        }        if (mca_rmaps_round_robin_component.per_node) {            /* there are three use-cases that we need to deal with:            * (a) if -np was not provided, then we just use the number of nodes            * (b) if -np was provided AND #procs > #nodes, then error out            * (c) if -np was provided AND #procs <= #nodes, then launch            *     the specified #procs one/node. In this case, we just            *     leave app->num_procs alone            */            if (0 == app->num_procs) {                app->num_procs = num_nodes;                modify_app_context = true;            } else if (app->num_procs > num_nodes) {                opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:per-node-and-too-many-procs",                               true, app->num_procs, num_nodes, NULL);                return ORTE_ERR_SILENT;            }        } else if (mca_rmaps_round_robin_component.n_per_node) {            /* first, let's check to see if there are enough slots/node to             * meet the request - error out if not             */            slots_per_node = num_slots / num_nodes;            if (num_per_node > slots_per_node) {                opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:n-per-node-and-not-enough-slots",

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -