⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rmaps_rr.c

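📁 MPI stands for the Message Passing Interface. Written by the MPI Forum (a large committee comprising a cross-section of industry and research representatives), MPI is a standardized API typically used for parallel and/or distributed computing.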
💻 C
📖 第 1 页 / 共 3 页
字号:
                               true, num_per_node, slots_per_node, NULL);                return ORTE_ERR_SILENT;            }            /* there are three use-cases that we need to deal with:            * (a) if -np was not provided, then we just use the n/node * #nodes            * (b) if -np was provided AND #procs > (n/node * #nodes), then error out            * (c) if -np was provided AND #procs <= (n/node * #nodes), then launch            *     the specified #procs n/node. In this case, we just            *     leave app->num_procs alone            */            if (0 == app->num_procs) {                /* set the num_procs to equal the specified num/node * the number of nodes */                app->num_procs = num_per_node * num_nodes;                modify_app_context = true;            } else if (app->num_procs > (num_per_node * num_nodes)) {                opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:n-per-node-and-too-many-procs",                               true, app->num_procs, num_per_node, num_nodes, num_slots, NULL);                return ORTE_ERR_SILENT;            }        } else if (0 == app->num_procs) {            /** set the num_procs to equal the number of slots on these mapped nodes - if            user has specified "-bynode", then set it to the number of nodes            */            if (mca_rmaps_round_robin_component.bynode) {                app->num_procs = num_nodes;            } else {                app->num_procs = num_slots;            }            modify_app_context = true;        }        /* allocate a vpid range for this app within the job, unless told not to do so */        if (mca_rmaps_round_robin_component.no_allocate_range) {            vpid_start = 0;        } else {            if(ORTE_SUCCESS != (rc = orte_ns.reserve_range(jobid, app->num_procs, &vpid_start))) {                ORTE_ERROR_LOG(rc);                OBJ_DESTRUCT(&master_node_list);                return rc;            }        }                
/** save the initial starting vpid for later */        if (0 == i) {            map->vpid_start = vpid_start;        }                /** track the total number of processes we mapped */        num_procs += app->num_procs;        /* Make assignments */        if (mca_rmaps_round_robin_component.bynode) {            map->mapping_mode = strdup("bynode");            rc = map_app_by_node(app, map, jobid, vpid_start, working_node_list, &max_used_nodes);        } else {            map->mapping_mode = strdup("byslot");            rc = map_app_by_slot(app, map, jobid, vpid_start, working_node_list, &max_used_nodes);        }                if (ORTE_SUCCESS != rc) {            ORTE_ERROR_LOG(rc);            goto cleanup;        }        /* save the next node name bookmark as we will - in the case of mapped nodes -         * release the node information being pointed to by cur_node_item         */        if(NULL != cur_node_item) {            free(save_bookmark);            save_bookmark = strdup(((orte_ras_node_t*)cur_node_item)->node_name);        }        /** cleanup the mapped_node_list, if necessary */        if (0 < app->num_map) {            /* we need to adjust our bookmark so it points to the node in the             * master node list - this allows the cur_node_item to "survive"             * the disassembly of the mapped_node_list             */            if (NULL != cur_node_item) {                node = (orte_ras_node_t*)cur_node_item;                /* This can be a little tricky due to all the corner                 * cases. If the mapped_node_list only has ONE entry on it, then the                 * cur_node_item will always point at it, even if we used everything                 * on that node. What we will do, therefore, is check the usage of the                 * cur_node_item to see if it has reached the soft limit. 
If so, we find                 * the node after that one on the master node list                 */                for (item = opal_list_get_first(&master_node_list);                     item != opal_list_get_end(&master_node_list);                     item = opal_list_get_next(item)) {                    node2 = (orte_ras_node_t*)item;                    if (0 == strcmp(node->node_name, node2->node_name)) {                        if (node->node_slots <= node->node_slots_inuse) {                            /* we are at or beyond the soft limit */                            cur_node_item = opal_list_get_next(item);                        } else {                            cur_node_item = item;                        }                        break;                    }                }            }                        /* as we get rid of the mapped_node_list, we need to update            * corresponding entries in the master_node_list so we accurately            * track the usage of slots. Also, any node that was "used up" will have            * been removed from the mapped_node_list - we now also must ensure that            * such a node is removed from the master_node_list.            *            * Clearly, there will be a performance penalty in doing all these            * operations to maintain data integrity. However, the case where            * someone maps processes this specifically is considered the            * atypical one, so penalizing it may not be a major issue.            *            * Still, some effort to improve the efficiency of this process            * may be in order for the future.            
*            */            while (NULL != (item = opal_list_remove_first(&mapped_node_list))) {                node = (orte_ras_node_t*)item;                /** if the node was still on the mapped_node_list, then it hasn't                 * been moved to the fully_used_node list - find it on the                 * master_node_list and update the slots_inuse count there                 */                for (item2  = opal_list_get_first(&master_node_list);                     item2 != opal_list_get_end(&master_node_list);                     item2  = opal_list_get_next(item2) ) {                    node2 = (orte_ras_node_t*)item2;                    if (0 == strcmp(node2->node_name, node->node_name)) {                        node2->node_slots_inuse = node->node_slots_inuse;                        break;                    }                }                OBJ_RELEASE(item);            }            /** that updated everything that wasn't fully used up while             * processing the specific map. Now we have to ensure that             * any nodes that were used up (and hence, transferred to the             * max_used_node list) are removed from the master_node_list             * No really nice way to do this - we just have to run through             * the two lists and remove any duplicates.             */            while (NULL != (item = opal_list_remove_first(&max_used_nodes))) {                node = (orte_ras_node_t*)item;                                for (item2  = opal_list_get_first(&master_node_list);                     item2 != opal_list_get_end(&master_node_list);                     item2  = opal_list_get_next(item2) ) {                    node2 = (orte_ras_node_t*)item2;                    /** if we have a match, then remove the entry from the                     * master_node_list. 
if that entry was our bookmark,                     * shift the bookmark to the next entry on the list                     */                    if (0 == strcmp(node2->node_name, node->node_name)) {                        if (0 == strcmp(node->node_name,                                        ((orte_ras_node_t*)cur_node_item)->node_name)) {                            cur_node_item = opal_list_get_next(item2);                        }                        opal_list_remove_item(&master_node_list, item2);                        OBJ_RELEASE(item2);                        break;                    }                }                                /** now put that node on the fully_used_nodes list */                opal_list_append(&fully_used_nodes, &node->super);            }        } else {            /** this mapping wasn't specified, so all we have to do is add any nodes             * that were used up in the mapping to the fully_used_nodes list - they             * were already removed from the master_node_list when we did the mapping.             
*/            opal_list_join(&fully_used_nodes, opal_list_get_end(&fully_used_nodes), &max_used_nodes);        }            }    /* compute and save convenience values */    map->vpid_range = num_procs;    map->num_nodes = opal_list_get_size(&map->nodes);    for (item = opal_list_get_first(&map->nodes);         item != opal_list_get_end(&map->nodes);         item = opal_list_get_next(item)) {        mnode = (orte_mapped_node_t*)item;        mnode->num_procs = opal_list_get_size(&mnode->procs);    }        /* save mapping to the registry */    if(ORTE_SUCCESS != (rc = orte_rmaps_base_put_job_map(map))) {        goto cleanup;    }        /** join the master_node_list and fully_used_list so that all info gets updated */    opal_list_join(&master_node_list, opal_list_get_end(&master_node_list), &fully_used_nodes);    /** save the modified node information so we can start from the right     * place next time through    */    if (ORTE_SUCCESS != (rc = orte_rmaps_base_update_node_usage(&master_node_list))) {        ORTE_ERROR_LOG(rc);        goto cleanup;    }        /** if the app_context was modified, update that information too. 
This can only happen
        for the case where num_context=1 and the user didn't specify the number of
        processes     */
    if (modify_app_context) {
        if (ORTE_SUCCESS != (rc = orte_rmgr.store_app_context(jobid, map->apps, 1))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    }

    /* save a bookmark indicating what node we finished with so that subsequent children (if any)
     * can start at the right place
     */
    if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_BOOKMARK,
                                                      ORTE_STRING, save_bookmark,
                                                      ORTE_RMGR_ATTR_OVERRIDE))) {
        /* a failure here is only logged: there is no goto, control simply
         * falls through into the cleanup label below and rc is returned
         */
        ORTE_ERROR_LOG(rc);
    }

cleanup:
    /* Common exit path: reached by fall-through on normal completion and by
     * each goto cleanup above. rc holds the status of the last call made.
     */

    /* release every item still sitting on the master_node_list */
    while(NULL != (item = opal_list_remove_first(&master_node_list))) {
        OBJ_RELEASE(item);
    }

    /* tear down the four working lists, then the map object and the
     * bookmark string
     */
    OBJ_DESTRUCT(&master_node_list);
    OBJ_DESTRUCT(&max_used_nodes);
    OBJ_DESTRUCT(&fully_used_nodes);
    OBJ_DESTRUCT(&mapped_node_list);
    OBJ_RELEASE(map);
    free(save_bookmark);

    return rc;
}

/**
 * Finalize entry point of this module.
 *
 * Performs no work.
 *
 * @return ORTE_SUCCESS always.
 */
static int orte_rmaps_rr_finalize(void)
{
    return ORTE_SUCCESS;
}

/**
 * Function table exported by this module.
 *
 * The initializer is positional, so the order of the entries must match the
 * field order of orte_rmaps_base_module_t (declared elsewhere; field names
 * are not visible here - TODO confirm against the header). The first and
 * last entries are routines from this file; the two orte_rmaps_base_*
 * entries are not defined in this portion of the file.
 */
orte_rmaps_base_module_t orte_rmaps_round_robin_module = {
    orte_rmaps_rr_map,
    orte_rmaps_base_get_job_map,
    orte_rmaps_base_get_node_map,
    orte_rmaps_rr_finalize
};

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -