📄 rmaps_rr.c
字号:
/*
 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2006 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006      Cisco Systems, Inc.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"

#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif  /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif  /* HAVE_STRING_H */

#include "opal/mca/base/mca_base_param.h"
#include "opal/util/output.h"
#include "opal/util/trace.h"
#include "opal/util/show_help.h"
#include "opal/util/argv.h"

#include "orte/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/rmgr/rmgr.h"

#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/base/base.h"
#include "rmaps_rr.h"


/*
 * Local variables
 */

/* Cursor into the node list: the node that the mapping functions below will
 * claim a slot on next. Both mapping functions read it and advance it, so
 * its value carries over from one call to the next.
 * NOTE(review): it starts out NULL and neither mapping function checks or
 * initializes it before use - presumably the caller positions it first;
 * confirm against the caller. */
static opal_list_item_t *cur_node_item = NULL;
static opal_list_t fully_used_nodes;
/* Number of slots map_app_by_slot takes per node when the component's
 * n_per_node flag is set. */
static orte_std_cntr_t num_per_node;


/*
 * Create a default mapping for the application, scheduling round
 * robin by node.
 *
 * One process is placed per node visit: a slot is claimed on the node at
 * cur_node_item, then the cursor moves to the following node (wrapping to
 * the head of the list), until app->num_procs processes have been placed.
 *
 * app            - application context; num_procs, idx and app are read
 * map            - job map, forwarded to orte_rmaps_base_claim_slot
 * jobid          - job id, forwarded to orte_rmaps_base_claim_slot
 * vpid_start     - vpid of the first process; the n-th process placed by
 *                  this call is given vpid_start + n (n counted from 0)
 * nodes          - list of candidate nodes that is cycled through
 * max_used_nodes - forwarded to orte_rmaps_base_claim_slot
 *
 * Returns ORTE_SUCCESS once every process has been placed,
 * ORTE_ERR_OUT_OF_RESOURCE if the node list is empty while processes
 * remain, or whatever error orte_rmaps_base_claim_slot reported (other
 * than ORTE_ERR_NODE_FULLY_USED, which is not treated as a failure).
 */
static int map_app_by_node(
    orte_app_context_t* app,
    orte_job_map_t* map,
    orte_jobid_t jobid,
    orte_vpid_t vpid_start,
    opal_list_t* nodes,
    opal_list_t* max_used_nodes)
{
    int rc = ORTE_SUCCESS;
    orte_std_cntr_t num_alloc = 0;
    opal_list_item_t *next;
    orte_ras_node_t *node;

    OPAL_TRACE(2);

    /* This loop continues until all procs have been mapped or we run
       out of resources. We determine that we have "run out of
       resources" when all nodes have node_slots_max processes mapped to them,
       thus there are no free slots for a process to be mapped, or we have
       hit the soft limit on all nodes and are in a "no oversubscribe" state.
       If we still have processes that haven't been mapped yet, then it's an
       "out of resources" error.

       In this scenario, we rely on the claim_slot function to handle the
       oversubscribed case. The claim_slot function will leave a node on the
       list until it either reaches node_slots_max OR reaches node_slots (the
       soft limit) and the "no_oversubscribe" flag has been set - at which
       point, the node will be removed to prevent any more processes from
       being mapped to it. Since we are taking one slot from each node as we
       cycle through the list, oversubscription is automatically taken care
       of via this logic.
     */

    while (num_alloc < app->num_procs) {

        /** see if any nodes remain unused and available. We need to do this check
         * each time since we may remove nodes from the list (as they become fully
         * used) as we cycle through the loop */
        if(0 >= opal_list_get_size(nodes) ) {
            /* No more nodes to allocate :( */
            opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:alloc-error",
                           true, app->num_procs, app->app);
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        /* Save the next node we can use before claiming slots, since
         * we may need to prune the nodes list removing overused nodes.
         * Wrap around to beginning if we are at the end of the list */
        if (opal_list_get_end(nodes) == opal_list_get_next(cur_node_item)) {
            next = opal_list_get_first(nodes);
        }
        else {
            next = opal_list_get_next(cur_node_item);
        }

        /* Allocate a slot on this node */
        node = (orte_ras_node_t*) cur_node_item;
        if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, app->idx,
                                             nodes, max_used_nodes,
                                             mca_rmaps_round_robin_component.oversubscribe))) {
            /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
             * really isn't an error - we just need to break from the loop
             * since the node is fully used up. For now, just don't report
             * an error
             */
            if (ORTE_ERR_NODE_FULLY_USED != rc) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
        }

        /* Reached for both ORTE_SUCCESS and ORTE_ERR_NODE_FULLY_USED: the
         * process is counted as placed in either case, and the cursor moves
         * to the node saved before the claim. */
        ++num_alloc;

        cur_node_item = next;
    }

    return ORTE_SUCCESS;
}


/*
 * Create a default mapping for the application, scheduling one round
 * robin by slot.
 */
static int map_app_by_slot(
    orte_app_context_t* app,
    orte_job_map_t* map,
    orte_jobid_t jobid,
    orte_vpid_t vpid_start,
    opal_list_t* nodes,
    opal_list_t* max_used_nodes)
{
    int rc = ORTE_SUCCESS;
    orte_std_cntr_t i, num_slots_to_take;
    orte_std_cntr_t num_alloc = 0;
    orte_ras_node_t *node;
    opal_list_item_t *next;

    OPAL_TRACE(2);

    /* This loop continues until all procs have been mapped or we run
       out of resources. We determine that we have "run out of
       resources" when either all nodes have node_slots_max processes mapped to them,
       (thus there are no free slots for a process to be mapped), OR all nodes
       have reached their soft limit and the user directed us to "no oversubscribe".
       If we still have processes that haven't been mapped yet, then it's an
       "out of resources" error. */
    num_alloc = 0;

    while ( num_alloc < app->num_procs) {

        /** see if any nodes remain unused and available. We need to do this check
        * each time since we may remove nodes from the list (as they become fully
        * used) as we cycle through the loop */
        if(0 >= opal_list_get_size(nodes) ) {
            /* Everything is at max usage! :( */
            opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:alloc-error",
                           true, app->num_procs, app->app);
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        /* Save the next node we can use before claiming slots, since
        * we may need to prune the nodes list removing overused nodes.
        * Wrap around to beginning if we are at the end of the list */
        if (opal_list_get_end(nodes) == opal_list_get_next(cur_node_item)) {
            next = opal_list_get_first(nodes);
        }
        else {
            next = opal_list_get_next(cur_node_item);
        }

        /** declare a shorter name for convenience in the code below */
        node = (orte_ras_node_t*) cur_node_item;

        /* If we have available slots on this node, claim all of them
         * If node_slots == 0, assume 1 slot for that node.
         * JJH - is this assumption fully justified?
         *
         * If we are now oversubscribing the nodes, then we still take:
         * (a) if the node has not been used yet, we take a full node_slots
         * (b) if some of the slots are in-use, then we take the number of
         *     remaining slots before hitting the soft limit (node_slots)
         * (c) if we are at or above the soft limit, we take a full node_slots
         *
         * Note: if node_slots is zero, then we always just take 1 slot
         *
         * We continue this process until either everything is done,
         * or all nodes have hit their hard limit. This algorithm ensures we
         * fully utilize each node before oversubscribing, and preserves the ratio
         * of processes between the nodes thereafter (e.g., if one node has twice as
         * many processes as another before oversubscribing, it will continue
         * to do so after oversubscribing).
         */
        if (0 == node->node_slots_inuse ||
            node->node_slots_inuse >= node->node_slots) {
            num_slots_to_take = (node->node_slots == 0) ? 1 : node->node_slots;
        } else {
            num_slots_to_take = node->node_slots - node->node_slots_inuse;
        }

        /* check if we are in npernode mode - if so, then set the num_slots_to_take
         * to the num_per_node
         */
        if (mca_rmaps_round_robin_component.n_per_node) {
            num_slots_to_take = num_per_node;
        }

        for( i = 0; i < num_slots_to_take; ++i) {
            if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, app->idx,
                                                 nodes, max_used_nodes,
                                                 mca_rmaps_round_robin_component.oversubscribe))) {
                /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
                 * really isn't an error - we just need to break from the loop
                 * since the node is fully used up. For now, just don't report
                 * an error
                 */
                if (ORTE_ERR_NODE_FULLY_USED != rc) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
            }

            /* Update the number of procs allocated */
            ++num_alloc;

            /** if all the procs have been mapped OR we have fully used up this node, then
             * break from the loop
             */
            if(num_alloc == app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) {
                break;
            }
        }

        /* we move on to the next node in all cases EXCEPT if we came
         * out of the loop without having taken a full bite AND the
         * node is NOT max'd out
         *
         */
        if (i < (num_slots_to_take-1) && ORTE_ERR_NODE_FULLY_USED != rc) {
            continue;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -