📄 rmaps_rr.c
字号:
} cur_node_item = next; } return ORTE_SUCCESS;} /* * Process the attributes and push them into our local "global" */static int orte_rmaps_rr_process_attrs(opal_list_t *attributes){ int rc; char *policy; orte_attribute_t *attr; orte_std_cntr_t *scptr; bool policy_override; mca_rmaps_round_robin_component.bynode = false; /* set default mapping policy to byslot*/ policy_override = false; mca_rmaps_round_robin_component.per_node = false; if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_PERNODE))) { /* was provided - set boolean accordingly */ mca_rmaps_round_robin_component.per_node = true; /* indicate that we are going to map this job bynode */ mca_rmaps_round_robin_component.bynode = true; /* indicate that this is to *be* the policy no matter what */ policy_override = true; } mca_rmaps_round_robin_component.n_per_node = false; if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_N_PERNODE))) { /* was provided - set boolean accordingly */ mca_rmaps_round_robin_component.n_per_node = true; /* get the number of procs per node to launch */ if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&scptr, attr->value, ORTE_STD_CNTR))) { ORTE_ERROR_LOG(rc); return rc; } num_per_node = *scptr; /* default to byslot mapping */ mca_rmaps_round_robin_component.bynode = false; } /* define the mapping policy. This *must* come after we process the pernode * options since those set a default mapping policy - we want to be able * to override that setting if requested * * NOTE: we don't do this step if the policy_override has been set! */ if (!policy_override && NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_MAP_POLICY))) { /* they specified a mapping policy - extract its name */ if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&policy, attr->value, ORTE_STRING))) { ORTE_ERROR_LOG(rc); return rc; } if (0 == strcmp(policy, "bynode")) { mca_rmaps_round_robin_component.bynode = true; } else { mca_rmaps_round_robin_component.bynode = false; } } mca_rmaps_round_robin_component.no_use_local = false; if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_NO_USE_LOCAL))) { /* was provided - set boolean accordingly */ mca_rmaps_round_robin_component.no_use_local = true; } mca_rmaps_round_robin_component.oversubscribe = true; if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_NO_OVERSUB))) { /* was provided - set boolean accordingly */ mca_rmaps_round_robin_component.oversubscribe = false; } mca_rmaps_round_robin_component.no_allocate_range = false; if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_NO_ALLOC_RANGE))) { /* was provided - set boolean accordingly */ mca_rmaps_round_robin_component.no_allocate_range = true; } return ORTE_SUCCESS;}/* * Create a round-robin mapping for the job. */static int orte_rmaps_rr_map(orte_jobid_t jobid, opal_list_t *attributes){ orte_app_context_t *app; orte_job_map_t* map; orte_std_cntr_t i; opal_list_t master_node_list, mapped_node_list, max_used_nodes, *working_node_list; opal_list_item_t *item, *item2; orte_ras_node_t *node, *node2; orte_mapped_node_t *mnode; char *save_bookmark; orte_vpid_t vpid_start; orte_std_cntr_t num_procs = 0, total_num_slots, mapped_num_slots, num_nodes, num_slots; int rc; bool modify_app_context = false; char *sptr; orte_attribute_t *attr; orte_std_cntr_t slots_per_node; OPAL_TRACE(1); /* setup the local environment from the attributes */ if (ORTE_SUCCESS != (rc = orte_rmaps_rr_process_attrs(attributes))) { ORTE_ERROR_LOG(rc); return rc; } /* create the map object */ map = OBJ_NEW(orte_job_map_t); if (NULL == map) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } /* set the jobid */ map->job = jobid; /* query for the application context and allocated nodes */ if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &(map->apps), &(map->num_apps)))) { ORTE_ERROR_LOG(rc); return rc; } /* query for all nodes allocated to this job - this will become our master list of * nodes. From this, we will construct a working list of nodes based on any specified * mappings from the user */ OBJ_CONSTRUCT(&master_node_list, opal_list_t); if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&master_node_list, jobid, &total_num_slots, mca_rmaps_round_robin_component.no_use_local))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&master_node_list); return rc; } /* if a bookmark exists from some prior mapping, set us to start there */ if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_BOOKMARK))) { cur_node_item = NULL; if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, attr->value, ORTE_STRING))) { ORTE_ERROR_LOG(rc); return rc; } /* find this node on the master list */ for (item = opal_list_get_first(&master_node_list); item != opal_list_get_end(&master_node_list); item = opal_list_get_next(item)) { node = (orte_ras_node_t*)item; if (0 == strcmp(sptr, node->node_name)) { cur_node_item = item; break; } } /* see if we found it - if not, just start at the beginning */ if (NULL == cur_node_item) { cur_node_item = opal_list_get_first(&master_node_list); } } else { /* if no bookmark, then just start at the beginning of the list */ cur_node_item = opal_list_get_first(&master_node_list); } /* save the node name for the bookmark just in case we don't do anything * useful down below */ save_bookmark = strdup(((orte_ras_node_t*)cur_node_item)->node_name); /** construct the list to hold any nodes that get fully used during this * mapping. We need to keep a record of these so we can update their * information on the registry when we are done, but we want to remove * them from our master_node_list as we go so we don't keep checking to * see if we can still map something onto them. */ OBJ_CONSTRUCT(&fully_used_nodes, opal_list_t); /** construct an intermediate list that will hold the nodes that are fully * used during any one pass through the mapper (i.e., for each app_context). * we will join the results together to form the fully_used_nodes list. This * allows us to more efficiently handle the cases where users specify * the proc-to-node mapping themselves. */ OBJ_CONSTRUCT(&max_used_nodes, opal_list_t); /** construct a list to hold any nodes involved in a user-specified mapping */ OBJ_CONSTRUCT(&mapped_node_list, opal_list_t); for(i=0; i < map->num_apps; i++) { app = map->apps[i]; /** if the number of processes wasn't specified, then we know there can be only * one app_context allowed in the launch, and that we are to launch it across * all available slots. We'll double-check the single app_context rule first */ if (0 == app->num_procs && 1 < map->num_apps) { opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:multi-apps-and-zero-np", true, map->num_apps, NULL); ORTE_ERROR_LOG(ORTE_ERR_INVALID_NUM_PROCS); return ORTE_ERR_INVALID_NUM_PROCS; } if ( 0 < app->num_map ) { /** If the user has specified a mapping for this app_context, then we * create a working node list that contains only those nodes. */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_mapped_targets(&mapped_node_list, app, &master_node_list, &mapped_num_slots))) { ORTE_ERROR_LOG(rc); goto cleanup; } working_node_list = &mapped_node_list; /* Set cur_node_item to point to the first node in the specified list to be used */ cur_node_item = opal_list_get_first(working_node_list); num_nodes = (orte_std_cntr_t)opal_list_get_size(&mapped_node_list); num_slots = (orte_std_cntr_t)mapped_num_slots; } else { /** no mapping was specified, so we are going to just use everything that was * allocated to us. We don't need to update cur_node_item in this case since it * is always pointing to something in the master_node_list - we'll just pick up * from wherever we last stopped. */ working_node_list = &master_node_list; num_nodes = (orte_std_cntr_t)opal_list_get_size(&master_node_list); num_slots = total_num_slots; } if (mca_rmaps_round_robin_component.per_node) { /* there are three use-cases that we need to deal with: * (a) if -np was not provided, then we just use the number of nodes * (b) if -np was provided AND #procs > #nodes, then error out * (c) if -np was provided AND #procs <= #nodes, then launch * the specified #procs one/node. In this case, we just * leave app->num_procs alone */ if (0 == app->num_procs) { app->num_procs = num_nodes; modify_app_context = true; } else if (app->num_procs > num_nodes) { opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:per-node-and-too-many-procs", true, app->num_procs, num_nodes, NULL); return ORTE_ERR_SILENT; } } else if (mca_rmaps_round_robin_component.n_per_node) { /* first, let's check to see if there are enough slots/node to * meet the request - error out if not */ slots_per_node = num_slots / num_nodes; if (num_per_node > slots_per_node) { opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:n-per-node-and-not-enough-slots",
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -