📄 rmaps_rr.c
字号:
true, num_per_node, slots_per_node, NULL); return ORTE_ERR_SILENT; } /* there are three use-cases that we need to deal with: * (a) if -np was not provided, then we just use the n/node * #nodes * (b) if -np was provided AND #procs > (n/node * #nodes), then error out * (c) if -np was provided AND #procs <= (n/node * #nodes), then launch * the specified #procs n/node. In this case, we just * leave app->num_procs alone */ if (0 == app->num_procs) { /* set the num_procs to equal the specified num/node * the number of nodes */ app->num_procs = num_per_node * num_nodes; modify_app_context = true; } else if (app->num_procs > (num_per_node * num_nodes)) { opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:n-per-node-and-too-many-procs", true, app->num_procs, num_per_node, num_nodes, num_slots, NULL); return ORTE_ERR_SILENT; } } else if (0 == app->num_procs) { /** set the num_procs to equal the number of slots on these mapped nodes - if user has specified "-bynode", then set it to the number of nodes */ if (mca_rmaps_round_robin_component.bynode) { app->num_procs = num_nodes; } else { app->num_procs = num_slots; } modify_app_context = true; } /* allocate a vpid range for this app within the job, unless told not to do so */ if (mca_rmaps_round_robin_component.no_allocate_range) { vpid_start = 0; } else { if(ORTE_SUCCESS != (rc = orte_ns.reserve_range(jobid, app->num_procs, &vpid_start))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&master_node_list); return rc; } } /** save the initial starting vpid for later */ if (0 == i) { map->vpid_start = vpid_start; } /** track the total number of processes we mapped */ num_procs += app->num_procs; /* Make assignments */ if (mca_rmaps_round_robin_component.bynode) { map->mapping_mode = strdup("bynode"); rc = map_app_by_node(app, map, jobid, vpid_start, working_node_list, &max_used_nodes); } else { map->mapping_mode = strdup("byslot"); rc = map_app_by_slot(app, map, jobid, vpid_start, working_node_list, &max_used_nodes); } if (ORTE_SUCCESS 
!= rc) { ORTE_ERROR_LOG(rc); goto cleanup; } /* save the next node name bookmark as we will - in the case of mapped nodes - * release the node information being pointed to by cur_node_item */ if(NULL != cur_node_item) { free(save_bookmark); save_bookmark = strdup(((orte_ras_node_t*)cur_node_item)->node_name); } /** cleanup the mapped_node_list, if necessary */ if (0 < app->num_map) { /* we need to adjust our bookmark so it points to the node in the * master node list - this allows the cur_node_item to "survive" * the disassembly of the mapped_node_list */ if (NULL != cur_node_item) { node = (orte_ras_node_t*)cur_node_item; /* This can be a little tricky due to all the corner * cases. If the mapped_node_list only has ONE entry on it, then the * cur_node_item will always point at it, even if we used everything * on that node. What we will do, therefore, is check the usage of the * cur_node_item to see if it has reached the soft limit. If so, we find * the node after that one on the master node list */ for (item = opal_list_get_first(&master_node_list); item != opal_list_get_end(&master_node_list); item = opal_list_get_next(item)) { node2 = (orte_ras_node_t*)item; if (0 == strcmp(node->node_name, node2->node_name)) { if (node->node_slots <= node->node_slots_inuse) { /* we are at or beyond the soft limit */ cur_node_item = opal_list_get_next(item); } else { cur_node_item = item; } break; } } } /* as we get rid of the mapped_node_list, we need to update * corresponding entries in the master_node_list so we accurately * track the usage of slots. Also, any node that was "used up" will have * been removed from the mapped_node_list - we now also must ensure that * such a node is removed from the master_node_list. * * Clearly, there will be a performance penalty in doing all these * operations to maintain data integrity. However, the case where * someone maps processes this specifically is considered the * atypical one, so penalizing it may not be a major issue. 
* * Still, some effort to improve the efficiency of this process * may be in order for the future. * */ while (NULL != (item = opal_list_remove_first(&mapped_node_list))) { node = (orte_ras_node_t*)item; /** if the node was still on the mapped_node_list, then it hasn't * been moved to the fully_used_node list - find it on the * master_node_list and update the slots_inuse count there */ for (item2 = opal_list_get_first(&master_node_list); item2 != opal_list_get_end(&master_node_list); item2 = opal_list_get_next(item2) ) { node2 = (orte_ras_node_t*)item2; if (0 == strcmp(node2->node_name, node->node_name)) { node2->node_slots_inuse = node->node_slots_inuse; break; } } OBJ_RELEASE(item); } /** that updated everything that wasn't fully used up while * processing the specific map. Now we have to ensure that * any nodes that were used up (and hence, transferred to the * max_used_node list) are removed from the master_node_list * No really nice way to do this - we just have to run through * the two lists and remove any duplicates. */ while (NULL != (item = opal_list_remove_first(&max_used_nodes))) { node = (orte_ras_node_t*)item; for (item2 = opal_list_get_first(&master_node_list); item2 != opal_list_get_end(&master_node_list); item2 = opal_list_get_next(item2) ) { node2 = (orte_ras_node_t*)item2; /** if we have a match, then remove the entry from the * master_node_list. 
if that entry was our bookmark, * shift the bookmark to the next entry on the list */ if (0 == strcmp(node2->node_name, node->node_name)) { if (0 == strcmp(node->node_name, ((orte_ras_node_t*)cur_node_item)->node_name)) { cur_node_item = opal_list_get_next(item2); } opal_list_remove_item(&master_node_list, item2); OBJ_RELEASE(item2); break; } } /** now put that node on the fully_used_nodes list */ opal_list_append(&fully_used_nodes, &node->super); } } else { /** this mapping wasn't specified, so all we have to do is add any nodes * that were used up in the mapping to the fully_used_nodes list - they * were already removed from the master_node_list when we did the mapping. */ opal_list_join(&fully_used_nodes, opal_list_get_end(&fully_used_nodes), &max_used_nodes); } } /* compute and save convenience values */ map->vpid_range = num_procs; map->num_nodes = opal_list_get_size(&map->nodes); for (item = opal_list_get_first(&map->nodes); item != opal_list_get_end(&map->nodes); item = opal_list_get_next(item)) { mnode = (orte_mapped_node_t*)item; mnode->num_procs = opal_list_get_size(&mnode->procs); } /* save mapping to the registry */ if(ORTE_SUCCESS != (rc = orte_rmaps_base_put_job_map(map))) { goto cleanup; } /** join the master_node_list and fully_used_list so that all info gets updated */ opal_list_join(&master_node_list, opal_list_get_end(&master_node_list), &fully_used_nodes); /** save the modified node information so we can start from the right * place next time through */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_update_node_usage(&master_node_list))) { ORTE_ERROR_LOG(rc); goto cleanup; } /** if the app_context was modified, update that information too. 
This can only happen for the case where num_context=1 and the user didn't specify the number of processes */
    if (modify_app_context) {
        if (ORTE_SUCCESS != (rc = orte_rmgr.store_app_context(jobid, map->apps, 1))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    }

    /* save a bookmark indicating what node we finished with so that subsequent children (if any)
     * can start at the right place.
     * A failure here is only logged; control falls through to cleanup and
     * the failing rc is what gets returned. */
    if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_BOOKMARK, ORTE_STRING, save_bookmark, ORTE_RMGR_ATTR_OVERRIDE))) {
        ORTE_ERROR_LOG(rc);
    }

/* Common exit path, reached by fall-through and by every "goto cleanup"
 * above: release whatever is still on the master node list, tear down the
 * working lists, drop the map object and the bookmark string, and hand
 * back the last status stored in rc. */
cleanup:
    /* remove and release each item remaining on master_node_list before the
     * list object itself is destructed */
    while(NULL != (item = opal_list_remove_first(&master_node_list))) {
        OBJ_RELEASE(item);
    }

    OBJ_DESTRUCT(&master_node_list);
    OBJ_DESTRUCT(&max_used_nodes);
    OBJ_DESTRUCT(&fully_used_nodes);
    OBJ_DESTRUCT(&mapped_node_list);
    OBJ_RELEASE(map);

    free(save_bookmark);

    return rc;
}

/**
 * Finalize entry point for the round-robin mapper module.
 *
 * Performs no work.
 *
 * @return ORTE_SUCCESS, unconditionally.
 */
static int orte_rmaps_rr_finalize(void)
{
    return ORTE_SUCCESS;
}

/**
 * Module instance for the round-robin mapper.
 *
 * Initialized positionally with, in order: this file's map function, the
 * base get-job-map and get-node-map functions, and this file's finalize
 * function.
 * NOTE(review): the field names/order come from orte_rmaps_base_module_t,
 * which is declared outside this file - confirm the order against that
 * declaration before adding or reordering entries.
 */
orte_rmaps_base_module_t orte_rmaps_round_robin_module = {
    orte_rmaps_rr_map,
    orte_rmaps_base_get_job_map,
    orte_rmaps_base_get_node_map,
    orte_rmaps_rr_finalize
};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -