📄 rmaps_base_registry_fns.c
字号:
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2006 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"

#include "opal/util/output.h"
#include "opal/util/trace.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_param.h"

#include "orte/mca/schema/schema.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/smr/smr_types.h"

#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/base/base.h"

/**
 * Query the process mapping from the registry.
 *
 * Builds a new orte_job_map_t for the given job:
 *   - the application contexts come from orte_rmgr.get_app_context();
 *   - vpid start/range and the mapping mode are read from the container
 *     whose first token is ORTE_JOB_GLOBALS;
 *   - every other container returned by the registry is treated as one
 *     process and is added to the map via orte_rmaps_base_add_proc_to_map().
 *
 * @param map    (OUT) set to NULL on entry; receives the new map only when
 *               ORTE_SUCCESS is returned.  The caller then holds the
 *               reference and releases it with OBJ_RELEASE.
 * @param jobid  job whose map is requested.
 *
 * @return ORTE_SUCCESS, ORTE_ERR_OUT_OF_RESOURCE when an object cannot be
 *         created, or the error code of the failing schema / rmgr / gpr /
 *         dss / rmaps call.
 */
int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid)
{
    orte_job_map_t *mapping;
    orte_mapped_proc_t *proc = NULL;
    orte_mapped_node_t *mnode;
    opal_list_item_t *item;
    orte_cellid_t *cellptr, cell = ORTE_CELLID_INVALID;
    orte_vpid_t *vptr;
    orte_std_cntr_t *sptr;
    bool *bptr, oversub = false;
    pid_t *pidptr;
    orte_process_name_t *pptr;
    int32_t *i32, launch_id;
    char *segment;
    char *node_name = NULL;
    char *username = NULL;
    orte_gpr_value_t **values = NULL, *value;
    orte_gpr_keyval_t *keyval;
    orte_std_cntr_t v, kv, num_values = 0;
    int rc;
    char *keys[] = {
        ORTE_PROC_RANK_KEY,
        ORTE_PROC_NAME_KEY,
        ORTE_PROC_APP_CONTEXT_KEY,
        ORTE_PROC_LOCAL_PID_KEY,
        ORTE_CELLID_KEY,
        ORTE_NODE_NAME_KEY,
        ORTE_NODE_LAUNCH_ID_KEY,
        ORTE_NODE_USERNAME_KEY,
        ORTE_NODE_OVERSUBSCRIBED_KEY,
        ORTE_JOB_VPID_START_KEY,
        ORTE_JOB_VPID_RANGE_KEY,
        ORTE_JOB_MAPPING_MODE_KEY,
        NULL
    };

    OPAL_TRACE(1);

    /* define default answer */
    *map = NULL;

    /* create the object */
    mapping = OBJ_NEW(orte_job_map_t);
    if (NULL == mapping) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* set the jobid */
    mapping->job = jobid;

    /* get the job segment name */
    if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(mapping);
        return rc;
    }

    /* query the application context */
    if (ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &(mapping->apps), &(mapping->num_apps)))) {
        ORTE_ERROR_LOG(rc);
        /* both the segment name and the map are ours at this point */
        free(segment);
        OBJ_RELEASE(mapping);
        return rc;
    }

    /* query the process list from the registry */
    rc = orte_gpr.get(
        ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
        segment,
        NULL,
        keys,
        &num_values,
        &values);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(mapping);
        free(segment);
        return rc;
    }
    free(segment);

    /* build the node and proc lists. each value corresponds
     * to a process in the map
     */
    for (v = 0; v < num_values; v++) {
        value = values[v];
        node_name = NULL;
        launch_id = -1;
        /* NOTE(review): cell and oversub are not reset here, so a container
         * lacking those keys inherits the previous container's values -
         * confirm that this is intended.
         */

        if (0 == strcmp(value->tokens[0], ORTE_JOB_GLOBALS)) {
            /* this came from the job_globals container, so look for the related values */
            for (kv = 0; kv < value->cnt; kv++) {
                if (strcmp(value->keyvals[kv]->key, ORTE_JOB_VPID_START_KEY) == 0) {
                    if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, value->keyvals[kv]->value, ORTE_VPID))) {
                        ORTE_ERROR_LOG(rc);
                        goto cleanup;
                    }
                    mapping->vpid_start = *vptr;
                    continue;
                }
                if (strcmp(value->keyvals[kv]->key, ORTE_JOB_VPID_RANGE_KEY) == 0) {
                    if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, value->keyvals[kv]->value, ORTE_VPID))) {
                        ORTE_ERROR_LOG(rc);
                        goto cleanup;
                    }
                    mapping->vpid_range = *vptr;
                    continue;
                }
                if (strcmp(value->keyvals[kv]->key, ORTE_JOB_MAPPING_MODE_KEY) == 0) {
                    /* use the dss.copy function here to protect us against zero-length strings */
                    if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&mapping->mapping_mode, value->keyvals[kv]->value->data, ORTE_STRING))) {
                        ORTE_ERROR_LOG(rc);
                        goto cleanup;
                    }
                    continue;
                }
            }
        } else {
            /* this came from a process container */
            proc = OBJ_NEW(orte_mapped_proc_t);
            if (NULL == proc) {
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }

            /* Until proc is handed to the map it is owned here; every
             * "goto cleanup" below relies on the cleanup section to
             * release it together with node_name and username.
             */
            for (kv = 0; kv < value->cnt; kv++) {
                keyval = value->keyvals[kv];

                if (strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
                    if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
                        ORTE_ERROR_LOG(rc);
                        goto cleanup;
                    }
                    proc->rank = *sptr;
                    continue;
                }
                if (strcmp(keyval->key, ORTE_PROC_NAME_KEY) == 0) {
                    if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, keyval->value, ORTE_NAME))) {
                        ORTE_ERROR_LOG(rc);
                        goto cleanup;
                    }
                    proc->name = *pptr;
                    continue;
                }
                if (strcmp(keyval->key, ORTE_NODE_LAUNCH_ID_KEY) == 0) {
                    if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&i32, keyval->value, ORTE_INT32))) {
                        ORTE_ERROR_LOG(rc);
                        goto cleanup;
                    }
                    launch_id = *i32;
                    continue;
                }
                if (strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
                    if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
                        ORTE_ERROR_LOG(rc);
                        goto cleanup;
                    }
                    proc->app_idx = *sptr;
                    continue;
                }
                if (strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
                    if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
                        ORTE_ERROR_LOG(rc);
                        goto cleanup;
                    }
                    proc->pid = *pidptr;
                    continue;
                }
                if (strcmp(keyval->key, ORTE_CELLID_KEY) == 0) {
                    if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cellptr, keyval->value, ORTE_CELLID))) {
                        ORTE_ERROR_LOG(rc);
                        goto cleanup;
                    }
                    cell = *cellptr;
                    continue;
                }
                if (strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
                    /* use the dss.copy function here to protect us against zero-length strings */
                    if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&node_name, keyval->value->data, ORTE_STRING))) {
                        ORTE_ERROR_LOG(rc);
                        goto cleanup;
                    }
                    continue;
                }
                if (strcmp(keyval->key, ORTE_NODE_USERNAME_KEY) == 0) {
                    /* use the dss.copy function here to protect us against zero-length strings */
                    if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&username, keyval->value->data, ORTE_STRING))) {
                        ORTE_ERROR_LOG(rc);
                        goto cleanup;
                    }
                    continue;
                }
                if (strcmp(keyval->key, ORTE_NODE_OVERSUBSCRIBED_KEY) == 0) {
                    if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&bptr, keyval->value, ORTE_BOOL))) {
                        ORTE_ERROR_LOG(rc);
                        goto cleanup;
                    }
                    oversub = *bptr;
                    continue;
                }
            }

            /* store this process in the map */
            rc = orte_rmaps_base_add_proc_to_map(mapping, cell, node_name, launch_id, username, oversub, proc);

            /* The proc has been handed to the map.  Whether a failed call
             * may already have linked it into the map is not visible from
             * here, so it is deliberately not released on that path (the
             * map itself is released in cleanup).
             */
            proc = NULL;

            /* The per-process string copies are ours to free.  This was
             * already done for node_name on success; username is treated
             * the same way, which presumes the map keeps its own copy -
             * TODO confirm against orte_rmaps_base_add_proc_to_map().
             */
            free(node_name);
            node_name = NULL;
            free(username);
            username = NULL;

            if (ORTE_SUCCESS != rc) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
        }
    }

    /* compute and save convenience values */
    mapping->num_nodes = opal_list_get_size(&mapping->nodes);
    for (item = opal_list_get_first(&mapping->nodes);
         item != opal_list_get_end(&mapping->nodes);
         item = opal_list_get_next(item)) {
        mnode = (orte_mapped_node_t*)item;
        mnode->num_procs = opal_list_get_size(&mnode->procs);
    }

    /* all done */
    *map = mapping;
    rc = ORTE_SUCCESS;

cleanup:
    /* anything still held here was abandoned part-way through a container */
    if (NULL != proc) {
        OBJ_RELEASE(proc);
    }
    free(node_name);
    free(username);

    if (rc != ORTE_SUCCESS) {
        OBJ_RELEASE(mapping);
    }

    for (v = 0; v < num_values; v++) {
        OBJ_RELEASE(values[v]);
    }
    if (NULL != values) free(values);

    return rc;
}

/**
 * Retrieve the mapped-node object for one node of a job.
 *
 * Fetches the complete job map, looks for the node whose cell and name
 * match, detaches that node from the map's node list and releases the
 * rest of the map.
 *
 * @param node      (OUT) set to NULL on entry; receives the detached node
 *                  when ORTE_SUCCESS is returned.  The node is no longer
 *                  held by the map, so the caller holds it afterwards.
 * @param cell      cell id the node must match.
 * @param nodename  name the node must match (compared with strcmp).
 * @param job       job whose map is searched.
 *
 * @return ORTE_SUCCESS, ORTE_ERR_BAD_PARAM when nodename is NULL,
 *         ORTE_ERR_NOT_FOUND when no node matches, or the error returned
 *         by orte_rmaps_base_get_job_map().
 */
int orte_rmaps_base_get_node_map(orte_mapped_node_t **node, orte_cellid_t cell,
                                 char *nodename, orte_jobid_t job)
{
    orte_job_map_t *map;
    opal_list_item_t *item;
    orte_mapped_node_t *nptr;
    int rc;

    /* set default answer */
    *node = NULL;

    /* strcmp() on a NULL name is undefined - reject it before doing any work */
    if (NULL == nodename) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_job_map(&map, job))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* scan the map for the indicated node */
    for (item = opal_list_get_first(&map->nodes);
         item != opal_list_get_end(&map->nodes);
         item = opal_list_get_next(item)) {
        nptr = (orte_mapped_node_t*)item;

        if (cell == nptr->cell && 0 == strcmp(nodename, nptr->nodename)) {
            *node = nptr;
            /* protect the node object from release when we get rid
             * of the map object
             */
            opal_list_remove_item(&map->nodes, item);
            OBJ_RELEASE(map);
            return ORTE_SUCCESS;
        }
    }

    /* if we get here, then the node wasn't found */
    OBJ_RELEASE(map);
    return ORTE_ERR_NOT_FOUND;
}

/**
 * Set the process mapping in the registry.
 */
int orte_rmaps_base_put_job_map(orte_job_map_t *map)
{
    orte_std_cntr_t i, j;
    orte_std_cntr_t index=0;
    orte_std_cntr_t num_procs = 0;
    int rc = ORTE_SUCCESS;
    opal_list_item_t *item, *item2;
    orte_gpr_value_t **values, *value;
    char *segment;
    orte_mapped_node_t *node;
    orte_mapped_proc_t *proc;
    orte_proc_state_t proc_state=ORTE_PROC_STATE_INIT;

    OPAL_TRACE(2);

    for(item = opal_list_get_first(&map->nodes);
        item != opal_list_get_end(&map->nodes);
        item = opal_list_get_next(item)) {
        node = (orte_mapped_node_t*)item;
        num_procs += (orte_std_cntr_t)opal_list_get_size(&node->procs);
    }
    if(num_procs == 0) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /**
     * allocate value array. We need to reserve one extra spot so we can set the counter
     * for the process INIT state to indicate that all procs are at that state. This will
     * allow the INIT trigger to fire.
     */
    values = (orte_gpr_value_t**)malloc((1+num_procs) * sizeof(orte_gpr_value_t*));
    if(NULL == values) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -