📄 rds_hostfile.c
字号:
return ORTE_ERROR; } break; default: orte_rds_hostfile_parse_error(token); OBJ_RELEASE(node); return ORTE_ERROR; } }done: if (update) { if (!got_count) { if (got_max) { node->node_slots = node->node_slots_max; } else { ++node->node_slots; } } opal_list_append(updates, &node->super); } else { OBJ_RELEASE(node); } return ORTE_SUCCESS;}/** * Parse the specified file into a node list. */static int orte_rds_hostfile_parse(const char *hostfile, opal_list_t* existing, opal_list_t* updates){ int token; int rc = ORTE_SUCCESS; OPAL_LOCK(&mca_rds_hostfile_component.lock); cur_hostfile_name = strdup(hostfile); orte_rds_hostfile_done = false; orte_rds_hostfile_in = fopen(hostfile, "r"); if (NULL == orte_rds_hostfile_in) { rc = ORTE_ERR_NOT_FOUND; goto unlock; } while (!orte_rds_hostfile_done) { token = orte_rds_hostfile_lex(); switch (token) { case ORTE_RDS_HOSTFILE_DONE: orte_rds_hostfile_done = true; break; case ORTE_RDS_HOSTFILE_NEWLINE: break; /* * This looks odd, since we have several forms of host-definitions: * hostname just plain as it is, being a ORTE_RDS_HOSTFILE_STRING * IP4s and user@IPv4s * hostname.domain and user@hostname.domain */ case ORTE_RDS_HOSTFILE_STRING: case ORTE_RDS_HOSTFILE_INT: case ORTE_RDS_HOSTFILE_HOSTNAME: case ORTE_RDS_HOSTFILE_IPV4: rc = orte_rds_hostfile_parse_line(token, existing, updates); if (ORTE_SUCCESS != rc) { goto unlock; } break; default: orte_rds_hostfile_parse_error(token); goto unlock; } } fclose(orte_rds_hostfile_in); orte_rds_hostfile_in = NULL;unlock: if(NULL != cur_hostfile_name) { free(cur_hostfile_name); cur_hostfile_name = NULL; } OPAL_UNLOCK(&mca_rds_hostfile_component.lock); return rc;}/** * Parse the default file as specified by the MCA parameter, * rds_hostfile_path, and add the nodes to the registry. */static int orte_rds_hostfile_query(orte_jobid_t job){ opal_list_t existing; opal_list_t updates, rds_updates; opal_list_item_t *item; orte_rds_cell_desc_t *rds_item; orte_rds_cell_attr_t *new_attr; orte_ras_node_t *ras_item; int rc; if (orte_rds_hostfile_queried) { /* if we have already been queried, then * our info is on the registry, so just * return. Note that this restriction * may eventually be lifted - ideally, * we might check to see if this is a * new file name and go ahead with the * query if so. */ return ORTE_SUCCESS; } orte_rds_hostfile_queried = true; OBJ_CONSTRUCT(&existing, opal_list_t); OBJ_CONSTRUCT(&updates, opal_list_t); OBJ_CONSTRUCT(&rds_updates, opal_list_t); rc = orte_ras_base_node_query(&existing); if(ORTE_SUCCESS != rc) { goto cleanup; } rc = mca_base_param_find("rds", "hostfile", "path"); mca_base_param_lookup_string(rc, &mca_rds_hostfile_component.path); rc = orte_rds_hostfile_parse(mca_rds_hostfile_component.path, &existing, &updates); if (ORTE_ERR_NOT_FOUND == rc) { if(mca_rds_hostfile_component.default_hostfile) { rc = ORTE_SUCCESS; } else { opal_show_help("help-rds-hostfile.txt", "rds:no-hostfile", true, mca_rds_hostfile_component.path); } goto cleanup; } else if (ORTE_SUCCESS != rc) { goto cleanup; } if ( !opal_list_is_empty(&updates) ) { /* Convert RAS update list to RDS update list */ for ( ras_item = (orte_ras_node_t*)opal_list_get_first(&updates); ras_item != (orte_ras_node_t*)opal_list_get_end(&updates); ras_item = (orte_ras_node_t*)opal_list_get_next(ras_item)) { rds_item = OBJ_NEW(orte_rds_cell_desc_t); if (NULL == rds_item) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } rds_item->site = strdup("Hostfile"); rds_item->name = strdup(ras_item->node_name); if (need_cellid) {#if 0 /* JJH Repair when cellid's are fixed */ /* Create a new cellid for this hostfile */ rc = orte_ns.create_cellid(&local_cellid, rds_item->site, rds_item->name); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; }#endif local_cellid = 0; need_cellid = false; } rds_item->cellid = local_cellid; ras_item->node_cellid = local_cellid; new_attr = OBJ_NEW(orte_rds_cell_attr_t); if (NULL == new_attr) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } new_attr->keyval.key = strdup(ORTE_RDS_NAME); new_attr->keyval.value = OBJ_NEW(orte_data_value_t); if (NULL == new_attr->keyval.value) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } new_attr->keyval.value->type = ORTE_STRING; new_attr->keyval.value->data = strdup(ras_item->node_name); opal_list_append(&(rds_item->attributes), &new_attr->super); new_attr = OBJ_NEW(orte_rds_cell_attr_t); if (NULL == new_attr) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } new_attr->keyval.key = strdup(ORTE_CELLID_KEY); new_attr->keyval.value = OBJ_NEW(orte_data_value_t); if (NULL == new_attr->keyval.value) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } new_attr->keyval.value->type = ORTE_CELLID; if (ORTE_SUCCESS != (rc = orte_dss.copy(&(new_attr->keyval.value->data), &(rds_item->cellid), ORTE_CELLID))) { ORTE_ERROR_LOG(rc); return rc; } opal_list_append(&(rds_item->attributes), &new_attr->super); opal_list_append(&rds_updates, &rds_item->super); } /* Insert the new node into the RDS */ rc = orte_rds.store_resource(&rds_updates); if (ORTE_SUCCESS != rc) { goto cleanup; } /* Then the RAS, since we can assume that any * resources listed in the hostfile have been * already allocated for our use. */ rc = orte_ras_base_node_insert(&updates); if (ORTE_SUCCESS != rc) { goto cleanup; } /* and now, indicate that ORTE should override any oversubscribed conditions * based on local hardware limits since the user (a) might not have * provided us any info on the #slots for a node, and (b) the user * might have been wrong! If we don't check the number of local physical * processors, then we could be too aggressive on our sched_yield setting * and cause performance problems. */ rc = orte_ras_base_set_oversubscribe_override(job); if (ORTE_SUCCESS != rc) { goto cleanup; } }cleanup: if (NULL != mca_rds_hostfile_component.path) { free(mca_rds_hostfile_component.path); mca_rds_hostfile_component.path = NULL; } while(NULL != (item = opal_list_remove_first(&existing))) { OBJ_RELEASE(item); } while(NULL != (item = opal_list_remove_first(&updates))) { OBJ_RELEASE(item); } while (NULL != (rds_item = (orte_rds_cell_desc_t*)opal_list_remove_first(&rds_updates))) { while (NULL != (new_attr = (orte_rds_cell_attr_t*)opal_list_remove_first(&(rds_item->attributes)))) { OBJ_RELEASE(new_attr); } OBJ_RELEASE(rds_item); } OBJ_DESTRUCT(&existing); OBJ_DESTRUCT(&updates); OBJ_DESTRUCT(&rds_updates); return rc;}orte_rds_base_module_t orte_rds_hostfile_module = { orte_rds_hostfile_query, orte_rds_base_store_resource};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -