📄 ns_proxy_general_fns.c
字号:
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2006 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
/** @file:
 *
 */

#include "orte_config.h"

#include <string.h>

#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/mca/mca.h"
#include "opal/util/output.h"
#include "opal/util/trace.h"

#include "orte/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/rml/rml.h"

#include "ns_proxy.h"

/*
 * PEER functions
 */

/**
 * Obtain the list of peer process names selected by @p attrs.
 *
 * If @p attrs does not contain ORTE_NS_USE_JOBID, the answer is built locally
 * from orte_process_info (this process's own job, assumed to be entirely on
 * this process's cell).  Otherwise the request is packed and sent to the name
 * service replica (ORTE_NS_MY_REPLICA) and the reply is unpacked.
 *
 * The proxy mutex is held for the whole call, including the blocking
 * send/receive to the replica.
 *
 * @param[out] procs      Set to NULL on entry.  On success, points to an array
 *                        allocated with malloc() holding the peer names (left
 *                        NULL when the replica reports zero peers).  This
 *                        function keeps no reference to the array, so the
 *                        caller is expected to free() it.  On any error it is
 *                        NULL.
 * @param[out] num_procs  Set to 0 on entry; on success, the number of entries
 *                        in @p *procs.
 * @param[in]  attrs      Attribute list; ORTE_NS_USE_CELL and
 *                        ORTE_NS_USE_JOBID are the attributes examined here.
 *                        The full list is forwarded to the replica for
 *                        non-local requests.
 *
 * @retval ORTE_SUCCESS               Peers returned in @p *procs / @p *num_procs.
 * @retval ORTE_ERR_NOT_IMPLEMENTED   ORTE_NS_USE_CELL names a cell that is
 *                                    neither this process's cell nor
 *                                    ORTE_CELLID_WILDCARD.
 * @retval ORTE_ERR_OUT_OF_RESOURCE   A buffer or the result array could not be
 *                                    allocated.
 * @retval ORTE_ERR_COMM_FAILURE      Send or receive failed, or the reply did
 *                                    not echo ORTE_NS_GET_PEERS_CMD.
 * @return Any other error code produced by orte_dss get/pack/unpack.
 */
int orte_ns_proxy_get_peers(orte_process_name_t **procs,
                            orte_std_cntr_t *num_procs, opal_list_t *attrs)
{
    orte_buffer_t *cmd = NULL;
    orte_buffer_t *answer = NULL;
    orte_ns_cmd_flag_t command;
    orte_std_cntr_t count, nprocs, i;
    orte_cellid_t *cptr;
    orte_attribute_t *attr;
    int rc;

    OPAL_TRACE(1);

    OPAL_THREAD_LOCK(&orte_ns_proxy.mutex);

    /* set default value */
    *procs = NULL;
    *num_procs = 0;

    /* check the attributes to see if USE_JOB or USE_CELL has been set. If not, then this is
     * a request for my own job peers - process that one locally
     */

    /* if the cell is given AND it matches my own, then we can process this
     * quickly. Otherwise, we have to do some more work.
     *
     * RHC: when we go multi-cell, we need a way to find all the cells upon
     * which a job is executing so we can make this work!
     */
    if (NULL != (attr = orte_rmgr.find_attribute(attrs, ORTE_NS_USE_CELL))) {
        if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cptr, attr->value, ORTE_CELLID))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        if (*cptr != ORTE_PROC_MY_NAME->cellid && *cptr != ORTE_CELLID_WILDCARD) {
            rc = ORTE_ERR_NOT_IMPLEMENTED;
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    }

    if (NULL == (attr = orte_rmgr.find_attribute(attrs, ORTE_NS_USE_JOBID))) {
        /* get my own job peers, assuming all are on this cell - process here
         *
         * RHC: This is a bad assumption. When we go multi-cell, we are going to have to process
         * get peer requests solely on the HNP since we won't know the cellid otherwise
         */
        *procs = (orte_process_name_t*)malloc(orte_process_info.num_procs * sizeof(orte_process_name_t));
        if (NULL == *procs) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        /* peers share my cell and job; vpids are consecutive from vpid_start */
        for (i = 0; i < orte_process_info.num_procs; i++) {
            (*procs)[i].cellid = ORTE_PROC_MY_NAME->cellid;
            (*procs)[i].jobid = ORTE_PROC_MY_NAME->jobid;
            (*procs)[i].vpid = orte_process_info.vpid_start + i;
        }
        *num_procs = orte_process_info.num_procs;
        rc = ORTE_SUCCESS;
        goto cleanup;
    }

    /* non-local request for peers in another job - send to replica for processing */
    cmd = OBJ_NEW(orte_buffer_t);
    if (NULL == cmd) {
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* pack the command */
    command = ORTE_NS_GET_PEERS_CMD;
    if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&command, 1, ORTE_NS_CMD))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* pack the attributes */
    if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, attrs, 1, ORTE_ATTR_LIST))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) {
        rc = ORTE_ERR_COMM_FAILURE;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* request is on its way; drop it now and make sure cleanup will not
     * release it a second time
     */
    OBJ_RELEASE(cmd);
    cmd = NULL;

    answer = OBJ_NEW(orte_buffer_t);
    if (NULL == answer) {
        /* answer is NULL here, so there is nothing to release */
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS)) {
        rc = ORTE_ERR_COMM_FAILURE;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* the reply must echo the command we sent */
    count = 1;
    if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_NS_CMD))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_NS_GET_PEERS_CMD != command) {
        rc = ORTE_ERR_COMM_FAILURE;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* number of names that follow */
    count = 1;
    if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &nprocs, &count, ORTE_STD_CNTR))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* allocate space for array of proc names */
    if (0 < nprocs) {
        *procs = (orte_process_name_t*)malloc((nprocs) * sizeof(orte_process_name_t));
        if (NULL == *procs) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, *procs, &nprocs, ORTE_NAME))) {
            ORTE_ERROR_LOG(rc);
            /* do not hand a half-filled array back alongside an error code */
            free(*procs);
            *procs = NULL;
            goto cleanup;
        }
    }

    *num_procs = nprocs;
    rc = ORTE_SUCCESS;

cleanup:
    /* single exit: release whichever buffers are still live, then unlock */
    if (NULL != cmd) {
        OBJ_RELEASE(cmd);
    }
    if (NULL != answer) {
        OBJ_RELEASE(answer);
    }
    OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex);
    return rc;
}

int orte_ns_proxy_assign_rml_tag(orte_rml_tag_t *tag,
                                 char *name)
{
    orte_buffer_t* cmd;
    orte_buffer_t* answer;
    orte_ns_cmd_flag_t command;
    orte_ns_proxy_tagitem_t* tagitem, **tags;
    orte_std_cntr_t count, i;
    orte_rml_tag_t j;
    int rc;
OPAL_THREAD_LOCK(&orte_ns_proxy.mutex); if (NULL != name) { /* see if this name is already in list - if so, return tag */ tags = (orte_ns_proxy_tagitem_t**)orte_ns_proxy.tags->addr; for (i=0, j=0; j < orte_ns_proxy.num_tags && i < (orte_ns_proxy.tags)->size; i++) { if (NULL != tags[i]) { j++; if (tags[i]->name != NULL && 0 == strcmp(name, tags[i]->name)) { /* found name on list */ *tag = tags[i]->tag; OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); return ORTE_SUCCESS; } } } } /* okay, not on local list - so go get one from tag server */ command = ORTE_NS_ASSIGN_OOB_TAG_CMD; *tag = ORTE_RML_TAG_MAX; /* set the default error value */ if ((cmd = OBJ_NEW(orte_buffer_t)) == NULL) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); return ORTE_ERR_OUT_OF_RESOURCE; } if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, (void*)&command, 1, ORTE_NS_CMD))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(cmd); OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); return rc; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -