📄 odls_default_module.c
字号:
/*
 * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2007      Sun Microsystems, Inc.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 * These symbols are in a file by themselves to provide nice linker
 * semantics.  Since linkers generally pull in symbols by object
 * files, keeping these symbols as the only symbols in this file
 * prevents utility programs such as "ompi_info" from having to import
 * entire components just to query their version and parameters.
 */
#include "orte_config.h"
#include "orte/orte_constants.h"
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <errno.h>
#if HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_WAIT_H
#include <sys/wait.h>
#endif
#include <signal.h>
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif /* HAVE_SYS_STAT_H */
#if defined(HAVE_SCHED_YIELD)
/* Only if we have sched_yield() */
#ifdef HAVE_SCHED_H
#include <sched.h>
#endif
#else
/* Only do these if we don't have <sched.h> */
#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
#endif /* HAVE_SCHED_YIELD */
#include "opal/event/event.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/util/os_path.h"
#include "opal/util/show_help.h"
#include "opal/util/path.h"
#include "opal/util/basename.h"
#include "opal/util/opal_environ.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/paffinity/base/base.h"
#include "orte/dss/dss.h"
#include "orte/util/sys_info.h"
#include "orte/util/univ_info.h"
#include "orte/util/session_dir.h"
#include "orte/runtime/orte_wait.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/errmgr/base/base.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/iof_base_setup.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/sds/base/base.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/rmaps/base/base.h"
#include "orte/mca/smr/smr.h"
#include "orte/mca/odls/base/odls_private.h"
#include "orte/mca/odls/default/odls_default.h"

static void set_handler_default(int sig);

orte_odls_base_module_t orte_odls_default_module = {
    orte_odls_default_subscribe_launch_data,
    orte_odls_default_get_add_procs_data,
    orte_odls_default_launch_local_procs,
    orte_odls_default_kill_local_procs,
    orte_odls_default_signal_local_procs
};

/*
 * Build and register the GPR subscription that returns the data needed
 * to launch local processes for a job.
 *
 * This entire function gets called within a GPR compound command,
 * so the subscription actually doesn't get done until the orted
 * executes the compound command.
 *
 * The subscription carries two values on the job's segment:
 *   values[0] - one token (ORTE_JOB_GLOBALS) and the four glob_keys
 *   values[1] - no tokens and the four per-process/per-node keys
 * and is attached to the ORTED_LAUNCH_STAGE_GATE_TRIGGER trigger.
 *
 * job     - job whose segment, trigger and subscription names are
 *           obtained from orte_schema
 * cbfunc  - callback stored in the subscription (sub.cbfunc)
 *
 * Returns the result of orte_gpr.subscribe() when every setup step
 * succeeds; otherwise the error code of the first failing step, or
 * ORTE_ERR_OUT_OF_RESOURCE if the container token cannot be duplicated.
 * Every name and value allocated here is released before returning,
 * on success as well as on failure.
 */
int orte_odls_default_subscribe_launch_data(orte_jobid_t job, orte_gpr_notify_cb_fn_t cbfunc)
{
    char *segment;
    orte_gpr_value_t *values[2];
    orte_gpr_subscription_t *subs, sub=ORTE_GPR_SUBSCRIPTION_EMPTY;
    orte_gpr_trigger_t *trigs, trig=ORTE_GPR_TRIGGER_EMPTY;
    char *glob_keys[] = {
        ORTE_JOB_APP_CONTEXT_KEY,
        ORTE_JOB_VPID_START_KEY,
        ORTE_JOB_VPID_RANGE_KEY,
        ORTE_JOB_OVERSUBSCRIBE_OVERRIDE_KEY
    };
    int num_glob_keys = 4;
    char* keys[] = {
        ORTE_PROC_NAME_KEY,
        ORTE_PROC_APP_CONTEXT_KEY,
        ORTE_NODE_NAME_KEY,
        ORTE_NODE_OVERSUBSCRIBED_KEY
    };
    int num_keys = 4;
    int i, rc;

    /* get the job segment name */
    if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, job))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* attach ourselves to the "standard" orted trigger */
    if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
                                    ORTED_LAUNCH_STAGE_GATE_TRIGGER, job))) {
        ORTE_ERROR_LOG(rc);
        goto free_segment;
    }

    /* ask for return of all data required for launching local processes */
    subs = &sub;
    sub.action = ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG;
    if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&(sub.name),
                                    ORTED_LAUNCH_STG_SUB, job))) {
        ORTE_ERROR_LOG(rc);
        goto free_trig_name;
    }
    sub.cnt = 2;
    sub.values = values;

    /* first value: the keys held in the job globals container */
    if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[0]), ORTE_GPR_TOKENS_OR,
                                    segment, num_glob_keys, 1))) {
        ORTE_ERROR_LOG(rc);
        goto free_sub_name;
    }
    values[0]->tokens[0] = strdup(ORTE_JOB_GLOBALS);
    if (NULL == values[0]->tokens[0]) {
        /* do not register a subscription whose container token is missing */
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(rc);
        goto release_value0;
    }
    for (i=0; i < num_glob_keys; i++) {
        if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[0]->keyvals[i]),
                                    glob_keys[i], ORTE_UNDEF, NULL))) {
            ORTE_ERROR_LOG(rc);
            goto release_value0;
        }
    }

    /* second value: the per-process / per-node keys, created with no tokens */
    if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[1]),
                                    ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR | ORTE_GPR_STRIPPED,
                                    segment, num_keys, 0))) {
        ORTE_ERROR_LOG(rc);
        goto release_value0;
    }
    for (i=0; i < num_keys; i++) {
        if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[1]->keyvals[i]),
                                    keys[i], ORTE_UNDEF, NULL))) {
            ORTE_ERROR_LOG(rc);
            goto release_value1;
        }
    }

    sub.cbfunc = cbfunc;
    trigs = &trig;

    /* do the subscription; a failure is logged and then falls through to
     * the same cleanup as the success path, returning rc either way */
    if (ORTE_SUCCESS != (rc = orte_gpr.subscribe(1, &subs, 1, &trigs))) {
        ORTE_ERROR_LOG(rc);
    }

    /* staged cleanup: each label releases only what had been acquired
     * by the time control jumped to it */
release_value1:
    OBJ_RELEASE(values[1]);
release_value0:
    OBJ_RELEASE(values[0]);
free_sub_name:
    free(sub.name);
free_trig_name:
    free(trig.name);
free_segment:
    free(segment);
    return rc;
}

int orte_odls_default_get_add_procs_data(orte_gpr_notify_data_t **data, orte_job_map_t *map)
{
    orte_gpr_notify_data_t *ndat;
    orte_gpr_value_t **values, *value;
    orte_std_cntr_t cnt;
    char *glob_tokens[] = { ORTE_JOB_GLOBALS, NULL };
    char *glob_keys[] = { ORTE_JOB_APP_CONTEXT_KEY, ORTE_JOB_VPID_START_KEY, ORTE_JOB_VPID_RANGE_KEY, NULL };
    opal_list_item_t
*item, *m_item; orte_mapped_node_t *node; orte_mapped_proc_t *proc; int rc; char *segment; /* set default answer */ *data = NULL; ndat = OBJ_NEW(orte_gpr_notify_data_t); if (NULL == ndat) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } /* construct a fake trigger name so that the we can extract the jobid from it later */ if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(ndat->target), "bogus", map->job))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(ndat); return rc; } /* get the segment name */ if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, map->job))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(ndat); return rc; } /* get the info from the job globals container first */ if (ORTE_SUCCESS != (rc = orte_gpr.get(ORTE_GPR_TOKENS_AND | ORTE_GPR_KEYS_OR, segment, glob_tokens, glob_keys, &cnt, &values))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(ndat); return rc; } /* there can only be one value here since we only specified a single container. * Just transfer the returned value to the ndat structure */ if (ORTE_SUCCESS != (rc = orte_pointer_array_add(&cnt, ndat->values, values[0]))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(ndat); OBJ_RELEASE(values[0]); return rc; } ndat->cnt = 1; /* the remainder of our required info is in the mapped_node objects, so all we * have to do is transfer it over */ for (m_item = opal_list_get_first(&map->nodes); m_item != opal_list_get_end(&map->nodes); m_item = opal_list_get_next(m_item)) { node = (orte_mapped_node_t*)m_item; for (item = opal_list_get_first(&node->procs); item != opal_list_get_end(&node->procs); item = opal_list_get_next(item)) { proc = (orte_mapped_proc_t*)item; if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, 0, segment, 3, 0))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(ndat); OBJ_RELEASE(value); return rc; } /* be sure NOT to insert tokens into the value as the launch_local_procs * function uses that as an indicator that this is data for a process * as opposed to from the global container */ if 
(ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_PROC_NAME_KEY, ORTE_NAME, &proc->name))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(ndat); OBJ_RELEASE(value); return rc; } if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), ORTE_PROC_APP_CONTEXT_KEY, ORTE_STD_CNTR, &proc->app_idx))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(ndat); OBJ_RELEASE(value); return rc; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -