📄 orted.c
字号:
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2006 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2007      Cisco, Inc.  All rights reserved.
 * Copyright (c) 2007      Los Alamos National Security, LLC.  All rights
 *                         reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "orte_config.h"

#include <stdio.h>
#include <ctype.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
#include <fcntl.h>
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>   /* memset() is used in main() */
#ifdef HAVE_LIMITS_H
#include <limits.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif

#include "orte/orte_constants.h"

#include "opal/event/event.h"
#include "opal/mca/base/base.h"
#include "opal/threads/mutex.h"
#include "opal/threads/condition.h"
#include "opal/util/cmd_line.h"
#include "opal/util/daemon_init.h"
#include "opal/util/opal_environ.h"
#include "opal/util/os_path.h"
#include "opal/util/output.h"
#include "opal/util/printf.h"
#include "opal/util/show_help.h"
#include "opal/util/trace.h"
#include "opal/util/argv.h"
#include "opal/runtime/opal.h"

#include "orte/dss/dss.h"
#include "orte/class/orte_value_array.h"
#include "orte/util/sys_info.h"
#include "orte/util/proc_info.h"
#include "orte/util/univ_info.h"
#include "orte/util/session_dir.h"
#include "orte/util/universe_setup_file_io.h"

#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/smr/smr.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/rmgr/base/base.h"
#include "orte/mca/odls/odls.h"
#include "orte/mca/pls/pls.h"

#include "orte/runtime/runtime.h"
#include "orte/runtime/params.h"

#include "orte/tools/orted/orted.h"

/*
 * Globals
 */
orted_globals_t orted_globals;

static struct opal_event term_handler;
static struct opal_event int_handler;

static void signal_callback(int fd, short flags, void *arg);
static void orte_daemon_recv(int status, orte_process_name_t* sender,
                             orte_buffer_t *buffer, orte_rml_tag_t tag,
                             void* cbdata);
static void orte_daemon_recv_pls(int status, orte_process_name_t* sender,
                                 orte_buffer_t *buffer, orte_rml_tag_t tag,
                                 void* cbdata);
static void orted_local_cb_launcher(orte_gpr_notify_data_t *data,
                                    void *user_tag);

/*
 * define the orted context table for obtaining parameters
 *
 * Each entry carries, in order as used below: three name components
 * (presumably the MCA parameter the option maps to -- confirm against
 * opal/util/cmd_line.h), the single-character short option ('\0' for
 * none), a field that is NULL in every entry here, the long option name,
 * the number of arguments the option takes, the variable that receives
 * the value (NULL when nothing is stored directly), the value type, and
 * the help text.
 *
 * The table is terminated by an all-NULL entry of type
 * OPAL_CMD_LINE_TYPE_NULL.
 */
opal_cmd_line_init_t orte_cmd_line_opts[] = {
    /* Various "obvious" options */
    { NULL, NULL, NULL, 'h', NULL, "help", 0,
      &orted_globals.help, OPAL_CMD_LINE_TYPE_BOOL,
      "This help message" },

    /* The short option used to be 'd', which collided with --debug
     * below; two entries must not share a short option, so spin now
     * uses 's'.  The long form --spin is unchanged. */
    { "orted", "spin", NULL, 's', NULL, "spin", 0,
      &orted_globals.spin, OPAL_CMD_LINE_TYPE_BOOL,
      "Have the orted spin until we can connect a debugger to it" },

    { "orte", "debug", NULL, 'd', NULL, "debug", 0,
      &orted_globals.debug, OPAL_CMD_LINE_TYPE_BOOL,
      "Debug the OpenRTE" },

    { "orte", "no_daemonize", NULL, '\0', NULL, "no-daemonize", 0,
      &orted_globals.no_daemonize, OPAL_CMD_LINE_TYPE_BOOL,
      "Don't daemonize into the background" },

    { "orte", "debug", "daemons", '\0', NULL, "debug-daemons", 0,
      &orted_globals.debug_daemons, OPAL_CMD_LINE_TYPE_BOOL,
      "Enable debugging of OpenRTE daemons" },

    { "orte", "debug", "daemons_file", '\0', NULL, "debug-daemons-file", 0,
      &orted_globals.debug_daemons_file, OPAL_CMD_LINE_TYPE_BOOL,
      "Enable debugging of OpenRTE daemons, storing output in files" },

    { "rmgr", "bootproxy", "jobid", '\0', NULL, "bootproxy", 1,
      &orted_globals.bootproxy, OPAL_CMD_LINE_TYPE_INT,
      "Run as boot proxy for <job-id>" },

    { NULL, NULL, NULL, '\0', NULL, "set-sid", 0,
      &orted_globals.set_sid, OPAL_CMD_LINE_TYPE_BOOL,
      "Direct the orted to separate from the current session"},

    { NULL, NULL, NULL, '\0', NULL, "name", 1,
      &orted_globals.name, OPAL_CMD_LINE_TYPE_STRING,
      "Set the orte process name"},

    { NULL, NULL, NULL, '\0', NULL, "vpid_start", 1,
      &orted_globals.vpid_start, OPAL_CMD_LINE_TYPE_STRING,
      "Set the starting vpid for this job"},

    { NULL, NULL, NULL, '\0', NULL, "num_procs", 1,
      &orted_globals.num_procs, OPAL_CMD_LINE_TYPE_STRING,
      "Set the number of process in this job"},

    { NULL, NULL, NULL, '\0', NULL, "ns-nds", 1,
      &orted_globals.ns_nds, OPAL_CMD_LINE_TYPE_STRING,
      "set sds/nds component to use for daemon (normally not needed)"},

    { NULL, NULL, NULL, '\0', NULL, "nsreplica", 1,
      &orte_process_info.ns_replica_uri, OPAL_CMD_LINE_TYPE_STRING,
      "Name service contact information."},

    { NULL, NULL, NULL, '\0', NULL, "gprreplica", 1,
      &orte_process_info.gpr_replica_uri, OPAL_CMD_LINE_TYPE_STRING,
      "Registry contact information."},

    { NULL, NULL, NULL, '\0', NULL, "nodename", 1,
      &orte_system_info.nodename, OPAL_CMD_LINE_TYPE_STRING,
      "Node name as specified by host/resource description." },

    { "universe", NULL, NULL, '\0', NULL, "universe", 1,
      &orted_globals.universe, OPAL_CMD_LINE_TYPE_STRING,
      "Set the universe name as username@hostname:universe_name for this application" },

    { "tmpdir", "base", NULL, '\0', NULL, "tmpdir", 1,
      NULL, OPAL_CMD_LINE_TYPE_STRING,
      "Set the root for the session directory tree" },

    { "seed", NULL, NULL, '\0', NULL, "seed", 0,
      NULL, OPAL_CMD_LINE_TYPE_BOOL,
      "Host replicas for the core universe services"},

    { "universe", "persistence", NULL, '\0', NULL, "persistent", 0,
      NULL, OPAL_CMD_LINE_TYPE_BOOL,
      "Remain alive after the application process completes"},

    { "universe", "scope", NULL, '\0', NULL, "scope", 1,
      NULL, OPAL_CMD_LINE_TYPE_STRING,
      "Set restrictions on who can connect to this universe"},

    { NULL, NULL, NULL, '\0', NULL, "report-uri", 1,
      &orted_globals.uri_pipe, OPAL_CMD_LINE_TYPE_INT,
      "Report this process' uri on indicated pipe"},

    { NULL, NULL, NULL, '\0', NULL, "mpi-call-yield", 1,
      &orted_globals.mpi_call_yield, OPAL_CMD_LINE_TYPE_INT,
      "Have MPI (or similar) applications call yield when idle" },

    /* End of list */
    { NULL, NULL, NULL, '\0', NULL, NULL, 0,
      NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
};

/*
 * Entry point of the orted daemon.
 *
 * The portion visible here, in order: applies an optional umask taken
 * from the environment, zeroes orted_globals, initializes the OPAL
 * utility layer, saves a copy of the environment, initializes the MCA
 * parameter system, parses the command line against orte_cmd_line_opts,
 * handles the help request, optionally calls setsid(), optionally spins
 * waiting for a debugger, marks the process as a daemon, and starts
 * exporting name-discovery settings when a name was given.
 *
 * Returns the opal_cmd_line_parse() error code when parsing fails and 1
 * when help was requested; exits with status 1 if opal_init_util() fails.
 */
int main(int argc, char *argv[])
{
    int ret = 0;
    int fd;
    opal_cmd_line_t *cmd_line = NULL;
    char *log_path = NULL;
    char log_file[PATH_MAX];
    char *jobidstring;
    orte_gpr_value_t *value;
    char *segment;
    int i;
    orte_buffer_t answer;
    char *umask_str;

    /* Allow the PLS starters to pass us a umask to use, if required.
       Most starters by default can do something sane with the umask,
       but some (like TM) do not pass on the umask but instead inherit
       it from the root level process starter.  This has to happen
       before opal_init and everything else so that the couple of
       places that stash a umask end up with the correct value.  Only
       do it here (and not in orte_daemon) mainly to make it clear
       that this should only happen when starting an orted for the
       first time.  All starters I'm aware of that don't require an
       orted are smart enough to pass on a reasonable umask, so they
       wouldn't need this functionality anyway. */
    umask_str = getenv("ORTE_DAEMON_UMASK_VALUE");
    if (NULL != umask_str) {
        char *endptr;
        /* The value is parsed as octal. */
        long mask = strtol(umask_str, &endptr, 8);
        /* NOTE(review): errno is not cleared before strtol(), so the
           EINVAL/ERANGE test can observe a stale value.  An empty
           string also satisfies *endptr == '\0' and, where strtol()
           does not set EINVAL for "no conversion", would apply
           umask(0) -- confirm and tighten (errno = 0 beforehand,
           endptr != umask_str). */
        if ((! (0 == mask && (EINVAL == errno || ERANGE == errno))) &&
            (*endptr == '\0')) {
            umask(mask);
        }
    }

    /* initialize the globals */
    memset(&orted_globals, 0, sizeof(orted_globals_t));

    /* Ensure that enough of OPAL is setup for us to be able to run */
    if (OPAL_SUCCESS != opal_init_util()) {
        fprintf(stderr, "OPAL failed to initialize -- orted aborting\n");
        exit(1);
    }

    /* save the environment for use when launching application processes */
    orted_globals.saved_environ = opal_argv_copy(environ);

    /* setup mca param system */
    mca_base_param_init();

    /* setup to check common command line options that just report and die */
    cmd_line = OBJ_NEW(opal_cmd_line_t);
    opal_cmd_line_create(cmd_line, orte_cmd_line_opts);
    if (ORTE_SUCCESS != (ret = opal_cmd_line_parse(cmd_line, false,
                                                   argc, argv))) {
        /* parsing failed: print the usage message and report the error */
        char *args = NULL;
        args = opal_cmd_line_get_usage_msg(cmd_line);
        opal_show_help("help-orted.txt", "orted:usage", false,
                       argv[0], args);
        free(args);
        return ret;
    }

    /* check for help request */
    if (orted_globals.help) {
        char *args = NULL;
        args = opal_cmd_line_get_usage_msg(cmd_line);
        opal_show_help("help-orted.txt", "orted:usage", false,
                       argv[0], args);
        free(args);
        return 1;
    }

    /* see if we were directed to separate from current session */
    if (orted_globals.set_sid) {
        setsid();
    }

    /* see if they want us to spin until they can connect a debugger to us */
    /* NOTE(review): nothing inside the loop changes orted_globals.spin,
       so it only terminates when the flag is cleared from outside (e.g.
       by the attached debugger).  Whether the field is declared so that
       the compiler must re-read it each iteration is not visible here --
       confirm in orted.h. */
    i=0;
    while (orted_globals.spin) {
        i++;
        if (1000 < i) i=0;
    }

    /* Okay, now on to serious business! */

    /* Ensure the process info structure is instantiated and initialized
     * and set the daemon flag to true
     */
    orte_process_info.daemon = true;

    /*
     * If the daemon was given a name on the command line, need to set the
     * proper indicators in the environment so the name discovery service
     * can find it
     */
    if (orted_globals.name) {
        if (ORTE_SUCCESS != (ret = opal_setenv("OMPI_MCA_ns_nds", "env",
                                              true, &environ))) {
            opal_show_help("help-orted.txt", "orted:environ", false,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -