📄 odls_bproc.c
字号:
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
/**
 * @file:
 * Part of the bproc launcher.
 * See odls_bproc.h for an overview of how it works.
 */

#include "orte_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <pty.h>
#include <dirent.h>

#include "opal/mca/base/mca_base_param.h"
#include "opal/runtime/opal_progress.h"
#include "opal/threads/condition.h"
#include "opal/util/os_dirpath.h"
#include "opal/util/os_path.h"
#include "opal/util/output.h"

#include "orte/dss/dss.h"
#include "orte/util/sys_info.h"
#include "orte/orte_constants.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/iof_base_setup.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/util/session_dir.h"
#include "orte/util/univ_info.h"

#include "odls_bproc.h"

/**
 * Initialization of the bproc_orted module with all the needed function pointers
 */
orte_odls_base_module_t orte_odls_bproc_module = {
    orte_odls_bproc_subscribe_launch_data,
    orte_odls_bproc_get_add_procs_data,
    orte_odls_bproc_launch_local_procs,
    orte_odls_bproc_kill_local_procs,
    orte_odls_bproc_signal_local_procs
};

static int odls_bproc_make_dir(char *directory);
static char * odls_bproc_get_base_dir_name(int proc_rank, orte_jobid_t jobid,
                                           orte_std_cntr_t app_context);
static void odls_bproc_delete_dir_tree(char * path);
static int odls_bproc_remove_dir(void);
static void odls_bproc_send_cb(int status, orte_process_name_t * peer,
                               orte_buffer_t* buffer, int tag, void* cbdata);
static int odls_bproc_setup_stdio(orte_process_name_t *proc_name,
                                  int proc_rank, orte_jobid_t jobid,
                                  orte_std_cntr_t app_context, bool connect_stdin);


/**
 * Not implemented by this component.
 *
 * @param data unused
 * @param map  unused
 * @retval ORTE_ERR_NOT_IMPLEMENTED always
 */
int orte_odls_bproc_get_add_procs_data(orte_gpr_notify_data_t **data,
                                       orte_job_map_t *map)
{
    return ORTE_ERR_NOT_IMPLEMENTED;
}


/**
 * Creates the passed directory.  If the directory already exists, it and its
 * contents will be deleted then the directory will be created.
 * @param directory The directory to be created.
 * @retval ORTE_SUCCESS
 * @retval error (whatever opal_os_dirpath_create() reports)
 */
static int
odls_bproc_make_dir(char *directory)
{
    struct stat buf;
    mode_t my_mode = S_IRWXU;  /* at the least, I need to be able to do anything */

    if (0 == stat(directory, &buf)) {
        /* exists - delete it and its contents */
        odls_bproc_delete_dir_tree(directory);
    }

    /* try to create it with proper mode */
    return opal_os_dirpath_create(directory, my_mode);
}


/**
 * Returns a path of the form:
 * @code
 * /tmp/openmpi-bproc-<user>/<universe>/<jobid>-<app_context>/<proc_rank>/
 * @endcode
 * which is used to put links to the pty/pipes in
 * @param proc_rank   the process's rank on the node
 * @param jobid       the jobid the proc belongs to
 * @param app_context the application context number within the job
 * @retval path  string allocated by asprintf(); presumably the caller is
 *               expected to free() it - confirm against the callers
 * @retval NULL  if the universe name is not set, the jobid cannot be
 *               converted to a string, or the path cannot be formatted
 */
static char *
odls_bproc_get_base_dir_name(int proc_rank, orte_jobid_t jobid,
                             orte_std_cntr_t app_context)
{
    char *path = NULL, *user = NULL, *job = NULL;
    int rc;

    /* ensure that system info is set */
    orte_sys_info();

    if (NULL == orte_universe_info.name) {  /* error condition */
        ORTE_ERROR_LOG(ORTE_ERROR);
        return NULL;
    }

    rc = orte_ns.convert_jobid_to_string(&job, jobid);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return NULL;
    }

    /* get the username set by the bproc pls.  We need to get it from here
     * because on many bproc systems the method we use to get the username
     * from the system on the backend fails and we only get the uid.
     */
    rc = mca_base_param_register_string("pls", "bproc", "username", NULL,
                                        orte_system_info.user);
    mca_base_param_lookup_string(rc, &user);

    if (0 > asprintf(&path,
                     OPAL_PATH_SEP "tmp"
                     OPAL_PATH_SEP "openmpi-bproc-%s"
                     OPAL_PATH_SEP "%s"
                     OPAL_PATH_SEP "%s-%d"
                     OPAL_PATH_SEP "%d",
                     user, orte_universe_info.name, job,
                     (int) app_context, proc_rank)) {
        ORTE_ERROR_LOG(ORTE_ERROR);
        /* asprintf leaves the pointer indeterminate on failure */
        path = NULL;
    }

    if (0 < mca_odls_bproc_component.debug) {
        /* never hand a NULL pointer to a %s conversion */
        opal_output(0, "odls bproc io setup. Path: %s\n",
                    (NULL != path) ? path : "(null)");
    }

    free(user);
    free(job);
    return path;
}


/**
 * deletes the passed directory tree recursively
 *
 * Entries are examined with lstat() so that symbolic links are removed
 * themselves instead of being followed: a link that points at a directory
 * must not cause the contents of its target to be deleted, and a dangling
 * link must still be unlinked so that the final rmdir() can succeed.
 * All errors are ignored (best-effort cleanup).
 *
 * @param path the path to the base directory to delete
 */
static void
odls_bproc_delete_dir_tree(char * path)
{
    DIR *dp;
    struct dirent *ep;
    char *filenm;
    struct stat buf;

    dp = opendir(path);
    if (NULL == dp) {
        return;
    }

    while (NULL != (ep = readdir(dp))) {
        /* skip: . and .. */
        if (0 == strcmp(ep->d_name, ".") || 0 == strcmp(ep->d_name, "..")) {
            continue;
        }

        filenm = opal_os_path(false, path, ep->d_name, NULL);
        if (NULL == filenm) {
            /* could not build the entry's path - nothing we can do for it */
            continue;
        }

        if (0 == lstat(filenm, &buf) && S_ISDIR(buf.st_mode)) {
            /* a real directory (not a link to one): recurse into it */
            odls_bproc_delete_dir_tree(filenm);
        } else {
            /* regular file, FIFO, symlink (dangling or not), ... */
            unlink(filenm);
        }
        free(filenm);
    }

    closedir(dp);
    rmdir(path);
}


/**
 * Removes the bproc directory
 * @code /tmp/openmpi-bproc-<user>/ @endcode and all of its contents
 * @retval ORTE_SUCCESS
 * @retval ORTE_ERROR if the directory name could not be constructed
 */
static int
odls_bproc_remove_dir(void)
{
    char *frontend = NULL, *user = NULL, *filename = NULL;
    int id;

    /* get the username set by the bproc pls.  We need to get it from here
     * because on many bproc systems the method we use to get the username
     * from the system on the backend fails and we only get the uid.
     */
    id = mca_base_param_register_string("pls", "bproc", "username", NULL,
                                        orte_system_info.user);
    mca_base_param_lookup_string(id, &user);

    /* check the return value: on failure the output pointer is indeterminate */
    if (0 > asprintf(&filename, "openmpi-bproc-%s", user)) {
        filename = NULL;
    }
    /* the looked-up string is ours to release, exactly as it is released in
     * odls_bproc_get_base_dir_name() */
    free(user);
    if (NULL == filename) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERROR;
    }

    /* NOTE(review): the doc comment above implies this yields the absolute
     * path /tmp/<filename> - confirm against opal_os_path() */
    frontend = opal_os_path(false, "tmp", filename, NULL);
    free(filename);  /* Always free the filename */
    if (NULL == frontend) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERROR;
    }

    /* we do our best to clean up the directory tree, but we ignore errors */
    odls_bproc_delete_dir_tree(frontend);
    free(frontend);
    return ORTE_SUCCESS;
}


/**
 * Callback function for when we tell mpirun we are ready.
 * Its only action is to release the buffer that was sent.
 * @param status unused
 * @param peer   unused
 * @param buffer the buffer that was sent; released here
 * @param tag    unused
 * @param cbdata unused
 */
static void
odls_bproc_send_cb(int status, orte_process_name_t * peer,
                   orte_buffer_t* buffer, int tag, void* cbdata)
{
    OBJ_RELEASE(buffer);
}


/**
 * Create Standard I/O symlinks in the filesystem for a given proc
 *
 * Create Standard I/O symlinks in the filesystem for a given proc.
 * The symlinks will be placed in:
 * @code
 * /tmp/openmpi-bproc-<user>/<universe>/<jobid>-<app_context>/<proc_rank>/
 * @endcode
 *
 * The symlinks will be to FIFOs for stdin and stderr. stdout will either
 * be to a FIFO or pty, depending on the configuration of Open MPI.
* * @param proc_rank the process's rank on the node * @param jobid the jobid the proc belongs to * @param app_context the application context number within the job * @param connect_stdin if true, stdin will be connected, otherwise it will be * set to /dev/null * * @retval ORTE_SUCCESS * @retval error */static intodls_bproc_setup_stdio(orte_process_name_t *proc_name, int proc_rank, orte_jobid_t jobid, orte_std_cntr_t app_context, bool connect_stdin){ char *path_prefix, *fd_link_path = NULL; int rc = ORTE_SUCCESS, fd;#if defined(HAVE_OPENPTY) && (OMPI_ENABLE_PTY_SUPPORT != 0) int amaster, aslave; char pty_name[256]; struct termios term_attrs;#endif path_prefix = odls_bproc_get_base_dir_name(proc_rank, jobid, (size_t)app_context); if (NULL == path_prefix) { rc = ORTE_ERROR; ORTE_ERROR_LOG(rc); goto cleanup; } /* check for existence and access, or create it */ if (ORTE_SUCCESS != (rc = odls_bproc_make_dir(path_prefix))) { ORTE_ERROR_LOG(rc); goto cleanup; } /* setup the stdin FIFO. Always use a fifo for the same reason we always use a pipe in the iof_setup code -- don't want to flush onto the floor during close */ fd_link_path = opal_os_path( false, path_prefix, "0", NULL ); if (NULL == fd_link_path) { rc = ORTE_ERROR; ORTE_ERROR_LOG(rc); goto cleanup; } if (connect_stdin) { if (0 != mkfifo(fd_link_path, S_IRWXU)) { perror("odls_bproc mkfifo failed"); rc = ORTE_ERROR; ORTE_ERROR_LOG(rc); goto cleanup; } fd = open(fd_link_path, O_RDWR); if (-1 == fd) { perror("odls_bproc open failed"); rc = ORTE_ERROR; ORTE_ERROR_LOG(rc); goto cleanup; } orte_iof.iof_publish(proc_name, ORTE_IOF_SINK, ORTE_IOF_STDIN, fd); } else { if(0 != symlink("/dev/null", fd_link_path)) { perror("odls_bproc could not create symlink"); rc = ORTE_ERROR; ORTE_ERROR_LOG(rc); goto cleanup; } } free(fd_link_path); fd_link_path = NULL; /* setup the stdout PTY / FIFO */ fd_link_path = opal_os_path( false, path_prefix, "1", NULL ); if (NULL == fd_link_path) { rc = ORTE_ERROR; ORTE_ERROR_LOG(rc); goto cleanup; 
}#if defined(HAVE_OPENPTY) && (OMPI_ENABLE_PTY_SUPPORT != 0) if (0 != openpty(&amaster, &aslave, pty_name, NULL, NULL)) { opal_output(0, "odls_bproc: openpty failed, using pipes instead"); goto stdout_fifo_setup; } if (0 != symlink(pty_name, fd_link_path)) { rc = ORTE_ERROR; ORTE_ERROR_LOG(rc); goto cleanup;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -