📄 totalview.c
字号:
/* Can we find argv[0] in the path? */ getcwd(cwd, PATH_MAX); tmp = opal_path_findv((*new_argv)[0], X_OK, environ, cwd); if (NULL != tmp) { free(tmp); /* Ok, we found a good debugger. Check for some error conditions. */ /* We do not support launching a debugger that requires the -np value if the user did not specify -np on the command line. */ if (used_num_procs && 0 == orterun_globals.num_procs) { opal_show_help("help-orterun.txt", "debugger requires -np", true, (*new_argv)[0], argv[0], user_argv, (*new_argv)[0]); /* Fall through to free / fail, below */ } /* Some debuggers do not support launching MPMD */ else if (fail_single_app) { opal_show_help("help-orterun.txt", "debugger only accepts single app", true, (*new_argv)[0], (*new_argv)[0]); /* Fall through to free / fail, below */ } /* Some debuggers do not use orterun/mpirun, and therefore must have an executable to run (e.g., cannot use mpirun's app context file feature). */ else if (fail_needed_executable) { opal_show_help("help-orterun.txt", "debugger requires executable", true, (*new_argv)[0], argv[0], (*new_argv)[0], argv[0], (*new_argv)[0]); /* Fall through to free / fail, below */ } /* Otherwise, we succeeded. Return happiness. */ else { return ORTE_SUCCESS; } } /* All done -- didn't find it */ opal_argv_free(*new_argv); *new_argv = NULL; return ORTE_ERR_NOT_FOUND;}/** * Run a user-level debugger */void orte_run_debugger(char *basename, opal_cmd_line_t *cmd_line, int argc, char *argv[]){ int i, id; char **new_argv = NULL; char *value, **lines; /* Get the orte_base_debug MCA parameter and search for a debugger that can run */ id = mca_base_param_find("orte", NULL, "base_user_debugger"); if (id < 0) { opal_show_help("help-orterun.txt", "debugger-mca-param-not-found", true); exit(1); } value = NULL; mca_base_param_lookup_string(id, &value); if (NULL == value) { opal_show_help("help-orterun.txt", "debugger-orte_base_user_debugger-empty", true); exit(1); } /* Look through all the values in the MCA param */ lines = opal_argv_split(value, ':'); free(value); for (i = 0; NULL != lines[i]; ++i) { if (ORTE_SUCCESS == process(lines[i], basename, cmd_line, argc, argv, &new_argv)) { break; } } /* If we didn't find one, abort */ if (NULL == lines[i]) { opal_show_help("help-orterun.txt", "debugger-not-found", true); exit(1); } opal_argv_free(lines); /* We found one */ execvp(new_argv[0], new_argv); value = opal_argv_join(new_argv, ' '); opal_show_help("help-orterun.txt", "debugger-exec-failed", true, basename, value, new_argv[0]); free(value); opal_argv_free(new_argv); exit(1);}/** * Initialization of data structures for running under a debugger * using the MPICH/TotalView parallel debugger interface. Before the * spawn we need to check if we are being run under a TotalView-like * debugger; if so then inform applications via an MCA parameter. */void orte_totalview_init_before_spawn(void){ if (MPIR_DEBUG_SPAWNED == MPIR_being_debugged) { int value; char *s; if (orte_debug_flag) { opal_output(0, "Info: Spawned by a debugger"); } if (mca_base_param_reg_int_name("orte", "mpi_wait_for_totalview", "Whether the MPI application should wait for a debugger or not", false, false, (int)false, &value) < 0) { opal_output(0, "Error: mca_base_param_reg_int_name\n"); } /* push mca parameter into the environment (not done automatically?) */ s = mca_base_param_environ_variable("orte", "mpi_wait_for_totalview", NULL); if (ORTE_SUCCESS != opal_setenv(s, "1", true, &environ)) { opal_output(0, "Error: Can't setenv %s\n", s); } free(s); }}/** * Initialization of data structures for running under a debugger * using the MPICH/TotalView parallel debugger interface. This stage * of initialization must occur after stage2 of spawn and is invoked * via a callback. * * @param jobid The jobid returned by spawn. */void orte_totalview_init_after_spawn(orte_jobid_t jobid){ orte_job_map_t *map; opal_list_item_t *item, *item2; orte_mapped_node_t *node; orte_mapped_proc_t *proc; orte_app_context_t *appctx; orte_std_cntr_t i; int rc; if (MPIR_proctable) { /* already initialized */ return; } if (0) { /* debugging daemons <<-- needs work */ if (orte_debug_flag) { opal_output(0, "Info: Setting up debugger process table for daemons\n"); } } else { /* * Debugging applications or not being debugged. * * Either way, fill in the proc table for the application * processes in case someone attaches later. */ if (orte_debug_flag) { opal_output(0, "Info: Setting up debugger process table for applications\n"); } MPIR_debug_state = 1; /* Get the resource map for this job */ rc = orte_rmaps.get_job_map(&map, jobid); if (ORTE_SUCCESS != rc) { opal_output(0, "Error: Can't get resource map\n"); ORTE_ERROR_LOG(rc); } /* find the total number of processes in the job */ for (i=0; i < map->num_apps; i++) { MPIR_proctable_size += map->apps[i]->num_procs; } /* allocate MPIR_proctable */ MPIR_proctable = (struct MPIR_PROCDESC *) malloc(sizeof(struct MPIR_PROCDESC) * MPIR_proctable_size); if (MPIR_proctable == NULL) { opal_output(0, "Error: Out of memory\n"); OBJ_RELEASE(map); } /* initialize MPIR_proctable */ i=0; for (item = opal_list_get_first(&map->nodes); item != opal_list_get_end(&map->nodes); item = opal_list_get_next(item)) { node = (orte_mapped_node_t*)item; for (item2 = opal_list_get_first(&node->procs); item2 != opal_list_get_end(&node->procs); item2 = opal_list_get_next(item2)) { proc = (orte_mapped_proc_t*)item2; appctx = map->apps[proc->app_idx]; MPIR_proctable[i].host_name = strdup(node->nodename); if ( 0 == strncmp(appctx->app, OPAL_PATH_SEP, 1 )) { MPIR_proctable[i].executable_name = opal_os_path( false, appctx->app, NULL ); } else { MPIR_proctable[i].executable_name = opal_os_path( false, appctx->cwd, appctx->app, NULL ); } MPIR_proctable[i].pid = proc->pid; i++; } } OBJ_RELEASE(map); } if (orte_debug_flag) { dump(); } (void) MPIR_Breakpoint();}/** * Release resources associated with data structures for running under * a debugger using the MPICH/TotalView parallel debugger interface. */void orte_totalview_finalize(void){ if (MPIR_proctable) { free(MPIR_proctable); }}/** * Breakpoint function for parallel debuggers */void *MPIR_Breakpoint(void){ return NULL;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -