⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pls_tm_module.c

📁 MPI stands for the Message Passing Interface. Written by the MPI Forum (a large committee comprising
💻 C
📖 第 1 页 / 共 2 页
字号:
            mca_pls_tm_component.verbose) {            opal_output(0, "pls:tm: launching on node %s",                         node->nodename);        }                /* setup process name */        rc = orte_ns.get_proc_name_string(&name_string, name);        if (ORTE_SUCCESS != rc) {            opal_output(0, "pls:tm: unable to create process name");            return rc;        }        free(argv[proc_name_index]);        argv[proc_name_index] = strdup(name_string);            /* exec the daemon */        if (mca_pls_tm_component.debug) {            param = opal_argv_join(argv, ' ');            if (NULL != param) {                opal_output(0, "pls:tm: executing: %s", param);                free(param);            }        }                /* check for timing request - get start time if so */        if (mca_pls_tm_component.timing) {            if (0 != gettimeofday(&launchstart, NULL)) {                opal_output(0, "pls_tm: could not obtain start time");                launchstart.tv_sec = 0;                launchstart.tv_usec = 0;            }        }                rc = tm_spawn(argc, argv, env, node->launch_id, tm_task_ids + launched, tm_events + launched);        if (TM_SUCCESS != rc) {            return ORTE_ERROR;        }                if (ORTE_SUCCESS != rc) {            opal_output(0, "pls:tm: start_procs returned error %d", rc);            goto cleanup;        }        /* check for timing request - get stop time and process if so */        if (mca_pls_tm_component.timing) {            if (0 != gettimeofday(&launchstop, NULL)) {                opal_output(0, "pls_tm: could not obtain stop time");            } else {                deltat = (launchstop.tv_sec - launchstart.tv_sec)*1000000 +                         (launchstop.tv_usec - launchstart.tv_usec);                avgtime = avgtime + deltat / num_nodes;                if (deltat < mintime) {                    mintime = deltat;                    miniter = launched;                }         
       if (deltat > maxtime) {                    maxtime = deltat;                    maxiter = launched;                }            }        }                launched++;        ++vpid;        free(name);        /* Allow some progress to occur */        opal_event_loop(OPAL_EVLOOP_NONBLOCK);    }    if (mca_pls_tm_component.debug) {        opal_output(0, "pls:tm:launch: finished spawning orteds\n");    }    /* check for timing request - get start time for launch completion */    if (mca_pls_tm_component.timing) {        if (0 != gettimeofday(&completionstart, NULL)) {            opal_output(0, "pls_tm: could not obtain completion start time");            completionstart.tv_sec = 0;            completionstart.tv_usec = 0;        }    }        /* all done, so store the daemon info on the registry */    if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {        ORTE_ERROR_LOG(rc);    }        /* TM poll for all the spawns */    for (i = 0; i < launched; ++i) {        rc = tm_poll(TM_NULL_EVENT, &event, 1, &local_err);        if (TM_SUCCESS != rc) {            errno = local_err;            opal_output(0, "pls:tm: failed to poll for a spawned proc, return status = %d", rc);            return ORTE_ERR_IN_ERRNO;        }    }        /* check for timing request - get stop time for launch completion and report */    if (mca_pls_tm_component.timing) {        if (0 != gettimeofday(&completionstop, NULL)) {            opal_output(0, "pls_tm: could not obtain completion stop time");        } else {            deltat = (launchstop.tv_sec - launchstart.tv_sec)*1000000 +                     (launchstop.tv_usec - launchstart.tv_usec);            opal_output(0, "pls_tm: launch completion required %d usec", deltat);        }        opal_output(0, "pls_tm: Launch statistics:");        opal_output(0, "pls_tm: Average time to launch an orted: %f usec", avgtime);        opal_output(0, "pls_tm: Max time to launch an orted: %d usec at iter %d", maxtime, maxiter);   
     opal_output(0, "pls_tm: Min time to launch an orted: %d usec at iter %d", mintime, miniter);
    }

    /* Common exit path: release everything acquired by this function.
     * NOTE(review): the start of this function is not visible here;
     * the comments below describe only the statements shown. */
 cleanup:
    OBJ_RELEASE(map);

    /* only disconnect from TM if a connection was recorded */
    if (connected) {
        pls_tm_disconnect();
    }
    if (NULL != tm_events) {
        free(tm_events);
    }
    if (NULL != tm_task_ids) {
        free(tm_task_ids);
    }

    if (NULL != lib_base) {
        free(lib_base);
    }
    if (NULL != bin_base) {
        free(bin_base);
    }

    /* deconstruct the daemon list: release each item, then the list itself */
    while (NULL != (item = opal_list_remove_first(&daemons))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&daemons);

    /* check for timing request - get the stop time and, if it could be
     * read, report the elapsed time since jobstart in microseconds */
    if (mca_pls_tm_component.timing) {
        if (0 != gettimeofday(&jobstop, NULL)) {
            opal_output(0, "pls_tm: could not obtain stop time");
        } else {
            deltat = (jobstop.tv_sec - jobstart.tv_sec)*1000000 +
                     (jobstop.tv_usec - jobstart.tv_usec);
            opal_output(0, "pls_tm: launch of entire job required %d usec", deltat);
        }
    }

    if (mca_pls_tm_component.debug) {
        opal_output(0, "pls:tm:launch: finished\n");
    }

    return rc;
}

/*
 * Ask the active daemons of a job to kill their local processes.
 *
 * Builds the list of active daemons for jobid (filtered by attrs via
 * orte_pls_base_get_active_daemons), passes it with timeout to
 * orte_pls_base_orted_kill_local_procs, then releases the list.
 *
 * Returns the status of the last base call made: ORTE_SUCCESS when
 * both succeed, otherwise the first failing call's error code (which
 * is also logged through ORTE_ERROR_LOG).
 */
static int pls_tm_terminate_job(orte_jobid_t jobid, struct timeval *timeout, opal_list_t *attrs)
{
    int rc;
    opal_list_t daemons;
    opal_list_item_t *item;

    /* construct the list of active daemons on this job */
    OBJ_CONSTRUCT(&daemons, opal_list_t);
    if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, jobid, attrs))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }

    /* order them to kill their local procs for this job */
    if (ORTE_SUCCESS != (rc = orte_pls_base_orted_kill_local_procs(&daemons, jobid, timeout))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }

    /* success and failure both fall through here so the list is always emptied */
 CLEANUP:
    while (NULL != (item = opal_list_remove_first(&daemons))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&daemons);
    return rc;
}

/**
 * Terminate the orteds for a given job
 */
int
pls_tm_terminate_orteds(orte_jobid_t jobid, struct timeval *timeout, opal_list_t *attrs){    int rc;    opal_list_t daemons;    opal_list_item_t *item;        /* construct the list of active daemons on this job */    OBJ_CONSTRUCT(&daemons, opal_list_t);    if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, jobid, attrs))) {        ORTE_ERROR_LOG(rc);        goto CLEANUP;    }        /* now tell them to die! */    if (ORTE_SUCCESS != (rc = orte_pls_base_orted_exit(&daemons, timeout))) {        ORTE_ERROR_LOG(rc);    }    CLEANUP:    while (NULL != (item = opal_list_remove_first(&daemons))) {        OBJ_RELEASE(item);    }    OBJ_DESTRUCT(&daemons);    return rc;}/* * TM can't kill individual processes -- PBS will kill the entire job */static int pls_tm_terminate_proc(const orte_process_name_t *name){    if (mca_pls_tm_component.debug) {        opal_output(0, "pls:tm:terminate_proc: not supported");    }    return ORTE_ERR_NOT_SUPPORTED;}static int pls_tm_signal_job(orte_jobid_t jobid, int32_t signal, opal_list_t *attrs){    int rc;    opal_list_t daemons;    opal_list_item_t *item;        /* construct the list of active daemons on this job */    OBJ_CONSTRUCT(&daemons, opal_list_t);    if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, jobid, attrs))) {        ORTE_ERROR_LOG(rc);        OBJ_DESTRUCT(&daemons);        return rc;    }        /* order them to pass this signal to their local procs */    if (ORTE_SUCCESS != (rc = orte_pls_base_orted_signal_local_procs(&daemons, signal))) {        ORTE_ERROR_LOG(rc);    }        while (NULL != (item = opal_list_remove_first(&daemons))) {        OBJ_RELEASE(item);    }    OBJ_DESTRUCT(&daemons);    return rc;}static int pls_tm_signal_proc(const orte_process_name_t *name, int32_t signal){    return ORTE_ERR_NOT_IMPLEMENTED;}/** * Cancel an operation involving comm to an orted */static int pls_tm_cancel_operation(void){    int rc;    if (ORTE_SUCCESS != (rc = 
orte_pls_base_orted_cancel_operation())) {        ORTE_ERROR_LOG(rc);    }        return rc;}/* * Free stuff */static int pls_tm_finalize(void){    int rc;        /* cleanup any pending recvs */    if (ORTE_SUCCESS != (rc = orte_pls_base_comm_stop())) {        ORTE_ERROR_LOG(rc);    }    return ORTE_SUCCESS;}static int pls_tm_connect(void){    int ret;    struct tm_roots tm_root;    int count, progress;    /* try a couple times to connect - might get busy signals every       now and then */    for (count = 0 ; count < 10; ++count) {        ret = tm_init(NULL, &tm_root);        if (TM_SUCCESS == ret) {            return ORTE_SUCCESS;        }        for (progress = 0 ; progress < 10 ; ++progress) {            opal_progress();#if HAVE_SCHED_YIELD            sched_yield();#endif        }    }    return ORTE_ERR_RESOURCE_BUSY;}static int pls_tm_disconnect(void){    tm_finalize();    return ORTE_SUCCESS;}static int pls_tm_check_path(char *exe, char **env){    static int size = 256;    int i;    char *file;    char *cwd;    char *path = NULL;    /* Do we want this check at all? */    if (!mca_pls_tm_component.want_path_check) {        return ORTE_SUCCESS;    }    /* Find the path in the supplied environment */    for (i = 0; NULL != env[i]; ++i) {        if (0 == strncmp("PATH=", env[i], 5)) {            path = strdup(env[i]);            break;        }    }    if (NULL == env[i]) {        path = strdup("NULL");    }    /* Check the already-successful paths (i.e., be a little       friendlier to the filesystem -- if we find the executable       successfully, save it) */    for (i = 0; NULL != mca_pls_tm_component.checked_paths &&             NULL != mca_pls_tm_component.checked_paths[i]; ++i) {        if (0 == strcmp(path, mca_pls_tm_component.checked_paths[i])) {            return ORTE_SUCCESS;        }    }    /* We didn't already find it, so check now.  First, get the cwd. 
*/    do {        cwd = malloc(size);        if (NULL == cwd) {            return ORTE_ERR_OUT_OF_RESOURCE;        }        if (NULL == getcwd(cwd, size)) {            free(cwd);            if (ERANGE == errno) {                size *= 2;            } else {                return ORTE_ERR_IN_ERRNO;            }        } else {            break;        }    } while (1);    /* Now do the search */    file = opal_path_findv(exe, X_OK, env, cwd);    free(cwd);    if (NULL == file) {        free(path);        return ORTE_ERR_NOT_FOUND;    }    if (mca_pls_tm_component.debug) {        opal_output(0, "pls:tm: found %s", file);    }    free(file);    /* Success -- so cache it */    opal_argv_append_nosize(&mca_pls_tm_component.checked_paths, path);    /* All done */    free(path);    return ORTE_SUCCESS;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -