
📄 clog_sync.c

📁 Fortran parallel computing package
💻 C
📖 Page 1 of 2
/*
   (C) 2001 by Argonne National Laboratory.
       See COPYRIGHT in top-level directory.
*/
#include "mpe_logging_conf.h"

#if defined( STDC_HEADERS ) || defined( HAVE_STDIO_H )
#include <stdio.h>
#endif
#if defined( STDC_HEADERS ) || defined( HAVE_STDLIB_H )
#include <stdlib.h>
#endif
#if defined( STDC_HEADERS ) || defined( HAVE_STRING_H )
#include <string.h>
#endif

#if !defined( CLOG_NOMPI )
#include "mpi.h"
#else
#include "mpi_null.h"
#endif /* End of !defined( CLOG_NOMPI ) */

#include "clog_const.h"
#include "clog_mem.h"
#include "clog_timer.h"
#include "clog_util.h"
#include "clog_sync.h"

CLOG_Sync_t *CLOG_Sync_create( int world_size, int world_rank )
{
    CLOG_Sync_t  *sync;

    sync = (CLOG_Sync_t *) MALLOC( sizeof(CLOG_Sync_t) );
    if ( sync == NULL ) {
        fprintf( stderr, __FILE__":CLOG_Sync_create() - \n"
                         "\t""MALLOC() fails for CLOG_Sync_t!\n" );
        fflush( stderr );
        return NULL;
    }

    sync->is_ok_to_sync  = CLOG_BOOL_FALSE;
    sync->root           = 0;
    sync->frequency      = CLOG_SYNC_FREQ_COUNT;
    sync->algorithm_ID   = CLOG_SYNC_AGRM_DEFAULT;
    sync->world_size     = world_size;
    sync->world_rank     = world_rank;

    return sync;
}

void CLOG_Sync_free( CLOG_Sync_t **sync_handle )
{
    CLOG_Sync_t  *sync;

    sync = *sync_handle;
    if ( sync != NULL ) {
        FREE( sync );
    }
    *sync_handle  = NULL;
}

void CLOG_Sync_init( CLOG_Sync_t *sync )
{
    char         *env_sync_agrm;
    char         *env_sync_freq;
    int           local_is_ok_to_sync;

    if ( CLOG_Util_is_MPIWtime_synchronized() == CLOG_BOOL_TRUE )
        local_is_ok_to_sync = CLOG_BOOL_FALSE;
    else
        local_is_ok_to_sync = CLOG_BOOL_TRUE;

    /* If MPE_CLOCKS_SYNC is not set, use the value of MPI_WTIME_IS_GLOBAL */
    local_is_ok_to_sync = CLOG_Util_getenvbool( "MPE_CLOCKS_SYNC",
                                                local_is_ok_to_sync );
    PMPI_Allreduce( &local_is_ok_to_sync, &(sync->is_ok_to_sync),
                    1, MPI_INT, MPI_MAX, MPI_COMM_WORLD );

    env_sync_freq = (char *) getenv( "MPE_SYNC_FREQUENCY" );
    if ( env_sync_freq != NULL ) {
        sync->frequency = atoi( env_sync_freq );
    }
    PMPI_Bcast( &(sync->frequency), 1, MPI_INT,
                sync->root, MPI_COMM_WORLD );

    env_sync_agrm = (char *) getenv( "MPE_SYNC_ALGORITHM" );
    if ( env_sync_agrm != NULL ) {
        if (  strcmp( env_sync_agrm, "DEFAULT" ) == 0
           || strcmp( env_sync_agrm, "default" ) == 0 )
            sync->algorithm_ID = CLOG_SYNC_AGRM_DEFAULT;
        if (  strcmp( env_sync_agrm, "SEQ" ) == 0
           || strcmp( env_sync_agrm, "seq" ) == 0 )
            sync->algorithm_ID = CLOG_SYNC_AGRM_SEQ;
        if (  strcmp( env_sync_agrm, "BITREE" ) == 0
           || strcmp( env_sync_agrm, "bitree" ) == 0 )
            sync->algorithm_ID = CLOG_SYNC_AGRM_BITREE;
        if (  strcmp( env_sync_agrm, "ALTNGBR" ) == 0
           || strcmp( env_sync_agrm, "altngbr" ) == 0 )
            sync->algorithm_ID = CLOG_SYNC_AGRM_ALTNGBR;
    }
    PMPI_Bcast( &(sync->algorithm_ID), 1, MPI_INT,
                sync->root, MPI_COMM_WORLD );
}

static int CLOG_Sync_ring_rank( int world_size, int root, int adjusting_rank )
{
    if ( adjusting_rank >= root )
        return adjusting_rank - root;
    else
        return adjusting_rank - root + world_size;
}

/*
    A sequential and non-scalable, O(N), algorithm.
    The root process serially synchronizes with each slave, using the
    first algorithm, similar to the one quoted in Gropp's "Scalable clock
    synchronization on distributed processors without a common clock".
    The array is calculated on the root and the elements are then scattered
    back to each process based on rank, local_time_offset.
*/
static CLOG_Time_t CLOG_Sync_run_seq( CLOG_Sync_t *sync )
{
    int           ii, idx;
    CLOG_Time_t   dummytime;
    CLOG_Time_t  *gpofst_pairs;
    CLOG_Time_t   sync_time_start, sync_time_final, sync_time_esti;
    CLOG_Time_t   bestgap, bestshift;
    MPI_Request  *sync_reqs;
    MPI_Status    status;

    dummytime    = 0.0;
    sync_reqs    = NULL;
    gpofst_pairs = NULL;
    if ( sync->world_rank == sync->root ) {
        sync_reqs    = (MPI_Request *) MALLOC( sync->world_size
                                             * sizeof(MPI_Request) );
        gpofst_pairs = (CLOG_Time_t *) MALLOC( 2 * sync->world_size
                                             * sizeof(CLOG_Time_t) );
    }

    PMPI_Barrier( MPI_COMM_WORLD );
    PMPI_Barrier( MPI_COMM_WORLD ); /* approximate common starting point */

    if ( sync->world_rank == sync->root ) {
        /* I am the root node, but not necessarily rank 0 */
        for ( ii = 0; ii < sync->world_size; ii++ ) {
            if ( ii != sync->root ) {
                bestgap    = CLOG_SYNC_LARGEST_GAP;
                bestshift  = 0.0;
                PMPI_Send( &dummytime, 0, CLOG_TIME_MPI_TYPE, ii,
                           CLOG_SYNC_MASTER_READY, MPI_COMM_WORLD );
                PMPI_Recv( &dummytime, 0, CLOG_TIME_MPI_TYPE, ii,
                           CLOG_SYNC_SLAVE_READY, MPI_COMM_WORLD,
                           &status );
                for ( idx = 0; idx < sync->frequency; idx++ ) {
                    sync_time_start = CLOG_Timer_get();
                    PMPI_Send( &dummytime, 1, CLOG_TIME_MPI_TYPE, ii,
                               CLOG_SYNC_TIME_QUERY, MPI_COMM_WORLD );
                    PMPI_Recv( &sync_time_esti, 1, CLOG_TIME_MPI_TYPE, ii,
                               CLOG_SYNC_TIME_ANSWER, MPI_COMM_WORLD,
                               &status );
                    sync_time_final = CLOG_Timer_get();
                    if ( (sync_time_final - sync_time_start) < bestgap ) {
                        bestgap   = sync_time_final - sync_time_start;
                        bestshift = 0.5 * (sync_time_final + sync_time_start)
                                  - sync_time_esti;
                    }
                }
                gpofst_pairs[2*ii]   = bestgap;
                gpofst_pairs[2*ii+1] = bestshift;
            }
            else {   /* ii = root */
                gpofst_pairs[2*ii]   = 0.0;
                gpofst_pairs[2*ii+1] = 0.0;
            }
        }
    }
    else {                        /* not the root */
        PMPI_Recv( &dummytime, 0, CLOG_TIME_MPI_TYPE, sync->root,
                   CLOG_SYNC_MASTER_READY, MPI_COMM_WORLD, &status );
        PMPI_Send( &dummytime, 0, CLOG_TIME_MPI_TYPE, sync->root,
                   CLOG_SYNC_SLAVE_READY, MPI_COMM_WORLD );
        for ( idx = 0; idx < sync->frequency; idx++ ) {
            PMPI_Recv( &dummytime, 1, CLOG_TIME_MPI_TYPE, sync->root,
                       CLOG_SYNC_TIME_QUERY, MPI_COMM_WORLD, &status );
            sync_time_esti = CLOG_Timer_get();
            PMPI_Send( &sync_time_esti, 1, CLOG_TIME_MPI_TYPE, sync->root,
                       CLOG_SYNC_TIME_ANSWER, MPI_COMM_WORLD );
        }
    }

    PMPI_Scatter( gpofst_pairs, 2, CLOG_TIME_MPI_TYPE,
                  sync->best_gpofst, 2, CLOG_TIME_MPI_TYPE,
                  sync->root, MPI_COMM_WORLD );

    if ( sync->world_rank == sync->root ) {
        FREE( gpofst_pairs );
        FREE( sync_reqs );
    }

    return sync->best_gpofst[1];
}

/*
    A scalable, O(log2(N)), algorithm.
    A binomial-tree algorithm that does O(log2(N)) steps.
    A scalable but not perfectly parallel algorithm, yet it produces
    reasonable accuracy compared to the O(N) algorithm.
*/
static CLOG_Time_t CLOG_Sync_run_bitree( CLOG_Sync_t *sync )
{
    int          prev_adj_rank, next_adj_rank, local_adj_rank;
    int          rank_sep, rank_quot, rank_rem;
    int          gpofst_size, gpofst_idx, idx, ii;
    int          send_cnt, recv_cnt, max_cnt;
    CLOG_Time_t *gpofst_pairs, *gpofst_ptr;
    CLOG_Time_t  best_gp, best_ofst, dummytime;
    CLOG_Time_t  sync_time_start, sync_time_final, sync_time_esti;
    MPI_Status   status;

    PMPI_Barrier( MPI_COMM_WORLD );
    PMPI_Barrier( MPI_COMM_WORLD ); /* approximate common starting point */

    local_adj_rank  = CLOG_Sync_ring_rank( sync->world_size, sync->root,
                                           sync->world_rank );

    dummytime     = 0.0;
    gpofst_size   = 0;
    gpofst_pairs  = NULL;
    if ( local_adj_rank % 2 == 0 ) {
        if ( local_adj_rank == 0 ) /* if ( sync->world_rank == sync->root ) */
            gpofst_size  = 2 * sync->world_size;
        else
            gpofst_size  = sync->world_size;
        gpofst_pairs = (CLOG_Time_t *) MALLOC( gpofst_size
                                             * sizeof(CLOG_Time_t) );
    }

    /* Initialize all the gap variables in gpofst_pairs[ even index ] */
    for ( ii = 0; ii < gpofst_size; ii+=2 ) {
        gpofst_pairs[ ii ] = CLOG_SYNC_LARGEST_GAP;
    }

    for ( idx = 0; idx < sync->frequency; idx++ ) {
        /* Setup variables for the binomial-tree algorithm */
        rank_sep   = 1;
        rank_quot  = local_adj_rank >> 1; /* rank_quot  = local_adj_rank / 2; */
        rank_rem   = local_adj_rank &  1; /* rank_rem   = local_adj_rank % 2; */
        gpofst_idx = 0;  /* point at next available index in gpofst_pairs[] */
        while ( rank_sep < sync->world_size ) {
            if ( rank_rem == 0 ) {
                next_adj_rank  = CLOG_Sync_ring_rank( sync->world_size,
                                                      sync->root,
                                                      sync->world_rank
                                                    + rank_sep );
                if ( next_adj_rank < sync->world_size ) {
                    /* Send&Recv ready message */
                    PMPI_Send( &dummytime, 0, CLOG_TIME_MPI_TYPE,
                               next_adj_rank, CLOG_SYNC_MASTER_READY,
                               MPI_COMM_WORLD );
                    /*
                       Set max_cnt to maximum possible receive count
                       as if sync->world_size == power_of_2.
                       - gpofst_pairs[ gpofst_idx .. gpofst_idx+1 ] are for
                         DIRECT gpofst exchange with the remote process.
                       - gpofst_pairs[ gpofst_idx+2 ..
                                       gpofst_idx+2 + recv_cnt-1 ] are for
                         INDIRECT gpofst exchanges with the remote process,
                         i.e. other gpofst exchanges between the remote and
                         other processes (not the local and remote processes).
                    */
                    max_cnt  = 2 * ( rank_sep-1 );
                    if ( max_cnt > 0 )
                        gpofst_ptr = &( gpofst_pairs[gpofst_idx+2] );
                    else
                        gpofst_ptr = &dummytime;
                    PMPI_Recv( gpofst_ptr, max_cnt, CLOG_TIME_MPI_TYPE,
                    /* (listing continues on page 2) */
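Both synchronization routines in this file rest on the same round-trip measurement: the master records a local start time, sends a query, the slave replies with its own clock reading, and the master records a local finish time. The slave's clock offset is then estimated as the midpoint of the master's interval minus the slave's timestamp, and the exchange with the smallest round-trip gap is kept as the most trustworthy, exactly as the bestgap/bestshift update in CLOG_Sync_run_seq() does. The sketch below isolates that arithmetic outside of MPI; the Time_t alias and the three sample exchanges are invented for illustration and are not part of the library.

/*
 * Stand-alone illustration of the best-gap/offset selection used in
 * CLOG_Sync_run_seq().  The three sample exchanges are made up; only the
 * selection arithmetic mirrors the library code.
 */
#include <stdio.h>

typedef double Time_t;   /* stands in for CLOG_Time_t */

int main( void )
{
    /* Each row is one ping-pong: { master send, slave reply, master recv } */
    Time_t exchanges[3][3] = {
        { 10.000, 12.505, 10.012 },   /* 12 ms round trip */
        { 10.020, 12.523, 10.026 },   /*  6 ms round trip */
        { 10.040, 12.560, 10.060 }    /* 20 ms round trip */
    };
    Time_t best_gap   = 1.0e10;       /* plays the role of CLOG_SYNC_LARGEST_GAP */
    Time_t best_shift = 0.0;
    int    idx;

    for ( idx = 0; idx < 3; idx++ ) {
        Time_t t_start  = exchanges[idx][0];
        Time_t t_remote = exchanges[idx][1];
        Time_t t_final  = exchanges[idx][2];
        Time_t gap      = t_final - t_start;
        if ( gap < best_gap ) {
            best_gap   = gap;
            /* midpoint of the local interval minus the remote clock reading */
            best_shift = 0.5 * (t_final + t_start) - t_remote;
        }
    }
    printf( "best gap = %.3f s, estimated offset = %.3f s\n",
            best_gap, best_shift );
    return 0;
}

Here the second exchange has the tightest gap (6 ms), so its offset estimate of about -2.5 s wins; a tighter round trip bounds the error of the midpoint estimate more closely, which is why both algorithms keep only the best gap seen.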

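For orientation, the sketch below shows how the routines on this page fit together. It is a hypothetical driver, not code from MPE: it assumes the internal header clog_sync.h is on the include path and that the struct fields assigned in CLOG_Sync_create() are visible; in the real library these calls are driven by MPE's own logging layer rather than by application code.

/*
 * Hypothetical driver (assumption, not part of MPE) showing the
 * create/init/free sequence for the sync descriptor.  CLOG_Sync_init()
 * is collective over MPI_COMM_WORLD, so every rank must call it.
 */
#include <stdio.h>
#include "mpi.h"
#include "clog_sync.h"

int main( int argc, char *argv[] )
{
    CLOG_Sync_t *sync;
    int          world_size, world_rank;

    MPI_Init( &argc, &argv );
    MPI_Comm_size( MPI_COMM_WORLD, &world_size );
    MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );

    /* Allocate the descriptor, then pick up MPE_CLOCKS_SYNC,
       MPE_SYNC_FREQUENCY and MPE_SYNC_ALGORITHM from the environment */
    sync = CLOG_Sync_create( world_size, world_rank );
    CLOG_Sync_init( sync );

    if ( world_rank == 0 )
        printf( "is_ok_to_sync=%d frequency=%d algorithm_ID=%d\n",
                sync->is_ok_to_sync, sync->frequency, sync->algorithm_ID );

    CLOG_Sync_free( &sync );
    MPI_Finalize();
    return 0;
}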