mpptest.c

来自「MPICH是MPI的重要研究,提供了一系列的接口函数,为并行计算的实现提供了编程」· C语言代码 · 共 877 行 · 第 1/2 页
877 行
/*D   mpptest - Measure the communications performance of a message-passing system   Details:   The greatest challange in performing these experiments in making the results   reproducible.  On many (most?) systems, there are various events that    perturb timings; these can occur on the scale of 10's of milliseconds.    To attempt to remove the effect of these events, we make multiple tests,   taking the minimum of many tests, each of which gives an average time.  To   reduce the effect of transient perturbations, the entire sequence of tests   is run several times, taking the best (fastest) time on each test.  Finally,   a post-processing step retests any anomolies, defined as single peaks peaks   that are significantly greater than the surrounding times (using a locally   linear-fit model).D*//*   This code is a major re-write of an older version that was generated   automatically from an older Chameleon program.  Previous versions  worked with a wide variety of message-passing systems.*/#include <stdio.h>#include <math.h>#ifndef HUGE_VAL#define HUGE_VAL 10.0e38#endif#include "mpi.h"#include "mpptest.h"#include "getopts.h"int __NUMNODES, __MYPROCID;#if HAVE_STDLIB_H#include <stdlib.h>#endif#ifndef DEFAULT_REPS#define DEFAULT_REPS 50#endif/* Forward declarations */void PrintHelp( char *[] );/*    This is a simple program to test the communications performance of    a parallel machine.   */    /* These statics (globals) are used to estimate the parameters in the   basic (s + rn) complexity model   Sum of lengths is stored as a double to give 53 bit integer on   systems where sizeof(int) == 4. */static double sumtime = 0.0, sumlentime = 0.0;static double sumlen  = 0.0,  sumlen2 = 0.0;static double sumtime2 = 0.0;static int    ntest   = 0;/* If doinfo is 0, don't write out the various text lines */static int    doinfo = 1;/* Scaling of time and rate */static double TimeScale = 1.0;static double RateScale = 1.0;/* This is the number of times to run a test, taking as time the minimum   achieve timing.  This uses an adaptive approach that also stops when   minThreshTest values are within a few percent of the current minimum */static int    minreps       = 30;static double repsThresh    = 0.05;char   protocol_name[256];/*    We would also like to adaptively modify the number of repetitions to    meet a time estimate (later, we'd like to meet a statistical estimate).      One relatively easy way to do this is to use a linear estimate (either   extrapolation or interpolation) based on 2 other computations.   That is, if the goal time is T and the measured tuples (time,reps,len)   are, the formula for the local time is s + r n, where   r = (time2/reps2 - time1/reps1) / (len2 - len1)   s = time1/reps1 - r * len1   Then the appropriate number of repititions to use is   Tgoal / (s + r * len) = reps */static double Tgoal = 1.0;/* If less than Tgoalmin is spent, increase the number of repititions */static double TgoalMin = 0.5;/* This structure allows a collection of arbitray sizes to be specified */#define MAX_SIZE_LIST 256static int sizelist[MAX_SIZE_LIST];static int nsizes = 0;/* We wish to control the TOTAL amount of time that the test takes.   We could do this with gettimeofday or clock or something, but fortunately   the MPI timer is an elapsed timer */static double max_run_time = 15.0*60.0;static double start_time = 0.0;/* All test data is contained in an array of values.  Because we may    adaptively choose the message lengths, provision is made to maintain the   list elements in an array, and for many processing tasks (output, smoothing)   only the list version is used. *//* These are used to contain results for a single test */typedef struct _TwinResults {/*    double len, t, mean_time, rate; */    double t,               /* min of the observations (per loop) */	   max_time,        /* max of the observations (per loop) */           sum_time;        /* sum of all of the observations */    int    len;             /* length of the message for this test */    int    ntests;          /* number of observations */    int    reps;    struct _TwinResults *next, *prev;    } TwinResults;TwinResults *AllocResultsArray( int );void SetResultsForStrided( int first, int last, int incr, TwinResults *twin );void SetResultsForList( int sizelist[], int nsizes, TwinResults *twin );void SetRepsForList( TwinResults *, int );int RunTest( TwinResults *, double (*)(int,int,void *), void *, double );void RunTestList( TwinResults *, double (*)(int,int,void*), void* );int SmoothList( TwinResults *, double (*)(int,int,void *), void * );int RefineTestList( TwinResults *, double (*)(int,int,void *),void *,		    int, double );void OutputTestList( TwinResults *, void *, int, int, int );/* Initialize the results array of a given list of data *//* This structure is used to provice information for the automatic    message-length routines */typedef struct {    double (*f)( int, int, void * );    int    reps, proc1, proc2;    void *msgctx;    /* Here is where we should put "recent" timing data used to estimate       the values of reps */    double t1, t2;    int    len1, len2;    } TwinTest;int main( int argc, char *argv[] ){    int    dist;    double (* BasicCommTest)( int, int, void * );    void *MsgCtx = 0; /* This is the context of the message-passing operation */    void *outctx;    void (*ChangeDist)( int, PairData ) = 0;    int  reps,proc1,proc2, distance_flag,distance;    int  first,last,incr, svals[3];    int      autosize = 0, autodx;    double   autorel;    char     units[32];         /* Name of units of length */    MPI_Init( &argc, &argv );    MPI_Comm_size( MPI_COMM_WORLD, &__NUMNODES );    MPI_Comm_rank( MPI_COMM_WORLD, &__MYPROCID );    strcpy( protocol_name, "blocking" );    strcpy( units, "(bytes)" );    if (SYArgHasName( &argc, argv, 1, "-help" )) {	if (__MYPROCID == 0) PrintHelp( argv );	MPI_Finalize();	return 0;    }    if (__NUMNODES < 2) {	fprintf( stderr, "Must run mpptest with at least 2 nodes\n" );	return 1;    }/* Get the output context */    outctx = SetupGraph( &argc, argv );    if (SYArgHasName( &argc, argv, 1, "-noinfo" ))    doinfo    = 0;    reps          = DEFAULT_REPS;    proc1         = 0;    proc2         = __NUMNODES-1;    distance_flag = 0;    svals[0]      = 0;    svals[1]      = 1024;    svals[2]      = 32;    if (SYArgHasName( &argc, argv, 1, "-distance" ))  distance_flag++;    SYArgGetIntVec( &argc, argv, 1, "-size", 3, svals );    nsizes = SYArgGetIntList( &argc, argv, 1, "-sizelist", MAX_SIZE_LIST, 			      sizelist );    if (SYArgHasName( &argc, argv, 1, "-logscale" )) {	/* Use the sizelist field to specify a collection of power of	   two sizes.  This is a temporary hack until we have something	   better.  You can use the -size argument to set min and max values	   (the stride is ignored) */	int k;	nsizes = 0;	if (svals[0] == 0) {	    sizelist[nsizes++] = 0;	    k = 4;	}	else {	    k = svals[0];	}	while( k <= svals[1] && nsizes < MAX_SIZE_LIST ) {	    sizelist[nsizes++] = k;	    k *= 2;	}	/* Need to tell graphics package to use log/log scale */	DataScale( outctx, 1 );    }    SYArgGetInt(    &argc, argv, 1, "-reps", &reps );    if (SYArgGetDouble( &argc, argv, 1, "-tgoal", &Tgoal )) {	if (TgoalMin > 0.1 * Tgoal) TgoalMin = 0.1 * Tgoal;    }    SYArgGetDouble( &argc, argv, 1, "-rthresh", &repsThresh );    SYArgGetInt( &argc, argv, 1, "-sample_reps", &minreps );    SYArgGetDouble( &argc, argv, 1, "-max_run_time", &max_run_time );    autosize = SYArgHasName( &argc, argv, 1, "-auto" );    if (autosize) {	autodx = 4;	SYArgGetInt( &argc, argv, 1, "-autodx", &autodx );	autorel = 0.02;	SYArgGetDouble( &argc, argv, 1, "-autorel", &autorel );    }/* Pick the general test based on the presence of an -gop, -overlap, -bisect   or no arg */    SetPattern( &argc, argv );    if (SYArgHasName( &argc, argv, 1, "-gop")) {	/* we need to fix this cast eventually */	BasicCommTest = (double (*)(int,int,void*))			 GetGOPFunction( &argc, argv, protocol_name, units );	MsgCtx = GOPInit( &argc, argv );    }    else if (SYArgHasName( &argc, argv, 1, "-bisect" )) {	BasicCommTest = GetPairFunction( &argc, argv, protocol_name );	dist = 1;	SYArgGetInt( &argc, argv, 1, "-bisectdist", &dist );	MsgCtx     = BisectInit( dist );	ChangeDist = BisectChange;	strcat( protocol_name, "-bisect" );	if (SYArgHasName( &argc, argv, 1, "-debug" ))	    PrintPairInfo( MsgCtx );	TimeScale = 0.5;	RateScale = (double) __NUMNODES; /* * (2 * 0.5) */    }    else if (SYArgHasName( &argc, argv, 1, "-overlap" )) {	int MsgSize;	char cbuf[32];	if (SYArgHasName( &argc, argv, 1, "-sync" )) {	    BasicCommTest = round_trip_b_overlap;	    strcpy( protocol_name, "blocking" );	}	else {  /* Assume -async */	    BasicCommTest = round_trip_nb_overlap;	    strcpy( protocol_name, "nonblocking" );	}	MsgSize = 0;	SYArgGetInt( &argc, argv, 1, "-overlapmsgsize", &MsgSize );	MsgCtx  = OverlapInit( proc1, proc2, MsgSize );	/* Compute floating point lengths if requested */	if (SYArgHasName( &argc, argv, 1, "-overlapauto")) {	    OverlapSizes( MsgSize >= 0 ? MsgSize : 0, svals, MsgCtx );	}	strcat( protocol_name, "-overlap" );	if (MsgSize >= 0) {	    sprintf( cbuf, "-%d bytes", MsgSize );	}	else {	    strcpy( cbuf, "-no msgs" );	}	strcat( protocol_name, cbuf );	TimeScale = 0.5;	RateScale = 2.0;    }    else if (SYArgHasName( &argc, argv, 1, "-memcpy" )) {	BasicCommTest = memcpy_rate;	MsgCtx     = 0;	ChangeDist = 0;	strcpy( protocol_name, "memcpy" );	TimeScale = 1.0;	RateScale = 1.0;    }    else {	/* Pair by default */	BasicCommTest = GetPairFunction( &argc, argv, protocol_name );	MsgCtx = PairInit( proc1, proc2 );	ChangeDist = PairChange;	if (SYArgHasName( &argc, argv, 1, "-debug" ))	    PrintPairInfo( MsgCtx );	TimeScale = 0.5;	RateScale = 2.0;    }    first = svals[0];    last  = svals[1];    incr  = svals[2];    if (incr == 0) incr = 1;/*   Finally, we are ready to run the tests.  We want to report times as   the times for a single link, and rates as the aggregate rate.   To do this, we need to know how to scale both the times and the rates.   Times: scaled by the number of one-way trips measured by the base testing   code.  This is often 2 trips, or a scaling of 1/2.   Rates: scaled by the number of simultaneous participants (as well as   the scaling in times).  Compute the rates based on the updated time,    then multiply by the number of participants.  Note that, for a single   sender, time and rate are inversely proportional (that is, if TimeScale    is 0.5, RateScale is 2.0). */    start_time = MPI_Wtime();/* If the distance flag is set, we look at a range of distances.  Otherwise,   we just use the first and last processor */    if (doinfo && __MYPROCID == 0) {	HeaderGraph( outctx, protocol_name, (char *)0, units );    }    if(distance_flag) {	for(distance=1;distance<GetMaxIndex();distance++) {	    proc2 = GetNeighbor( 0, distance, 0 );	    if (ChangeDist)		(*ChangeDist)( distance, MsgCtx );	    time_function(reps,first,last,incr,proc1,proc2,			  BasicCommTest,outctx,			  autosize,autodx,autorel,MsgCtx);	    ClearTimes();	}    }    else{	time_function(reps,first,last,incr,proc1,proc2,BasicCommTest,outctx,		      autosize,autodx,autorel,MsgCtx);    }/*    Generate the "end of page".  This allows multiple distance graphs on the   same plot  */    if (doinfo && __MYPROCID == 0) 	EndPageGraph( outctx );    MPI_Finalize();    return 0;}/*    This is the basic routine for timing an operation.   Input Parameters:.  reps - Basic number of times to run basic test (see below).  first,last,incr - length of data is first, first+incr, ... last         (if last != first + k * incr, then actual last value is the          value of first + k * incr that is <= last and such that          first + (k+1) * incr > last, just as you'd expect).  proc1,proc2 - processors to participate in communication.  Note that         all processors must call because we use global operations to          manage some operations, and we want to avoid using process-subset         operations (supported in Chameleon) to simplify porting this code.  CommTest  -  Routine to call to run a basic test.  This routine returns          the time that the test took in seconds..  outctx -  Pointer to output context.  autosize - If true, the actual sizes are picked automatically.  That is         instead of using first, first + incr, ... , the routine choses values         of len such that first <= len <= last and other properties, given          by autodx and autorel, are satisfied..  autodx - Parameter for TST1dauto, used to set minimum distance between          test sizes.  4 (for 4 bytes) is good for small values of last.  autorel - Relative error tolerance used by TST1dauto in determining the          message sizes used..  msgctx - Context to pass through to operation routine */void time_function(reps,first,last,incr,proc1,proc2,CommTest,outctx,		   autosize,autodx,autorel,msgctx)int    reps,first,last,incr,proc1,proc2,autosize,autodx;double autorel;double (* CommTest)( int, int, void *);void      *outctx;void *msgctx;{    int    distance,myproc;    double s, r;    myproc   = __MYPROCID;    distance = ((proc1)<(proc2)?(proc2)-(proc1):(proc1)-(proc2));    /* Run test, using either the simple direct test or the automatic length     test */    ntest = 0;    if (autosize) {	TwinResults *twin;	int k;	twin = AllocResultsArray( 1024 );	SetResultsForStrided( first, last, (last-first)/8, twin );	/* Run tests */	SetRepsForList( twin, reps );	for (k=0; k<minreps; k++) {	    int kk;	    for (kk=0; kk<5; kk++)		RunTestList( twin, CommTest, msgctx );	    RefineTestList( twin, CommTest, msgctx, autodx, autorel );	}	for (k=1; k<5; k++) {	    if (!SmoothList( twin, CommTest, msgctx )) break;	}	/* Final output */	if (myproc == 0) 	    OutputTestList( twin, outctx, proc1, proc2, distance );    }    else {	TwinResults *twin;	int k;	if (nsizes) {	    twin = AllocResultsArray( nsizes );	    SetResultsForList( sizelist, nsizes, twin );	}	else {	    nsizes = 1 + (last - first)/incr;	    twin = AllocResultsArray( nsizes );	    SetResultsForStrided( first, last, incr, twin );	}	/* Run tests */	for (k=1; k<minreps; k++) {	    SetRepsForList( twin, reps );	    RunTestList( twin, CommTest, msgctx );
mpptest.c - 源码说明

本页面展示了「MPICH是MPI的重要研究,提供了一系列的接口函数,为并行计算的实现提供了编程环境.」中的 mpptest.c 源码文件，采用 C语言编程语言编写，共 877 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与MPICH相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?