📄 mpptest.c
字号:
/*D mpptest - Measure the communications performance of a message-passing system Details: The greatest challange in performing these experiments in making the results reproducible. On many (most?) systems, there are various events that perturb timings; these can occur on the scale of 10's of milliseconds. To attempt to remove the effect of these events, we make multiple tests, taking the minimum of many tests, each of which gives an average time. To reduce the effect of transient perturbations, the entire sequence of tests is run several times, taking the best (fastest) time on each test. Finally, a post-processing step retests any anomolies, defined as single peaks peaks that are significantly greater than the surrounding times (using a locally linear-fit model).D*//* This code is a major re-write of an older version that was generated automatically from an older Chameleon program. Previous versions worked with a wide variety of message-passing systems.*/#include <stdio.h>#include <math.h>#ifndef HUGE_VAL#define HUGE_VAL 10.0e38#endif#include "mpi.h"#include "mpptest.h"#include "getopts.h"int __NUMNODES, __MYPROCID;#if HAVE_STDLIB_H#include <stdlib.h>#endif#ifndef DEFAULT_REPS#define DEFAULT_REPS 50#endif/* Forward declarations */void PrintHelp( char *[] );/* This is a simple program to test the communications performance of a parallel machine. */ /* These statics (globals) are used to estimate the parameters in the basic (s + rn) complexity model Sum of lengths is stored as a double to give 53 bit integer on systems where sizeof(int) == 4. */static double sumtime = 0.0, sumlentime = 0.0;static double sumlen = 0.0, sumlen2 = 0.0;static double sumtime2 = 0.0;static int ntest = 0;/* If doinfo is 0, don't write out the various text lines */static int doinfo = 1;/* Scaling of time and rate */static double TimeScale = 1.0;static double RateScale = 1.0;/* This is the number of times to run a test, taking as time the minimum achieve timing. This uses an adaptive approach that also stops when minThreshTest values are within a few percent of the current minimum */static int minreps = 30;static double repsThresh = 0.05;char protocol_name[256];/* We would also like to adaptively modify the number of repetitions to meet a time estimate (later, we'd like to meet a statistical estimate). One relatively easy way to do this is to use a linear estimate (either extrapolation or interpolation) based on 2 other computations. That is, if the goal time is T and the measured tuples (time,reps,len) are, the formula for the local time is s + r n, where r = (time2/reps2 - time1/reps1) / (len2 - len1) s = time1/reps1 - r * len1 Then the appropriate number of repititions to use is Tgoal / (s + r * len) = reps */static double Tgoal = 1.0;/* If less than Tgoalmin is spent, increase the number of repititions */static double TgoalMin = 0.5;/* This structure allows a collection of arbitray sizes to be specified */#define MAX_SIZE_LIST 256static int sizelist[MAX_SIZE_LIST];static int nsizes = 0;/* We wish to control the TOTAL amount of time that the test takes. We could do this with gettimeofday or clock or something, but fortunately the MPI timer is an elapsed timer */static double max_run_time = 15.0*60.0;static double start_time = 0.0;/* All test data is contained in an array of values. Because we may adaptively choose the message lengths, provision is made to maintain the list elements in an array, and for many processing tasks (output, smoothing) only the list version is used. *//* These are used to contain results for a single test */typedef struct _TwinResults {/* double len, t, mean_time, rate; */ double t, /* min of the observations (per loop) */ max_time, /* max of the observations (per loop) */ sum_time; /* sum of all of the observations */ int len; /* length of the message for this test */ int ntests; /* number of observations */ int reps; struct _TwinResults *next, *prev; } TwinResults;TwinResults *AllocResultsArray( int );void SetResultsForStrided( int first, int last, int incr, TwinResults *twin );void SetResultsForList( int sizelist[], int nsizes, TwinResults *twin );void SetRepsForList( TwinResults *, int );int RunTest( TwinResults *, double (*)(int,int,void *), void *, double );void RunTestList( TwinResults *, double (*)(int,int,void*), void* );int SmoothList( TwinResults *, double (*)(int,int,void *), void * );int RefineTestList( TwinResults *, double (*)(int,int,void *),void *, int, double );void OutputTestList( TwinResults *, void *, int, int, int );/* Initialize the results array of a given list of data *//* This structure is used to provice information for the automatic message-length routines */typedef struct { double (*f)( int, int, void * ); int reps, proc1, proc2; void *msgctx; /* Here is where we should put "recent" timing data used to estimate the values of reps */ double t1, t2; int len1, len2; } TwinTest;int main( int argc, char *argv[] ){ int dist; double (* BasicCommTest)( int, int, void * ); void *MsgCtx = 0; /* This is the context of the message-passing operation */ void *outctx; void (*ChangeDist)( int, PairData ) = 0; int reps,proc1,proc2, distance_flag,distance; int first,last,incr, svals[3]; int autosize = 0, autodx; double autorel; char units[32]; /* Name of units of length */ MPI_Init( &argc, &argv ); MPI_Comm_size( MPI_COMM_WORLD, &__NUMNODES ); MPI_Comm_rank( MPI_COMM_WORLD, &__MYPROCID ); strcpy( protocol_name, "blocking" ); strcpy( units, "(bytes)" ); if (SYArgHasName( &argc, argv, 1, "-help" )) { if (__MYPROCID == 0) PrintHelp( argv ); MPI_Finalize(); return 0; } if (__NUMNODES < 2) { fprintf( stderr, "Must run mpptest with at least 2 nodes\n" ); return 1; }/* Get the output context */ outctx = SetupGraph( &argc, argv ); if (SYArgHasName( &argc, argv, 1, "-noinfo" )) doinfo = 0; reps = DEFAULT_REPS; proc1 = 0; proc2 = __NUMNODES-1; distance_flag = 0; svals[0] = 0; svals[1] = 1024; svals[2] = 32; if (SYArgHasName( &argc, argv, 1, "-distance" )) distance_flag++; SYArgGetIntVec( &argc, argv, 1, "-size", 3, svals ); nsizes = SYArgGetIntList( &argc, argv, 1, "-sizelist", MAX_SIZE_LIST, sizelist ); if (SYArgHasName( &argc, argv, 1, "-logscale" )) { /* Use the sizelist field to specify a collection of power of two sizes. This is a temporary hack until we have something better. You can use the -size argument to set min and max values (the stride is ignored) */ int k; nsizes = 0; if (svals[0] == 0) { sizelist[nsizes++] = 0; k = 4; } else { k = svals[0]; } while( k <= svals[1] && nsizes < MAX_SIZE_LIST ) { sizelist[nsizes++] = k; k *= 2; } /* Need to tell graphics package to use log/log scale */ DataScale( outctx, 1 ); } SYArgGetInt( &argc, argv, 1, "-reps", &reps ); if (SYArgGetDouble( &argc, argv, 1, "-tgoal", &Tgoal )) { if (TgoalMin > 0.1 * Tgoal) TgoalMin = 0.1 * Tgoal; } SYArgGetDouble( &argc, argv, 1, "-rthresh", &repsThresh ); SYArgGetInt( &argc, argv, 1, "-sample_reps", &minreps ); SYArgGetDouble( &argc, argv, 1, "-max_run_time", &max_run_time ); autosize = SYArgHasName( &argc, argv, 1, "-auto" ); if (autosize) { autodx = 4; SYArgGetInt( &argc, argv, 1, "-autodx", &autodx ); autorel = 0.02; SYArgGetDouble( &argc, argv, 1, "-autorel", &autorel ); }/* Pick the general test based on the presence of an -gop, -overlap, -bisect or no arg */ SetPattern( &argc, argv ); if (SYArgHasName( &argc, argv, 1, "-gop")) { /* we need to fix this cast eventually */ BasicCommTest = (double (*)(int,int,void*)) GetGOPFunction( &argc, argv, protocol_name, units ); MsgCtx = GOPInit( &argc, argv ); } else if (SYArgHasName( &argc, argv, 1, "-bisect" )) { BasicCommTest = GetPairFunction( &argc, argv, protocol_name ); dist = 1; SYArgGetInt( &argc, argv, 1, "-bisectdist", &dist ); MsgCtx = BisectInit( dist ); ChangeDist = BisectChange; strcat( protocol_name, "-bisect" ); if (SYArgHasName( &argc, argv, 1, "-debug" )) PrintPairInfo( MsgCtx ); TimeScale = 0.5; RateScale = (double) __NUMNODES; /* * (2 * 0.5) */ } else if (SYArgHasName( &argc, argv, 1, "-overlap" )) { int MsgSize; char cbuf[32]; if (SYArgHasName( &argc, argv, 1, "-sync" )) { BasicCommTest = round_trip_b_overlap; strcpy( protocol_name, "blocking" ); } else { /* Assume -async */ BasicCommTest = round_trip_nb_overlap; strcpy( protocol_name, "nonblocking" ); } MsgSize = 0; SYArgGetInt( &argc, argv, 1, "-overlapmsgsize", &MsgSize ); MsgCtx = OverlapInit( proc1, proc2, MsgSize ); /* Compute floating point lengths if requested */ if (SYArgHasName( &argc, argv, 1, "-overlapauto")) { OverlapSizes( MsgSize >= 0 ? MsgSize : 0, svals, MsgCtx ); } strcat( protocol_name, "-overlap" ); if (MsgSize >= 0) { sprintf( cbuf, "-%d bytes", MsgSize ); } else { strcpy( cbuf, "-no msgs" ); } strcat( protocol_name, cbuf ); TimeScale = 0.5; RateScale = 2.0; } else if (SYArgHasName( &argc, argv, 1, "-memcpy" )) { BasicCommTest = memcpy_rate; MsgCtx = 0; ChangeDist = 0; strcpy( protocol_name, "memcpy" ); TimeScale = 1.0; RateScale = 1.0; } else { /* Pair by default */ BasicCommTest = GetPairFunction( &argc, argv, protocol_name ); MsgCtx = PairInit( proc1, proc2 ); ChangeDist = PairChange; if (SYArgHasName( &argc, argv, 1, "-debug" )) PrintPairInfo( MsgCtx ); TimeScale = 0.5; RateScale = 2.0; } first = svals[0]; last = svals[1]; incr = svals[2]; if (incr == 0) incr = 1;/* Finally, we are ready to run the tests. We want to report times as the times for a single link, and rates as the aggregate rate. To do this, we need to know how to scale both the times and the rates. Times: scaled by the number of one-way trips measured by the base testing code. This is often 2 trips, or a scaling of 1/2. Rates: scaled by the number of simultaneous participants (as well as the scaling in times). Compute the rates based on the updated time, then multiply by the number of participants. Note that, for a single sender, time and rate are inversely proportional (that is, if TimeScale is 0.5, RateScale is 2.0). */ start_time = MPI_Wtime();/* If the distance flag is set, we look at a range of distances. Otherwise, we just use the first and last processor */ if (doinfo && __MYPROCID == 0) { HeaderGraph( outctx, protocol_name, (char *)0, units ); } if(distance_flag) { for(distance=1;distance<GetMaxIndex();distance++) { proc2 = GetNeighbor( 0, distance, 0 ); if (ChangeDist) (*ChangeDist)( distance, MsgCtx ); time_function(reps,first,last,incr,proc1,proc2, BasicCommTest,outctx, autosize,autodx,autorel,MsgCtx); ClearTimes(); } } else{ time_function(reps,first,last,incr,proc1,proc2,BasicCommTest,outctx, autosize,autodx,autorel,MsgCtx); }/* Generate the "end of page". This allows multiple distance graphs on the same plot */ if (doinfo && __MYPROCID == 0) EndPageGraph( outctx ); MPI_Finalize(); return 0;}/* This is the basic routine for timing an operation. Input Parameters:. reps - Basic number of times to run basic test (see below). first,last,incr - length of data is first, first+incr, ... last (if last != first + k * incr, then actual last value is the value of first + k * incr that is <= last and such that first + (k+1) * incr > last, just as you'd expect). proc1,proc2 - processors to participate in communication. Note that all processors must call because we use global operations to manage some operations, and we want to avoid using process-subset operations (supported in Chameleon) to simplify porting this code. CommTest - Routine to call to run a basic test. This routine returns the time that the test took in seconds.. outctx - Pointer to output context. autosize - If true, the actual sizes are picked automatically. That is instead of using first, first + incr, ... , the routine choses values of len such that first <= len <= last and other properties, given by autodx and autorel, are satisfied.. autodx - Parameter for TST1dauto, used to set minimum distance between test sizes. 4 (for 4 bytes) is good for small values of last. autorel - Relative error tolerance used by TST1dauto in determining the message sizes used.. msgctx - Context to pass through to operation routine */void time_function(reps,first,last,incr,proc1,proc2,CommTest,outctx, autosize,autodx,autorel,msgctx)int reps,first,last,incr,proc1,proc2,autosize,autodx;double autorel;double (* CommTest)( int, int, void *);void *outctx;void *msgctx;{ int distance,myproc; double s, r; myproc = __MYPROCID; distance = ((proc1)<(proc2)?(proc2)-(proc1):(proc1)-(proc2)); /* Run test, using either the simple direct test or the automatic length test */ ntest = 0; if (autosize) { TwinResults *twin; int k; twin = AllocResultsArray( 1024 ); SetResultsForStrided( first, last, (last-first)/8, twin ); /* Run tests */ SetRepsForList( twin, reps ); for (k=0; k<minreps; k++) { int kk; for (kk=0; kk<5; kk++) RunTestList( twin, CommTest, msgctx ); RefineTestList( twin, CommTest, msgctx, autodx, autorel ); } for (k=1; k<5; k++) { if (!SmoothList( twin, CommTest, msgctx )) break; } /* Final output */ if (myproc == 0) OutputTestList( twin, outctx, proc1, proc2, distance ); } else { TwinResults *twin; int k; if (nsizes) { twin = AllocResultsArray( nsizes ); SetResultsForList( sizelist, nsizes, twin ); } else { nsizes = 1 + (last - first)/incr; twin = AllocResultsArray( nsizes ); SetResultsForStrided( first, last, incr, twin ); } /* Run tests */ for (k=1; k<minreps; k++) { SetRepsForList( twin, reps ); RunTestList( twin, CommTest, msgctx );
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -