📄 mpptest.c
字号:
/*D
mpptest - Measure the communications performance of a message-passing system
Details:
The greatest challenge in performing these experiments is making the results
reproducible. On many (most?) systems, there are various events that
perturb timings; these can occur on the scale of 10's of milliseconds.
To attempt to remove the effect of these events, we make multiple tests,
taking the minimum of many tests, each of which gives an average time. To
reduce the effect of transient perturbations, the entire sequence of tests
is run several times, taking the best (fastest) time on each test. Finally,
a post-processing step retests any anomalies, defined as single peaks
that are significantly greater than the surrounding times (using a locally
linear-fit model).
D*/
/*
This code is a major re-write of an older version that was generated
automatically from an older Chameleon program. Previous versions
worked with a wide variety of message-passing systems.
*/
#include <stdio.h>
#include <math.h>
#ifndef HUGE_VAL
#define HUGE_VAL 10.0e38
#endif
#include "mpi.h"
#include "mpptest.h"
#include "getopts.h"
int __NUMNODES, __MYPROCID;
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifndef DEFAULT_AVG
#define DEFAULT_AVG 50
#endif
#include <string.h>
/* Forward declarations */
void PrintHelp( char *[] );
/*
This is a simple program to test the communications performance of
a parallel machine.
*/
/* If doinfo is 0, don't write out the various text lines */
static int doinfo = 1;
/* Scaling of time and rate */
static double TimeScale = 1.0;
static double RateScale = 1.0;
/* The maximum of the MPI_Wtick values for all processes */
static double gwtick;
/* This is the number of times to run a test, taking as time the minimum
achieved timing.
(NOT CURRENTLY IMPLEMENTED)
This uses an adaptive approach that also stops when
minThreshTest values are within a few percent of the current minimum
n_avg - number of iterations used to average the time for a test
n_rep - number of repetitions of a test, used to sample test average
to avoid transient effects
*/
static int minreps = 30;
/* n_stable is the number of tests that must not (significantly, see
repsThresh) change the results before mpptest will decide that no
further tests are required
*/
static int n_stable;
static double repsThresh = 0.05;
/* n_smooth is the number of passes over the data that will be taken to
smooth out any anomalies, defined as times that deviate significantly from
a linear progression
*/
static int n_smooth = 5;
char protocol_name[256];
/*
We would also like to adaptively modify the number of repetitions to
meet a time estimate (later, we'd like to meet a statistical estimate).
One relatively easy way to do this is to use a linear estimate (either
extrapolation or interpolation) based on 2 other computations.
That is, if the goal time is T and the measured tuples (time,reps,len)
are (time1,reps1,len1) and (time2,reps2,len2), the formula for the local
time is s + r * len, where
r = (time2/reps2 - time1/reps1) / (len2 - len1)
s = time1/reps1 - r * len1
Then the appropriate number of repetitions to use is
Tgoal / (s + r * len) = reps
*/
static double Tgoal = 1.0;
/* If less than TgoalMin is spent, increase the number of tests to average */
static double TgoalMin = 0.5;
static int autoavg = 0;
/* This structure allows a collection of arbitrary sizes to be specified */
#define MAX_SIZE_LIST 256
static int sizelist[MAX_SIZE_LIST];
static int nsizes = 0;
/* We wish to control the TOTAL amount of time that the test takes.
We could do this with gettimeofday or clock or something, but fortunately
the MPI timer is an elapsed timer */
static double max_run_time = 15.0*60.0;
static double start_time = 0.0;
/* All test data is contained in an array of values. Because we may
adaptively choose the message lengths, provision is made to maintain the
list elements in an array, and for many processing tasks (output, smoothing)
only the list version is used. */
/* These are used to contain results for a single test */
/* One TwinResults records the timing statistics gathered for a single
   message length, plus the links to its neighbouring elements. */
typedef struct _TwinResults {
    /* Timing statistics */
    double t;           /* min of the observations (per loop) */
    double max_time;    /* max of the observations (per loop) */
    double sum_time;    /* sum of all of the observations */

    /* Test parameters and bookkeeping */
    int len;            /* length of the message for this test */
    int ntests;         /* number of observations */
    int n_avg;          /* number of times to run a test to get the
                           average time */
    int new_min_found;  /* true if a new minimum was found */
    int n_loop;         /* number of times the timing loop was run and
                           accepted */

    /* Links to the following and preceding elements */
    struct _TwinResults *next;
    struct _TwinResults *prev;
} TwinResults;
/* Prototypes for the routines that take or return TwinResults.
   The double (*)(int,int,void *) parameter seen in several of them has the
   same type as the BasicCommTest pointer declared in main and as the f
   member of TwinTest. */
TwinResults *AllocResultsArray( int );
void FreeResults( TwinResults * );
void SetResultsForStrided( int first, int last, int incr, TwinResults *twin );
void SetResultsForList( int sizelist[], int nsizes, TwinResults *twin );
void SetRepsForList( TwinResults *, int );
int RunTest( TwinResults *, double (*)(int,int,void *), void *, double );
int RunTestList( TwinResults *, double (*)(int,int,void*), void* );
int SmoothList( TwinResults *, double (*)(int,int,void *), void * );
int RefineTestList( TwinResults *, double (*)(int,int,void *),void *,
int, double );
void OutputTestList( TwinResults *, void *, int, int, int );
double LinearTimeEst( TwinResults *, double );
double LinearTimeEstBase( TwinResults *, TwinResults *, TwinResults*, double );
TwinResults *InsertElm( TwinResults *, TwinResults * );
/* Initialize the results array of a given list of data */
/* This structure is used to provide information for the automatic
message-length routines */
typedef struct {
    /* Test function, with the signature double f(int, int, void *) */
    double (*f)( int, int, void * );

    int reps;
    int proc1;
    int proc2;

    /* Untyped context pointer stored alongside the test function */
    void *msgctx;

    /* Here is where we should put "recent" timing data used to estimate
       the values of reps */
    double t1;
    double t2;
    int len1;
    int len2;
} TwinTest;
int main( int argc, char *argv[] )
{
int dist;
double (* BasicCommTest)( int, int, void * ) = 0;
void *MsgCtx = 0; /* This is the context of the
message-passing operation */
void *outctx;
void (*ChangeDist)( int, PairData ) = 0;
int n_avg, proc1, proc2, distance_flag, distance;
int first,last,incr, svals[3];
int autosize = 0, autodx;
double autorel;
double wtick;
char units[32]; /* Name of units of length */
MPI_Init( &argc, &argv );
MPI_Comm_size( MPI_COMM_WORLD, &__NUMNODES );
MPI_Comm_rank( MPI_COMM_WORLD, &__MYPROCID );
/* Get the maximum clock grain */
wtick = MPI_Wtick();
MPI_Allreduce( &wtick, &gwtick, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );
/* Set the default test name and labels */
strcpy( protocol_name, "blocking" );
strcpy( units, "(bytes)" );
if (SYArgHasName( &argc, argv, 1, "-help" )) {
if (__MYPROCID == 0) PrintHelp( argv );
MPI_Finalize();
return 0;
}
if (__NUMNODES < 2 && !SYArgHasName( &argc, argv, 0, "-memcpy" )) {
fprintf( stderr, "Must run mpptest with at least 2 nodes\n" );
MPI_Finalize();
return 1;
}
/* Get the output context */
outctx = SetupGraph( &argc, argv );
if (SYArgHasName( &argc, argv, 1, "-noinfo" )) doinfo = 0;
/* Proc1 *must* be 0 because of the way other data is collected */
proc1 = 0;
proc2 = __NUMNODES-1;
distance_flag = 0;
if (SYArgHasName( &argc, argv, 0, "-logscale" )) {
svals[0] = sizeof(int);
svals[1] = 131072; /* 128k */
svals[2] = 32;
}
else {
svals[0] = 0;
svals[1] = 1024;
svals[2] = 32;
}
if (SYArgHasName( &argc, argv, 1, "-distance" )) distance_flag++;
SYArgGetIntVec( &argc, argv, 1, "-size", 3, svals );
nsizes = SYArgGetIntList( &argc, argv, 1, "-sizelist", MAX_SIZE_LIST,
sizelist );
if (SYArgHasName( &argc, argv, 1, "-logscale" )) {
/* Use the sizelist field to specify a collection of power of
two sizes. This is a temporary hack until we have something
better. You can use the -size argument to set min and max values
(the stride is ignored) */
int k;
nsizes = 0;
if (svals[0] == 0) {
sizelist[nsizes++] = 0;
k = 4;
}
else {
k = svals[0];
}
while( k <= svals[1] && nsizes < MAX_SIZE_LIST ) {
sizelist[nsizes++] = k;
k *= 2;
}
/* Need to tell graphics package to use log/log scale */
DataScale( outctx, 1 );
}
/* Get the number of tests to average over */
n_avg = DEFAULT_AVG;
if (SYArgHasName( &argc, argv, 1, "-autoavg" )) {
autoavg = 1;
n_avg = 5; /* Set a new default. This can be overridden */
}
SYArgGetInt( &argc, argv, 1, "-n_avg", &n_avg ); /* was -reps */
if (SYArgGetDouble( &argc, argv, 1, "-tgoal", &Tgoal )) {
if (TgoalMin > 0.1 * Tgoal) TgoalMin = 0.1 * Tgoal;
}
SYArgGetDouble( &argc, argv, 1, "-rthresh", &repsThresh );
SYArgGetInt( &argc, argv, 1, "-sample_reps", &minreps );
n_stable = minreps;
SYArgGetInt( &argc, argv, 1, "-n_stable", &n_stable );
SYArgGetDouble( &argc, argv, 1, "-max_run_time", &max_run_time );
if (SYArgHasName( &argc, argv, 1, "-quick" ) ||
SYArgHasName( &argc, argv, 1, "-fast" )) {
/* This is a short cut for
-autoavg -n_stable 5 */
autoavg = 1;
n_avg = 5;
n_stable = 5;
}
autosize = SYArgHasName( &argc, argv, 1, "-auto" );
if (autosize) {
autodx = 4;
SYArgGetInt( &argc, argv, 1, "-autodx", &autodx );
autorel = 0.02;
SYArgGetDouble( &argc, argv, 1, "-autorel", &autorel );
}
/* Pick the general test based on the presence of an -gop, -overlap, -bisect
or no arg */
SetPattern( &argc, argv );
if (SYArgHasName( &argc, argv, 1, "-gop")) {
/* we need to fix this cast eventually */
BasicCommTest = (double (*)(int,int,void*))
GetGOPFunction( &argc, argv, protocol_name, units );
MsgCtx = GOPInit( &argc, argv );
}
else if (SYArgHasName( &argc, argv, 1, "-halo" )) {
int local_partners, max_partners;
BasicCommTest = GetHaloFunction( &argc, argv, &MsgCtx, protocol_name );
TimeScale = 1.0; /* Halo time, not half round trip */
local_partners = GetHaloPartners( MsgCtx );
MPI_Allreduce( &local_partners, &max_partners, 1, MPI_INT, MPI_MAX,
MPI_COMM_WORLD );
RateScale = (double) max_partners; /* Since each sends len data */
/* I.e., gives total rate per byte */
}
else if (SYArgHasName( &argc, argv, 1, "-bisect" )) {
BasicCommTest = GetPairFunction( &argc, argv, protocol_name );
dist = 1;
SYArgGetInt( &argc, argv, 1, "-bisectdist", &dist );
MsgCtx = BisectInit( dist );
ChangeDist = BisectChange;
strcat( protocol_name, "-bisect" );
if (SYArgHasName( &argc, argv, 1, "-debug" ))
PrintPairInfo( MsgCtx );
TimeScale = 0.5;
RateScale = (double) __NUMNODES; /* * (2 * 0.5) */
}
else if (SYArgHasName( &argc, argv, 1, "-overlap" )) {
int MsgSize;
char cbuf[32];
if (SYArgHasName( &argc, argv, 1, "-sync" )) {
BasicCommTest = round_trip_b_overlap;
strcpy( protocol_name, "blocking" );
}
else { /* Assume -async */
BasicCommTest = round_trip_nb_overlap;
strcpy( protocol_name, "nonblocking" );
}
MsgSize = 0;
SYArgGetInt( &argc, argv, 1, "-overlapmsgsize", &MsgSize );
MsgCtx = OverlapInit( proc1, proc2, MsgSize );
/* Compute floating point lengths if requested */
if (SYArgHasName( &argc, argv, 1, "-overlapauto")) {
OverlapSizes( MsgSize >= 0 ? MsgSize : 0, svals, MsgCtx );
}
strcat( protocol_name, "-overlap" );
if (MsgSize >= 0) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -