📄 mpptest.c
字号:
sprintf( cbuf, "-%d bytes", MsgSize );
}
else {
strcpy( cbuf, "-no msgs" );
}
strcat( protocol_name, cbuf );
TimeScale = 0.5;
RateScale = 2.0;
}
else if (SYArgHasName( &argc, argv, 1, "-memcpy" )) {
int use_vector = 0;
MsgCtx = 0;
ChangeDist = 0;
TimeScale = 1.0;
RateScale = 1.0;
use_vector = SYArgHasName( &argc, argv, 1, "-vector" );
/* memcpy_rate_int, memcpy_rate_double */
if (SYArgHasName( &argc, argv, 1, "-int" )) {
if (use_vector) {
}
else {
BasicCommTest = memcpy_rate_int;
strcpy( protocol_name, "memcpy-int" );
}
}
else if (SYArgHasName( &argc, argv, 1, "-double" )) {
if (use_vector) {
BasicCommTest = memcpy_rate_double_vector;
strcpy( protocol_name, "memcpy-double-vector" );
}
else {
BasicCommTest = memcpy_rate_double;
strcpy( protocol_name, "memcpy-double" );
}
}
#ifdef HAVE_LONG_LONG
else if (SYArgHasName( &argc, argv, 1, "-longlong" )) {
if (use_vector) {
BasicCommTest = memcpy_rate_long_long_vector;
strcpy( protocol_name, "memcpy-longlong-vector" );
}
else {
BasicCommTest = memcpy_rate_long_long;
strcpy( protocol_name, "memcpy-longlong" );
}
}
#endif
else {
BasicCommTest = memcpy_rate;
strcpy( protocol_name, "memcpy" );
}
}
else {
/* Pair by default */
BasicCommTest = GetPairFunction( &argc, argv, protocol_name );
MsgCtx = PairInit( proc1, proc2 );
ChangeDist = PairChange;
if (SYArgHasName( &argc, argv, 1, "-debug" ))
PrintPairInfo( MsgCtx );
TimeScale = 0.5;
RateScale = 2.0;
}
first = svals[0];
last = svals[1];
incr = svals[2];
if (incr == 0) incr = 1;
/*
Finally, we are ready to run the tests. We want to report times as
the times for a single link, and rates as the aggregate rate.
To do this, we need to know how to scale both the times and the rates.
Times: scaled by the number of one-way trips measured by the base testing
code. This is often 2 trips, or a scaling of 1/2.
Rates: scaled by the number of simultaneous participants (as well as
the scaling in times). Compute the rates based on the updated time,
then multiply by the number of participants. Note that, for a single
sender, time and rate are inversely proportional (that is, if TimeScale
is 0.5, RateScale is 2.0).
*/
start_time = MPI_Wtime();
/* If the distance flag is set, we look at a range of distances. Otherwise,
we just use the first and last processor */
if (doinfo && __MYPROCID == 0) {
HeaderGraph( outctx, protocol_name, (char *)0, units );
}
if(distance_flag) {
for(distance=1;distance<GetMaxIndex();distance++) {
proc2 = GetNeighbor( 0, distance, 0 );
if (ChangeDist)
(*ChangeDist)( distance, MsgCtx );
time_function(n_avg,first,last,incr,proc1,proc2,
BasicCommTest,outctx,
autosize,autodx,autorel,MsgCtx);
}
}
else{
time_function(n_avg,first,last,incr,proc1,proc2,BasicCommTest,outctx,
autosize,autodx,autorel,MsgCtx);
}
/*
Generate the "end of page". This allows multiple distance graphs on the
same plot
*/
if (doinfo && __MYPROCID == 0)
EndPageGraph( outctx );
EndGraph( outctx );
MPI_Finalize();
return 0;
}
/*
    This is the basic routine for timing an operation.

    Input Parameters:
.   n_avg - Basic number of times to run basic test (passed to
            SetRepsForList)
.   first,last,incr - length of data is first, first+incr, ... last
         (if last != first + k * incr, then actual last value is the
         value of first + k * incr that is <= last and such that
         first + (k+1) * incr > last, just as you'd expect)
.   proc1,proc2 - processors to participate in communication.  Note that
         all processors must call because we use global operations to
         manage some operations, and we want to avoid using process-subset
         operations (supported in Chameleon) to simplify porting this code
.   CommTest  -  Routine to call to run a basic test.  This routine returns
         the time that the test took in seconds.
.   outctx - Pointer to output context
.   autosize - If true, the actual sizes are picked automatically.  That is
         instead of using first, first + incr, ... , the routine chooses
         values of len such that first <= len <= last and other properties,
         given by autodx and autorel, are satisfied.
.   autodx - Passed to RefineTestList; used to set minimum distance between
         test sizes.  4 (for 4 bytes) is good for small values of last
.   autorel - Passed to RefineTestList; relative error tolerance used in
         determining the message sizes used.
.   msgctx - Context to pass through to operation routine

    Notes:
    When autosize is false, an explicit size list (nsizes/sizelist) takes
    precedence over first/last/incr.  The number of strided entries is kept
    in a local variable: this routine is called once per distance, and
    storing the computed count in nsizes made every call after the first
    take the explicit-list branch even though no list had been given.

    Only the process with __MYPROCID == 0 writes results and draws the graph.
*/
void time_function( int n_avg, int first, int last, int incr,
                    int proc1, int proc2, double (*CommTest)(int,int,void*),
                    void *outctx, int autosize, int autodx,
                    double autorel, void *msgctx)
{
    TwinResults *twin;
    int         distance, myproc;
    int         k;

    myproc   = __MYPROCID;
    distance = ((proc1)<(proc2)?(proc2)-(proc1):(proc1)-(proc2));

    /* Run test, using either the simple direct test or the automatic length
       test */
    if (autosize) {
        /* Start from a coarse strided list and let RefineTestList add
           entries to it */
        twin = AllocResultsArray( 1024 );
        SetResultsForStrided( first, last, (last-first)/8, twin );

        /* Run tests: five passes over the list between refinements */
        SetRepsForList( twin, n_avg );
        for (k=0; k<minreps/5; k++) {
            int kk;
            for (kk=0; kk<5; kk++)
                (void)RunTestList( twin, CommTest, msgctx );
            /* Don't refine on the last iteration */
            /* NOTE(review): k stays below minreps/5, so this test is never
               false and the list is refined on every iteration, including
               the last.  Left unchanged; confirm the intended behavior
               before tightening it to (k != minreps/5 - 1). */
            if (k != minreps-1)
                RefineTestList( twin, CommTest, msgctx, autodx, autorel );
        }
    }
    else {
        int n_without_change = 0; /* Number of times through the list
                                     without changes */

        if (nsizes) {
            /* An explicit list of sizes is available */
            twin = AllocResultsArray( nsizes );
            SetResultsForList( sizelist, nsizes, twin );
        }
        else {
            /* Guard the division below; a zero stride is treated as 1 */
            int stride    = (incr != 0) ? incr : 1;
            int n_strided = 1 + (last - first)/stride;

            twin = AllocResultsArray( n_strided );
            SetResultsForStrided( first, last, stride, twin );
        }

        /* Run tests.  RunTestList's nonzero return resets the stability
           counter; stop early once more than n_stable consecutive passes
           return zero. */
        /* NOTE(review): starting at k=1 gives at most minreps-1 passes;
           confirm whether minreps passes were intended. */
        SetRepsForList( twin, n_avg );
        for (k=1; k<minreps; k++) {
            if (RunTestList( twin, CommTest, msgctx )) {
                n_without_change = 0;
            }
            else
                n_without_change++;
            if (n_without_change > n_stable) {
#if DEBUG_AUTO
                printf( "Breaking because stable results reached\n" );
#endif
                break;
            }
        }
    }

    /* Both modes finish the same way: smooth until SmoothList returns zero
       (at most n_smooth-1 passes), report, and release the list */
    for (k=1; k<n_smooth; k++) {
        if (!SmoothList( twin, CommTest, msgctx )) break;
    }

    /* Final output */
    if (myproc == 0)
        OutputTestList( twin, outctx, proc1, proc2, distance );
    FreeResults(twin);

    if (myproc == 0)
        DrawGraph( outctx, 0, 0, 0.0, 0.0 );
}
/*****************************************************************************
Utility routines
*****************************************************************************/
/*
    Write the command-line usage summary to stderr.

    Only the process whose __MYPROCID is 0 prints; every other process
    returns immediately.  argv[0] is used as the program name in the first
    line.  The halo, collective (-gop), graph, and pattern option summaries
    are produced by PrintHaloHelp, PrintGOPHelp, PrintGraphHelp, and
    PrintPatternHelp.
*/
void PrintHelp( char *argv[] )
{
    if (__MYPROCID != 0) {
        return;
    }

    fprintf( stderr, "%s - test individual communication speeds\n", argv[0] );

    /* Protocols, message data, and message patterns */
    fprintf( stderr,
             "Test a single communication link by various methods. The tests are \n"
             "combinations of\n"
             "Protocol: \n"
             "-sync Blocking sends/receives (default)\n"
             "-async NonBlocking sends/receives\n"
             "-ssend MPI Syncronous send (MPI_Ssend) and MPI_Irecv\n"
             "-force Ready-receiver (with a null message)\n"
             "-persistant Persistant communication\n"
             "-put MPI_Put (only on systems that support it)\n"
             "-get MPI_Get (only on systems that support it)\n"
             "-vector Data is separated by constant stride (only with MPI, using UBs)\n"
             "-vectortype Data is separated by constant stride (only with MPI, using \n"
             "MPI_Type_vector)\n"
             "\n"
             "Message data:\n"
             "-cachesize n Perform test so that cached data is NOT reused\n"
             "\n"
             "-vstride n For -vector, set the stride between elements\n"
             "Message pattern:\n"
             "-roundtrip Roundtrip messages (default)\n"
             "-head Head-to-head messages\n"
             "-halo Halo Exchange (multiple head-to-head; limited options)\n"
             "\n" );
    PrintHaloHelp();

    /* Memory-copy variants */
    fprintf( stderr,
             "-memcpy Memory copy performance (no communication)\n"
             "-memcpy -int Memory copy using a for-loop with integers\n"
             "-memcpy -double Memory copy using a for-loop with doubles\n"
             "-memcpy -longlong Memory copy using a for-loop with long longs\n" );

    /* Test types */
    fprintf( stderr,
             " Message test type:\n"
             "(if not specified, only communication tests run)\n"
             "-overlap Overlap computation with communication (see -size)\n"
             "-overlapmsgsize nn\n"
             "Size of messages to overlap with is nn bytes.\n"
             "-bisect Bisection test (all processes participate)\n"
             "-bisectdist n Distance between processes\n"
             "\n" );

    /* Message-size selection */
    fprintf( stderr,
             " Message sizes:\n"
             "-size start end stride (default 0 1024 32)\n"
             "Messages of length (start + i*stride) for i=0,1,... until\n"
             "the length is greater than end.\n"
             "-sizelist n1,n2,...\n"
             "Messages of length n1, n2, etc are used. This overrides \n"
             "-size\n"
             "-logscale Messages of length 2**i are used. The -size argument\n"
             "may be used to set the limits. If -logscale is given,\n"
             "the default limits are from sizeof(int) to 128 k.\n"
             "-auto Compute message sizes automatically (to create a smooth\n"
             "graph. Use -size values for lower and upper range\n"
             "-autodx n Minimum number of bytes between samples when using -auto\n"
             "-autorel d Relative error tolerance when using -auto (0.02 by default)\n" );

    /* Fine-grained controls; the two %d fields are filled from DEFAULT_AVG
       and max_run_time */
    fprintf( stderr,
             "\n"
             "Detailed control of tests:\n"
             "-quick Short hand for -autoavg -n_stable 5\n"
             "this is a good choice for performing a relatively quick and\n"
             "accurate assessment of communication performance\n"
             "-n_avg n Number of times a test is run; the time is averaged over this\n"
             "number of tests (default %d)\n"
             "-autoavg Compute the number of times a message is sent automatically\n"
             "-tgoal d Time that each test should take, in seconds. Use with \n"
             "-autoavg\n"
             "-rthresh d Fractional threshold used to determine when minimum time\n"
             "has been found. The default is 0.05.\n"
             "-sample_reps n Number of times a full test is run in order to find the\n"
             "minimum average time. The default is 30\n"
             "-n_stable n Number of full tests that must not change the minimum \n"
             "average value before mpptest will stop testing. By default,\n"
             "the value of -sample_reps is used (i.e.,no early termination)\n"
             "-max_run_time n Maximum number of seconds for all tests. The default\n"
             "is %d\n"
             "\n",
             DEFAULT_AVG, (int)max_run_time );

    /* Collective operations, graph output, and communication patterns */
    fprintf( stderr,
             "\n"
             "Collective operations may be tested with -gop [ options ]:\n" );
    PrintGOPHelp();
    PrintGraphHelp();
    PrintPatternHelp();

    fflush( stderr );
}
/****************************************************************************
* New code that uses a list to manage all timing experiments
****************************************************************************/
/* Setup the results array */
static TwinResults *twin_avail = 0;
TwinResults *AllocResultsArray( int nsizes )
{
TwinResults *new;
int i;
new = (TwinResults *)calloc( nsizes+1, sizeof(TwinResults) );
if (!new) MPI_Abort( MPI_COMM_WORLD, 1 );
for (i=1; i<nsizes-1; i++) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -