📄 bandwidthtest.cu
字号:
/* * Copyright 1993-2007 NVIDIA Corporation. All rights reserved. * * NOTICE TO USER: * * This source code is subject to NVIDIA ownership rights under U.S. and * international Copyright laws. Users and possessors of this source code * are hereby granted a nonexclusive, royalty-free license to use this code * in individual and commercial software. * * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE * OR PERFORMANCE OF THIS SOURCE CODE. * * U.S. Government End Users. This source code is a "commercial item" as * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of * "commercial computer software" and "commercial computer software * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) * and is provided to the U.S. Government only as a commercial end item. * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the * source code with only those rights set forth herein. * * Any use of this source code in individual and commercial software must * include, in the user documentation and internal comments to the code, * the above Disclaimer and U.S. Government End Users Notice. *//* * This is a simple test program to measure the memcopy bandwidth of the GPU. 
* It can measure device to device copy bandwidth, host to device copy bandwidth
* for pageable and pinned memory, and device to host copy bandwidth for pageable
* and pinned memory.
*
* Usage:
* ./bandwidthTest [option]...
*/

// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

// includes, project
#include <cutil.h>
#include <cuda.h>

// defines, project
#define MEMCOPY_ITERATIONS  10
#define DEFAULT_SIZE        ( 32 * ( 1 << 20 ) )    //32 M
#define DEFAULT_INCREMENT   (1 << 22)               //4 M
#define CACHE_CLEAR_SIZE    (1 << 24)               //16 M

//shmoo mode defines
#define SHMOO_MEMSIZE_MAX     (1 << 26)         //64 M
#define SHMOO_MEMSIZE_START   (1 << 10)         //1 KB
#define SHMOO_INCREMENT_1KB   (1 << 10)         //1 KB
#define SHMOO_INCREMENT_2KB   (1 << 11)         //2 KB
#define SHMOO_INCREMENT_10KB  (10 * (1 << 10))  //10KB
#define SHMOO_INCREMENT_100KB (100 * (1 << 10)) //100 KB
#define SHMOO_INCREMENT_1MB   (1 << 20)         //1 MB
#define SHMOO_INCREMENT_2MB   (1 << 21)         //2 MB
#define SHMOO_INCREMENT_4MB   (1 << 22)         //4 MB
#define SHMOO_LIMIT_20KB      (20 * (1 << 10))  //20 KB
#define SHMOO_LIMIT_50KB      (50 * (1 << 10))  //50 KB
#define SHMOO_LIMIT_100KB     (100 * (1 << 10)) //100 KB
#define SHMOO_LIMIT_1MB       (1 << 20)         //1 MB
#define SHMOO_LIMIT_16MB      (1 << 24)         //16 MB
#define SHMOO_LIMIT_32MB      (1 << 25)         //32 MB

//enums, project
enum testMode   { QUICK_MODE, RANGE_MODE, SHMOO_MODE };
enum memcpyKind { DEVICE_TO_HOST, HOST_TO_DEVICE, DEVICE_TO_DEVICE };
enum printMode  { USER_READABLE, CSV };
enum memoryMode { PINNED, PAGEABLE };

////////////////////////////////////////////////////////////////////////////////
// declaration, forward
void runTest(const int argc, const char **argv);
void testBandwidth( unsigned int start, unsigned int end, unsigned int increment,
                    testMode mode, memcpyKind kind, printMode printmode,
                    memoryMode memMode, int startDevice, int endDevice);
void testBandwidthQuick(unsigned int size, memcpyKind kind, printMode printmode,
                        memoryMode memMode, int startDevice, int endDevice);
void testBandwidthRange(unsigned int start, unsigned int end, unsigned int increment,
                        memcpyKind kind, printMode printmode, memoryMode memMode,
                        int startDevice, int endDevice);
void testBandwidthShmoo(memcpyKind kind, printMode printmode, memoryMode memMode,
                        int startDevice, int endDevice);
float testDeviceToHostTransfer(unsigned int memSize, memoryMode memMode);
float testHostToDeviceTransfer(unsigned int memSize, memoryMode memMode);
float testDeviceToDeviceTransfer(unsigned int memSize);
void printResultsReadable(unsigned int *memSizes, float *bandwidths, unsigned int count);
void printResultsCSV(unsigned int *memSizes, float *bandwidths, unsigned int count);
void printHelp(void);

////////////////////////////////////////////////////////////////////////////////
// Owner of the strings handed back through cutGetCmdLineArgumentstr().
// The destructor releases them, so every return path of runTest() cleans up.
// NOTE(review): the original code released memModeStr with cutFree(); the same
// call is assumed to be the right release for the other two strings - confirm
// against cutil.
////////////////////////////////////////////////////////////////////////////////
struct CmdLineStrings
{
    char *mode;     // value of the "mode" option, NULL until parsed
    char *device;   // value of the "device" option, NULL until parsed
    char *memMode;  // value of the "memory" option, NULL until parsed

    CmdLineStrings() : mode(NULL), device(NULL), memMode(NULL) {}

    ~CmdLineStrings()
    {
        release(mode);
        release(device);
        release(memMode);
    }

private:
    // Skip the release call for strings that were never filled in.
    static void release(char *str)
    {
        if( str != NULL )
        {
            cutFree(str);
        }
    }

    // Not copyable: a copy would release the same pointers twice.
    CmdLineStrings(const CmdLineStrings &);
    CmdLineStrings &operator=(const CmdLineStrings &);
};

////////////////////////////////////////////////////////////////////////////////
// Program main
// Forwards the command line to runTest() and then invokes the cutil CUT_EXIT
// macro with the same arguments.
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char** argv)
{
    runTest(argc, (const char**)argv);

    CUT_EXIT(argc, argv);
}

///////////////////////////////////////////////////////////////////////////////
// Parse args, run the appropriate tests
//
// Recognised options (as read below): help, csv, memory (pageable | pinned),
// device (a number or "all"), mode (quick | range | shmoo), htod, dtoh, dtod
// and, in range mode only, start, end and increment.
// When none of htod/dtoh/dtod is given, all three directions are tested.
// Any invalid argument prints a message and returns without running a test.
///////////////////////////////////////////////////////////////////////////////
void
runTest(const int argc, const char **argv)
{
    int start = DEFAULT_SIZE;
    int end = DEFAULT_SIZE;
    int startDevice = 0;
    int endDevice = 0;
    int increment = DEFAULT_INCREMENT;
    testMode mode = QUICK_MODE;
    bool htod = false;
    bool dtoh = false;
    bool dtod = false;
    printMode printmode = USER_READABLE;
    memoryMode memMode = PAGEABLE;
    CmdLineStrings args;    // released automatically on every return below

    //process command line args
    if(cutCheckCmdLineFlag( argc, argv, "help"))
    {
        printHelp();
        return;
    }

    if(cutCheckCmdLineFlag( argc, argv, "csv"))
    {
        printmode = CSV;
    }

    if( cutGetCmdLineArgumentstr(argc, argv, "memory", &args.memMode) )
    {
        if( strcmp(args.memMode, "pageable") == 0 )
        {
            memMode = PAGEABLE;
        }
        else if( strcmp(args.memMode, "pinned") == 0)
        {
            memMode = PINNED;
        }
        else
        {
            printf("Invalid memory mode - valid modes are pageable or pinned\n");
            printf("See --help for more information\n");
            return;
        }
    }
    else
    {
        //default - pageable memory
        memMode = PAGEABLE;
    }

    if( cutGetCmdLineArgumentstr(argc, argv, "device", &args.device) )
    {
        int deviceCount = 0;
        cudaError_t status = cudaGetDeviceCount(&deviceCount);
        if( status != cudaSuccess )
        {
            // deviceCount is meaningless if the query itself failed
            printf("cudaGetDeviceCount failed: %s\n", cudaGetErrorString(status));
            return;
        }
        if( deviceCount == 0 )
        {
            printf("!!!!!No devices found!!!!!\n");
            return;
        }

        if( strcmp (args.device, "all") == 0 )
        {
            printf ("\n!!!!!Cumulative Bandwidth to be computed from all the devices !!!!!!\n\n");
            startDevice = 0;
            endDevice = deviceCount-1;
        }
        else
        {
            startDevice = endDevice = atoi(args.device);
            if( startDevice >= deviceCount || startDevice < 0)
            {
                // out-of-range index: report it, then fall back to device 0
                printf("\n!!!!!Invalid GPU number %d given hence default gpu %d will be used !!!!!\n", startDevice,0);
                startDevice = endDevice = 0;
            }
        }
    }

    printf("Running on......\n");
    for( int currentDevice = startDevice; currentDevice <= endDevice; currentDevice++)
    {
        cudaDeviceProp deviceProp;
        cudaError_t status = cudaGetDeviceProperties(&deviceProp, currentDevice);
        if( status != cudaSuccess )
        {
            // deviceProp.name would be uninitialized here, so do not print it
            printf("cudaGetDeviceProperties failed for device %d: %s\n",
                   currentDevice, cudaGetErrorString(status));
            return;
        }
        printf (" device %d:%s\n", currentDevice,deviceProp.name);
    }

    if( cutGetCmdLineArgumentstr(argc, argv, "mode", &args.mode) )
    {
        //figure out the mode
        if( strcmp(args.mode, "quick") == 0 )
        {
            mode = QUICK_MODE;
        }
        else if( strcmp(args.mode, "shmoo") == 0 )
        {
            mode = SHMOO_MODE;
        }
        else if( strcmp(args.mode, "range") == 0 )
        {
            mode = RANGE_MODE;
        }
        else
        {
            printf("Invalid mode - valid modes are quick, range, or shmoo\n");
            printf("See --help for more information\n");
            return;
        }
    }
    else
    {
        //default mode - quick
        mode = QUICK_MODE;
    }

    if(cutCheckCmdLineFlag( argc, argv, "htod"))
        htod = true;
    if(cutCheckCmdLineFlag( argc, argv, "dtoh"))
        dtoh = true;
    if(cutCheckCmdLineFlag( argc, argv, "dtod"))
        dtod = true;

    if( !htod && !dtoh && !dtod )
    {
        //default:  All
        htod = true;
        dtoh = true;
        dtod = true;
    }

    if( RANGE_MODE == mode )
    {
        if( cutGetCmdLineArgumenti( argc, argv, "start", &start) )
        {
            if( start <= 0 )
            {
                printf("Illegal argument - start must be greater than zero\n");
                return;
            }
        }
        else
        {
            printf("Must specify a starting size in range mode\n");
            printf("See --help for more information\n");
            return;
        }

        if( cutGetCmdLineArgumenti( argc, argv, "end", &end) )
        {
            if( end <= 0 )
            {
                printf("Illegal argument - end must be greater than zero\n");
                return;
            }

            if( start > end )
            {
                printf("Illegal argument - start is greater than end\n");
                return;
            }
        }
        else
        {
            printf("Must specify an end size in range mode.\n");
            printf("See --help for more information\n");
            return;
        }

        if( cutGetCmdLineArgumenti( argc, argv, "increment", &increment) )
        {
            if( increment <= 0 )
            {
                printf("Illegal argument - increment must be greater than zero\n");
                return;
            }
        }
        else
        {
            // the mode is called "range" everywhere else; there is no "user" mode
            printf("Must specify an increment in range mode\n");
            printf("See --help for more information\n");
            return;
        }
    }

    // start, end and increment were validated as positive above (or hold the
    // positive defaults), so the casts to unsigned do not change their value
    if( htod )
    {
        testBandwidth((unsigned int)start, (unsigned int)end, (unsigned int)increment,
                      mode, HOST_TO_DEVICE, printmode, memMode, startDevice, endDevice);
    }
    if( dtoh )
    {
        testBandwidth((unsigned int)start, (unsigned int)end, (unsigned int)increment,
                      mode, DEVICE_TO_HOST, printmode, memMode, startDevice, endDevice);
    }
    if( dtod )
    {
        testBandwidth((unsigned int)start, (unsigned int)end, (unsigned int)increment,
                      mode, DEVICE_TO_DEVICE, printmode, memMode, startDevice, endDevice);
    }

    printf("&&&& Test PASSED\n");

    // the option strings are released by the destructor of 'args'
    return;
}

///////////////////////////////////////////////////////////////////////////////
// Run a bandwidth test
//
// Prints the name of the selected mode and dispatches to the matching
// testBandwidthQuick / testBandwidthRange / testBandwidthShmoo routine.
// start, end and increment are only forwarded in range mode; quick mode always
// uses DEFAULT_SIZE.
///////////////////////////////////////////////////////////////////////////////
void
testBandwidth(unsigned int start, unsigned int end, unsigned int increment,
              testMode mode, memcpyKind kind, printMode printmode,
              memoryMode memMode, int startDevice, int endDevice)
{
    switch( mode )
    {
    case QUICK_MODE:
        printf("Quick Mode\n");
        testBandwidthQuick( DEFAULT_SIZE, kind, printmode, memMode, startDevice, endDevice );
        break;
    case RANGE_MODE:
        printf("Range Mode\n");
        testBandwidthRange(start, end, increment, kind, printmode, memMode, startDevice, endDevice);
        break;
    case SHMOO_MODE:
        printf("Shmoo Mode\n");
        testBandwidthShmoo(kind, printmode, memMode, startDevice, endDevice);
        break;
    default:
        printf("Invalid testing mode\n");
        break;
    }
}

//////////////////////////////////////////////////////////////////////
//  Run
a quick mode bandwidth test//////////////////////////////////////////////////////////////////////voidtestBandwidthQuick(unsigned int size, memcpyKind kind, printMode printmode, memoryMode memMode, int startDevice, int endDevice){ testBandwidthRange(size, size, DEFAULT_INCREMENT, kind, printmode, memMode, startDevice, endDevice);}///////////////////////////////////////////////////////////////////////// Run a range mode bandwidth test//////////////////////////////////////////////////////////////////////voidtestBandwidthRange(unsigned int start, unsigned int end, unsigned int increment, memcpyKind kind, printMode printmode, memoryMode memMode, int startDevice, int endDevice){ //count the number of copies we're going to run unsigned int count = 1 + ((end - start) / increment); unsigned int *memSizes = ( unsigned int * )malloc( count * sizeof( unsigned int ) ); float *bandwidths = ( float * ) malloc( count * sizeof(float) ); //print information for use switch(kind) { case DEVICE_TO_HOST: printf("Device to Host Bandwidth for "); break; case HOST_TO_DEVICE: printf("Host to Device Bandwidth for "); break; case DEVICE_TO_DEVICE: printf("Device to Device Bandwidth\n"); break; } if( DEVICE_TO_DEVICE != kind ) { switch(memMode) { case PAGEABLE: printf("Pageable memory\n"); break; case PINNED: printf("Pinned memory\n"); break; } } // Before calculating the cumulative bandwidth, initialize bandwidths array to NULL for (int i = 0; i < count; i++) bandwidths[i] = 0.0f; // Use the device asked by the user for (int currentDevice = startDevice; currentDevice <= endDevice; currentDevice++) { cudaSetDevice(currentDevice); //run each of the copies for(unsigned int i = 0; i < count; i++) { memSizes[i] = start + i * increment; switch(kind) { case DEVICE_TO_HOST: bandwidths[i] += testDeviceToHostTransfer( memSizes[i], memMode ); break; case HOST_TO_DEVICE: bandwidths[i] += testHostToDeviceTransfer( memSizes[i], memMode ); break; case DEVICE_TO_DEVICE: bandwidths[i] += 
testDeviceToDeviceTransfer( memSizes[i] ); break; } printf("."); } } // Complete the bandwidth computation on all the devices
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -