⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 readwrite.br

📁 用于GPU通用计算的编程语言BrookGPU 0.4
💻 BR
字号:
/*
 * readwrite.br
 *
 *      Very simple tests of how long it takes to fill and then read back a
 *      stream.
 */

#include <stdlib.h>
#include <stdio.h>
#include <assert.h>

#include "main.h"
#include "readwrite.h"

/* Flip the condition to 0 to skip verification of the data read back. */
#if 1
#define CHECK_MISMATCH 1
#else
#define CHECK_MISMATCH 0
#endif

/*
 * Stream lengths to benchmark, in ascending order.  The *_Time() entry
 * points walk this table until they reach the caller's maximum length.
 */
static const int lengths[] = {
   1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
   21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
   40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 61, 62, 63, 64, 65, 66,
   67, 68, 69, 70, 80, 90, 100, 120, 140, 160, 180, 200, 220, 240, 256,
   300, 350, 400, 450, 512, 550, 600, 650, 700, 750, 800, 850, 900, 950,
   1024, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2048
};

static const int numLengths = sizeof lengths / sizeof lengths[0];


/*
 * ReadWriteNOP --
 *
 *      We need to touch our streams in order to force the runtime to pull
 *      them down to the card, so we use the simplest kernel to accomplish
 *      that: copy every input element straight to the output.
 */

kernel void
ReadWriteNOP(float4 s<>, out float4 o<>)
{
   o = s;
}


/*
 * ReadWriteBuildData --
 *
 *      Allocate and fill the host-side input and output buffers for a
 *      read/write test.  Element i of the input has all four components
 *      set to (float) i; every output element is set to -1.0f in all
 *      components, a value that never appears in the input pattern.
 *
 *      On return *input and *output each point to numEntries float4's
 *      obtained from malloc(); the caller owns them and must free() both.
 */

static void
ReadWriteBuildData(float4 **input, float4 **output, int numEntries)
{
   int i;

   /*
    * Check the allocations themselves (*input / *output), not the
    * addresses of the caller's pointers, which are never NULL.
    */
   *input = (float4 *) malloc(numEntries * sizeof **input);
   assert(*input != NULL);

   *output = (float4 *) malloc(numEntries * sizeof **output);
   assert(*output != NULL);

   for (i = 0; i < numEntries; i++) {
      (*input)[i].x = (float) i;
      (*input)[i].y = (float) i;
      (*input)[i].z = (float) i;
      (*input)[i].w = (float) i;

      (*output)[i] = float4(-1.0f, -1.0f, -1.0f, -1.0f);
   }
}


/*
 * ReadWriteProcessTiming --
 *
 *      Calculate the throughput / bandwidth and verify that the kernel
 *      executed correctly.
 *
 *      Prints one result row (length, elapsed microseconds, throughput,
 *      test name) and, when CHECK_MISMATCH is enabled, reports the first
 *      element that fails verification.  An element fails when its output
 *      .x differs from the input .x by more than 0.1f * input .x, or when
 *      the output .y, .z or .w is not exactly equal to the output .x.
 *
 *      NOTE: stop and start are both tunnelled as globals
 *
 *      NOTE(review): the throughput column is numBytes divided by elapsed
 *      microseconds, while the table header printed by the callers labels
 *      it "MFloats/sec" -- confirm which unit is intended.
 */

static void
ReadWriteProcessTiming(const char *name, float4 *input, float4 *output,
                       int length, int numEntries)
{
   int i, numBytes = numEntries * sizeof *input;

   printf("%9d\t%6d\t\t%6.2f\t\t(* %s *)\n",
          length, (int) CyclesToUsecs(stop - start),
          numBytes / (float) CyclesToUsecs(stop - start), name);

   for (i = 0; CHECK_MISMATCH && i < numEntries; i++) {
      if ((input[i].x != output[i].x &&
           (output[i].x - input[i].x > 0.1f*input[i].x ||
            input[i].x - output[i].x > 0.1f*input[i].x)) ||
          output[i].x != output[i].y ||
          output[i].x != output[i].z ||
          output[i].x != output[i].w) {
         printf("(* Mismatch %d/%d.  In: %f, Out: %f\t\t%s *)\n",
                 i, numEntries, input[i].x, output[i].x, name);
         break;   /* Only the first mismatch is reported. */
      }
   }
}


/*
 * ReadWrite1D --
 *
 *      Measure how fast we can streamRead() and streamWrite() a 1D stream
 *      of float4's of the specified length.  The timed region covers the
 *      streamRead(), the NOP kernel and the streamWrite(); host buffer
 *      setup and teardown are outside it.
 */

static void
ReadWrite1D(const char *logName, int length)
{
   float4 *in, *output;
   float4 s<length>, o<length>;

   ReadWriteBuildData(&in, &output, length);

   start = GetTimeTSC();
   streamRead(s, in);
   ReadWriteNOP(s, o);
   streamWrite(o, output);
   stop = GetTimeTSC();

   ReadWriteProcessTiming(logName, in, output, length, length);

   free(in);
   free(output);
}


/*
 * ReadWrite2D --
 *
 *      Measure how fast we can streamRead() and streamWrite() a 2D stream
 *      of float4's of the specified length in each dimension (so
 *      length * length elements in total).
 */

static void
ReadWrite2D(const char *logName, int length)
{
   float4 *in, *output;
   float4 s<length, length>, o<length, length>;

   ReadWriteBuildData(&in, &output, length * length);

   start = GetTimeTSC();
   streamRead(s, in);
   ReadWriteNOP(s, o);
   streamWrite(o, output);
   stop = GetTimeTSC();

   ReadWriteProcessTiming(logName, in, output, length, length * length);

   free(in);
   free(output);
}


/*
 * ReadWrite1D_Time --
 *
 *      Run the 1D read/write benchmark for every table length strictly
 *      below maxLength, then once more for maxLength itself, printing a
 *      header first and a blank line afterwards.
 */

void
ReadWrite1D_Time(int maxLength)
{
   float4 s<1>, o<1>;
   int i;

   /* Prevent kernel cold start costs from inflating ReadWrite1D() */
   ReadWriteNOP(s, o);

   printf("(* ReadWrite1D: max length %d *)\n"
          "(* length\t usecs\t\tMFloats/sec *)\n", maxLength);

   for (i = 0; i < numLengths && lengths[i] < maxLength; i++) {
      ReadWrite1D("RW1D", lengths[i]);
   }

   ReadWrite1D("RW1D", maxLength);
   printf("\n");
}


/*
 * ReadWrite2D_Time --
 *
 *      Run the 2D read/write benchmark for every table length strictly
 *      below maxLength, then once more for maxLength itself, printing a
 *      header first.
 */

void
ReadWrite2D_Time(int maxLength)
{
   float4 s<1>, o<1>;
   int i;

   /* Prevent kernel cold start costs from inflating ReadWrite2D() */
   ReadWriteNOP(s, o);

   printf("(* ReadWrite2D: max length %d *)\n"
          "(* length\t usecs\t\tMFloats/sec *)\n", maxLength);

   for (i = 0; i < numLengths && lengths[i] < maxLength; i++) {
      ReadWrite2D("RW2D", lengths[i]);
   }

   ReadWrite2D("RW2D", maxLength);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -