⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bitonic_sort_2d.br

📁 用于GPU通用计算的编程语言BrookGPU 0.4
💻 BR
字号:
/***************************************************************************  Parallel Bitonic Sort  Sorts ARRAYSIZE numbers in O(ARRAYSIZE * lg^2(ARRAYSIZE) ) time.  ARRAYSIZE should be a power of two  A good explaination of the parallel sorting algorithm can be found at:  http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/bitonicen.htm****************************************************************************/#include <stdio.h>#include <stdlib.h>#include <time.h>kernel void calculateDividedIndex(float4 index,                                   float modulus,                                   float length,                                   out float2 newindex<>){   float epsilon=1.0f/32.0f;   //this is needed because the division may result in   // loss of accuracy.  We know that for a 2048 texture the mantissa holds    // 1/32 precision   newindex=float2(index.x,index.y);   newindex/=modulus;   newindex.x=floor(fmod(newindex.x+frac(newindex.y)*length+epsilon,length));   newindex.y=floor(newindex.y+epsilon);}kernel void calculateIndexModulus (float4 index,                                    float modulus,                                   float mod,                                    float offset,                                   float lengthmodmodulus,                                    out float which <>) {   which= floor(fmod(round(index.y*lengthmodmodulus                 + fmod(index.x,mod)),	               modulus)-offset);}kernel void getIndexAt(float4 inputindex,                       float shiftRight,                       float2 maxvalue,                       out float2 outputindex<>) {   float2 index;   index.x=inputindex.x+shiftRight;   index.y=inputindex.y+floor(index.x/maxvalue.x);   index.x=fmod(index.x,maxvalue.x);   if (index.x<0)      index.x+=maxvalue.x;//only necessary if shiftRight<0   outputindex=index;   //   printf(maxvalue.x,maxvalue.y,outputindex.x,outputindex.y);}kernel void valueAt(float value[][],                    float2 index,                    out float output<>,                    float2 maxvalue,                    float nothing) {   if (index.y>=maxvalue.y||index.y<0)      output = nothing;   else       output = value[index];}// Note: the bitonic sort kernel is written for clarity as an// example, not for optimal GPU performance.// Note2: we pass in twoOffset rather than calculating 2*offset// in the kernel to avoid an apparent code-generation bug in fxckernel void bitonic(float input[][], out float output<>, float stageWidth, float offset, float twoOffset,float2 maxvalue) {  float2 idx1=indexof(output),idx2;  float idx;        float sign, dir;  float min, max;  idx=idx1.x+maxvalue.x*idx1.y;  // either compared with element above or below   sign = ( fmod(idx, twoOffset) < offset) ? 1.0 : -1.0;  // "arrow" direction in the bitonic search algorithm  (see above reference)  dir =  ( fmod( floor(idx/stageWidth), 2) == 0) ? 1.0 : -1.0;  // comparing elements idx1 and idx2  getIndexAt(indexof(output),sign*offset,maxvalue,idx2);  min = (input[idx1] < input[idx2]) ? input[idx1] : input[idx2];  max = (input[idx1] > input[idx2]) ? input[idx1] : input[idx2];  output = (sign == dir) ? min : max;  }int main(int argc, char ** argv) {  int i;  int lg_arraySize;  int flip = 0;  int stage, step;  float segWidth, offset;  float* array;  int wid = argc>1?atoi(argv[1]):256;  int hei = argc>2?atoi(argv[2]):wid;  float sorted1Strm<hei,wid>;  float sorted2Strm<hei,wid>;  int ARRAYSIZE=hei*wid;  float2 maxvalue;  maxvalue.x=wid;maxvalue.y=hei;  array = (float*)malloc(sizeof(float) * ARRAYSIZE);    // compute lg(ArraySize)  lg_arraySize = 0;  for (i=ARRAYSIZE>>1; i; lg_arraySize++)    i = i >> 1;  srand(time(NULL));  // initialize list of ARRAYSIZE random numbers  for (i=0; i<ARRAYSIZE; i++)    array[i] = (float)(rand() % 100);  printf("N = %d  (requires lg_N=%d stages)\n", ARRAYSIZE, lg_arraySize);  printf("Original list:\n");  for (i=0;i<ARRAYSIZE;i++)    printf("%3.2f ", array[i]);  printf("\n\n");  streamRead(sorted1Strm, array);  // lg(ARRAYSIZE) stages  for (stage=1; stage<=lg_arraySize; stage++) {    // width of each sorted segment to be sorted in parallel (2, 4, 8, ...)    segWidth = (float)pow(2.0f, stage);      for (step=1; step<=stage; step++) {      // offset = (stageWidth/2, stageWidth/4, ... , 2, 1)      offset = (float)pow(2.0f, stage-step);        // two buffers required since non-sequential gather is performed from src buffer each step.      // flip src and target streams each iteration      if (!flip)        bitonic(sorted1Strm, sorted2Strm, segWidth, offset, 2*offset,maxvalue);      else        bitonic(sorted2Strm, sorted1Strm, segWidth, offset, 2*offset,maxvalue);      flip = (flip) ? 0 : 1;    }  }  // want to write out the last stream used as on output  if (flip)    streamWrite(sorted2Strm, array);  else    streamWrite(sorted1Strm, array);    printf("Sorted list:\n");  for (i=0;i<ARRAYSIZE;i++)    printf("%3.2f ", array[i]);  printf("\n");  free(array);	return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -