📄 bitonic_sort.br
字号:
/*************************************************************************** Parallel Bitonic Sort Sorts ARRAYSIZE numbers in O(ARRAYSIZE * lg^2(ARRAYSIZE) ) time. ARRAYSIZE should be a power of two A good explaination of the parallel sorting algorithm can be found at: http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/bitonicen.htm****************************************************************************/#include <stdio.h>#include <stdlib.h>#include <time.h>#define ARRAYSIZE 32// Note: the bitonic sort kernel is written for clarity as an// example, not for optimal GPU performance.// Note2: we pass in twoOffset rather than calculating 2*offset// in the kernel to avoid an apparent code-generation bug in fxckernel void bitonic(iter float idx1<>, float input[ARRAYSIZE], out float output<>, float stageWidth, float offset, float twoOffset) { float idx2; float sign, dir; float min, max; // either compared with element above or below sign = ( fmod(idx1, twoOffset) < offset) ? 1.0 : -1.0; // "arrow" direction in the bitonic search algorithm (see above reference) dir = ( fmod( floor(idx1/stageWidth), 2) == 0) ? 1.0 : -1.0; // comparing elements idx1 and idx2 idx2 = idx1 + sign*offset; min = (input[idx1] < input[idx2]) ? input[idx1] : input[idx2]; max = (input[idx1] > input[idx2]) ? input[idx1] : input[idx2]; output = (sign == dir) ? min : max; }int main() { int i; int lg_arraySize; int flip = 0; int stage, step; float segWidth, offset; float* array; float sorted1Strm<ARRAYSIZE>; float sorted2Strm<ARRAYSIZE>; float rangeEnd = ARRAYSIZE; // HACK: assigning to float variable first, to get around compiler for ensuring iter constructor iter float idxStrm<ARRAYSIZE> = iter(0.0, rangeEnd); array = (float*)malloc(sizeof(float) * ARRAYSIZE); // compute lg(ArraySize) lg_arraySize = 0; for (i=ARRAYSIZE>>1; i; lg_arraySize++) i = i >> 1; srand(time(NULL)); // initialize list of ARRAYSIZE random numbers for (i=0; i<ARRAYSIZE; i++) array[i] = (float)(rand() % 100); printf("N = %d (requires lg_N=%d stages)\n", ARRAYSIZE, lg_arraySize); printf("Original list:\n"); for (i=0;i<ARRAYSIZE;i++) printf("%3.2f ", array[i]); printf("\n\n"); streamRead(sorted1Strm, array); // lg(ARRAYSIZE) stages for (stage=1; stage<=lg_arraySize; stage++) { // width of each sorted segment to be sorted in parallel (2, 4, 8, ...) segWidth = (float)pow(2.0f, stage); for (step=1; step<=stage; step++) { // offset = (stageWidth/2, stageWidth/4, ... , 2, 1) offset = (float)pow(2.0f, stage-step); // two buffers required since non-sequential gather is performed from src buffer each step. // flip src and target streams each iteration if (!flip) bitonic( idxStrm, sorted1Strm, sorted2Strm, segWidth, offset, 2*offset); else bitonic( idxStrm, sorted2Strm, sorted1Strm, segWidth, offset, 2*offset); flip = (flip) ? 0 : 1; } } // want to write out the last stream used as on output if (flip) streamWrite(sorted2Strm, array); else streamWrite(sorted1Strm, array); printf("Sorted list:\n"); for (i=0;i<ARRAYSIZE;i++) printf("%3.2f ", array[i]); printf("\n"); free(array); return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -