📄 vout1dfloatn.br
字号:
#include <stdio.h>#include <stdlib.h>extern unsigned int debug_vout;#define debugStreamPrint(stream,title) if (debug_vout) { printf ("%s\n",title); streamPrint(stream,1); printf ("\n\n"); }kernel void kernelReadItem1d (float items[], float index, out float item<>) { item = items[index];}void cpuGetIndexAt1d(float inputindex, float shiftRight, float maxvalue, float *index) { *index=inputindex+shiftRight; *index=*index > maxvalue-1?maxvalue-1:*index<0?0:*index;}const unsigned int MAX_VOUT_STREAMS=31;//if you change this, change one 5 lineskernel void calculateDividedIndex1d(float4 index, float modulus, out float newindex<>){ float epsilon=1.0f/32.0f;//this is needed because the division may result in // loss of accuracy. We know that for a 2048 texture the mantissa holds // 1/32 precision newindex=floor(index.x/modulus+epsilon);}kernel void calculateIndexModulus1d (float4 index, float modulus, float offset, out float which <>) { which= floor(fmod(index.x,modulus)-offset);}#define VECTOR_TEMPLATIZED_FUNCTIONSreduce void valueProducedfloat1d (float input <>, reduce float output<>) { output=isinf(input.x)?output:input;}kernel void isFiniteKernelfloat1d(float inp<>, out float outp<>) { outp=!isinf(inp.x);}int finiteValueProduced1dfloat (float input<>) { float output<1>; float finiteout<1>; float rettype; float ret; debugStreamPrint (input,"Finite Values in..."); valueProducedfloat1d(input,output); streamWrite(output,&rettype); isFiniteKernelfloat1d(output,finiteout); streamWrite(finiteout,&ret); return (int)ret;}kernel void valueAtfloat1d (float value[], float index, out float output<>, float maxvalue, float nothing) { if (index>=maxvalue||index<-.1) output = nothing; else output = value[index];}kernel void NanToBoolRightfloat1d (float value[], out float output<>, float sign, float maxvalue) { float nextPlaceToLook=(indexof output).x+sign; float neighbor; valueAtfloat1d(value,nextPlaceToLook,neighbor,maxvalue,0); output = (isinf(value[indexof(output)].x)?1:0) + (isinf(neighbor.x)?1:0);}kernel void NanToRightfloat1d (float value [], out float output<>, float twotoi, float maxvalue) { float nextPlaceToLook=(indexof output).x+twotoi; float neighbor; valueAtfloat1d(value,nextPlaceToLook,neighbor,maxvalue,0); output = round(value[indexof(output)]+neighbor);}kernel void CountToRightfloat1d (float value [], out float output<>, float twotoi, float maxvalue) { float nextPlaceToLook=(indexof output).x+twotoi; float neighbor; valueAtfloat1d(value,nextPlaceToLook,neighbor,maxvalue,0); output = value[indexof(output)]+neighbor;}kernel void GatherGuessfloat1d(float scatterindex[], out float output<>, float value[], float twotologkminusi, float maxvalue, float halfk, float sign) { float neighbor; float nextPlaceToLook=(indexof output).x-sign*halfk; valueAtfloat1d(scatterindex,nextPlaceToLook,neighbor,maxvalue,0); if (neighbor>halfk) { output=halfk+twotologkminusi; }else { float actualValue; valueAtfloat1d(value,nextPlaceToLook,actualValue,maxvalue,0); if (neighbor==halfk&&!isinf(actualValue.x)) { output=halfk; }else { output = halfk-twotologkminusi; } }}kernel void EstablishGuessfloat1d(float scatterindex[], out float output<>, float value[], float twotologkminusi, float maxvalue, float halfk, float sign) { if (scatterindex[indexof(output)]==0) { output=0; } else { GatherGuessfloat1d(scatterindex, output, value, twotologkminusi, maxvalue, halfk, sign); }}kernel void UpdateGuessfloat1d(float scatterindex[], out float output<>, float value[], float twotologkminusi, float maxvalue, float lastguess<>, float sign) { GatherGuessfloat1d(scatterindex, output, value, twotologkminusi, maxvalue, lastguess, sign);}kernel void RelativeGatherfloat1d(out float output<>, float gatherindex[], float value[], float2 sign, float maxvalue) { float nextPlaceToLook=(indexof output).x-sign.x*gatherindex[indexof(output)]; valueAtfloat1d(value,nextPlaceToLook,output,maxvalue,sign.y); }float shiftValues1dfloat(float list_stream <>, float (*output_stream)<>, int WIDTH, int unused, int sign) { float tmp_stream<WIDTH>; float ret_stream<WIDTH>; float guess_stream<WIDTH>; unsigned int i; float maxvalue; unsigned int logN; unsigned int LogNMinusK; float maxshift; maxvalue=(float)WIDTH; logN=(unsigned int)ceil(log((float)WIDTH)/log(2.0f)); debugStreamPrint(list_stream,"Combined..."); NanToBoolRightfloat1d (list_stream,ret_stream,(float)sign,maxvalue); for (i=1;i<logN;++i) { streamSwap(ret_stream,tmp_stream); NanToRightfloat1d(tmp_stream,ret_stream,(float)sign*(1<<i),maxvalue); } debugStreamPrint(ret_stream,"scattering..."); { float item<1>; float index; if (sign==-1) { index = (float)(WIDTH-1); }else { index=0; } kernelReadItem1d(ret_stream,index,item); streamWrite(item,&maxshift); } LogNMinusK=logN-2; i= logN-1;//could make this k! rather than N // where k = num elements pushed (N-logN%2?ret_stream,tmp_stream EstablishGuessfloat1d(ret_stream, guess_stream, list_stream, (float)(1 << LogNMinusK), maxvalue, (float)(1<<i), (float)sign); for (i=1;i<logN;++i) { LogNMinusK=logN-1-i; streamSwap(tmp_stream,guess_stream); UpdateGuessfloat1d (ret_stream,//scatter values guess_stream,//new guess list_stream,//actual values (float)(1<<LogNMinusK), maxvalue, tmp_stream, //old guess (float)sign); } debugStreamPrint(guess_stream,"Gather Value"); if (1) { unsigned int size,width; size = (unsigned int)WIDTH - (unsigned int) maxshift; width = size; if (1) { float proper_output_stream<width>; if (width) { RelativeGatherfloat1d(proper_output_stream, guess_stream, list_stream, float2((float)sign,1.0f/(float)floor(.5)), maxvalue); } streamSwap(*output_stream,proper_output_stream); } debugStreamPrint(*output_stream, "Final Value"); } return maxshift;}kernel void float1dstreamCombine1(float input0[], float modulus, float offset, float oldoutput<>, out float output <>) { float newindex; float whichmod; calculateDividedIndex1d(indexof(output), modulus, newindex); calculateIndexModulus1d(indexof(output), modulus, offset, whichmod); if (whichmod==0||whichmod==modulus) { output=input0[newindex]; }else { output=oldoutput; }}kernel void float1dstreamCombine2f(float input0[], float input1[], float modulus, out float output <>) { float newindex; float whichmod; calculateDividedIndex1d(indexof(output), modulus, newindex); calculateIndexModulus1d(indexof(output), modulus, -0.5f, whichmod); if (whichmod==0||whichmod==modulus) { output=input0[newindex]; }else { output=input1[newindex]; }}kernel void float1dstreamCombine2(float input0[], float input1[], float modulus, float offset, float oldoutput<>, out float output <>) { float newindex; float whichmod; calculateDividedIndex1d(indexof(output), modulus, newindex); calculateIndexModulus1d(indexof(output), modulus, offset, whichmod); if (whichmod==0||whichmod==modulus) { output=input0[newindex]; }else if (whichmod==1) { output=input1[newindex]; }else { output=oldoutput; }}kernel void float1dstreamCombine3f(float input0[], float input1[], float input2[], float modulus, out float output <>) { float newindex; float whichmod; calculateDividedIndex1d(indexof(output), modulus, newindex); calculateIndexModulus1d(indexof(output), modulus, -0.5f, whichmod); if (whichmod==0||whichmod==modulus) { output=input0[newindex]; }else if (whichmod==1) { output=input1[newindex]; }else { output=input2[newindex]; }}kernel void float1dstreamCombine3(float input0[], float input1[], float input2[], float modulus, float offset, float oldoutput<>, out float output <>) { float newindex; float whichmod; calculateDividedIndex1d(indexof(output), modulus, newindex); calculateIndexModulus1d(indexof(output), modulus, offset, whichmod); if (whichmod==0||whichmod==modulus) { output=input0[newindex]; }else if (whichmod==1) { output=input1[newindex]; }else if (whichmod==2) { output=input2[newindex]; }else { output=oldoutput; }}kernel void float1dstreamCombine4f(float input0[], float input1[], float input2[], float input3[], float modulus, out float output <>) { float newindex; float whichmod; calculateDividedIndex1d(indexof(output), modulus, newindex); calculateIndexModulus1d(indexof(output), modulus, -0.5f, whichmod); if (whichmod==0||whichmod==modulus) { output=input0[newindex]; }else if (whichmod==1) { output=input1[newindex]; }else if (whichmod==2) { output=input2[newindex]; }else { output=input3[newindex]; }}kernel void float1dstreamCombine4(float input0[], float input1[], float input2[], float input3[], float modulus, float offset, float oldoutput<>, out float output <>) { float newindex; float whichmod; calculateDividedIndex1d(indexof(output), modulus, newindex); calculateIndexModulus1d(indexof(output), modulus, offset, whichmod); if (whichmod==0||whichmod==modulus) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -