📄 vout1d.br
字号:
#include <stdio.h>
#include <stdlib.h>
extern unsigned int debug_vout;
extern float absolutezero;
// Debug helper: when the global debug_vout flag is non-zero, prints `title` on its
// own line, dumps `stream` via streamPrint(stream,1), then prints two newlines.
// NOTE(review): this expands to a bare `if (...) { ... }`, so placing it directly
// before an `else` would capture that else; use it only as a standalone statement.
#define debugStreamPrint(stream,title) if (debug_vout) { printf ("%s\n",title); streamPrint(stream,1); printf ("\n\n"); }
// Gather kernel: copies the element of `items` found at position `index`
// into the output stream element `item`.
kernel void kernelReadItem1d(float items[],
                             float index,
                             out float item<>) {
  item = items[index];
}
// Host-side index computation: adds `shiftRight` to `inputindex` and stores the
// result through `index`, limited to maxvalue-1 from above and, when that upper
// limit is not exceeded, to 0 from below.
//   inputindex  starting position
//   shiftRight  offset added to the starting position
//   maxvalue    number of valid positions; the largest result is maxvalue-1
//   index       out-parameter receiving the resulting position
void cpuGetIndexAt1d(float inputindex,
                     float shiftRight,
                     float maxvalue,
                     float *index) {
  const float upper = maxvalue - 1;
  float shifted = inputindex + shiftRight;

  // The upper bound is checked first, so it wins when maxvalue-1 is negative.
  if (shifted > upper) {
    shifted = upper;
  } else if (shifted < 0) {
    shifted = 0;
  }
  *index = shifted;
}
// Limit on the number of vout streams (the constant is not referenced in this excerpt).
// NOTE(review): the original remark read "if you change this, change one 5 lines";
// presumably another hard-coded copy of this limit nearby must be kept in sync - TODO confirm where.
const unsigned int MAX_VOUT_STREAMS=31;
// Writes floor(index.x / modulus + 1/32) to `newindex`.
//   index     position whose .x component is divided
//   modulus   divisor
//   newindex  receives the integral quotient
kernel void calculateDividedIndex1d(float4 index,
                                    float modulus,
                                    out float newindex<>) {
  // The small bias is needed because the division may lose accuracy; per the
  // original author, for a 2048 texture the mantissa holds 1/32 precision.
  float epsilon = 1.0f / 32.0f;
  float quotient = index.x / modulus;
  newindex = floor(quotient + epsilon);
}
// Writes floor(fmod(index.x, modulus) - offset) to `which`.
//   index    position whose .x component is reduced modulo `modulus`
//   modulus  divisor for the remainder
//   offset   amount subtracted from the remainder before flooring
//   which    receives the result
kernel void calculateIndexModulus1d(float4 index,
                                    float modulus,
                                    float offset,
                                    out float which<>) {
  float remainder = fmod(index.x, modulus);
  which = floor(remainder - offset);
}
// NOTE(review): presumably a marker for the Brook toolchain that the definitions
// below are templates to be instantiated with BRT_TYPE replaced by concrete types
// (later kernels call valueAtfloat1d, which looks like such an instantiation) - TODO confirm.
#define VECTOR_TEMPLATIZED_FUNCTIONS
// Reduction step: the running value `output` is replaced by `input` unless
// input.x is infinite, in which case the running value is kept unchanged.
// Only the .x component is tested.
reduce void valueProducedBRT_TYPE1d (BRT_TYPE input <>,
reduce BRT_TYPE output<>) {
output=isinf(input.x)?output:input;
}
// Per-element flag: `outp` is 1 when inp.x is not infinite and 0 when it is.
// Only the .x component is tested.
kernel void isFiniteKernelBRT_TYPE1d(BRT_TYPE inp<>,
                                     out float outp<>) {
  outp = isinf(inp.x) ? 0 : 1;
}
// Reduces `input` to a single element with valueProducedBRT_TYPE1d, runs
// isFiniteKernelBRT_TYPE1d on that element, and returns the resulting flag
// converted to int (1 when the reduced element's .x is not infinite, else 0).
// The reduced element itself is also read back to the host but not used further.
int finiteValueProduced1dBRT_TYPE(BRT_TYPE input<>) {
  BRT_TYPE reduced<1>;
  float finiteFlag<1>;
  BRT_TYPE reducedHost;
  float flagHost;

  debugStreamPrint(input, "Finite Values in...");

  // Collapse the stream to one representative element and read it back.
  valueProducedBRT_TYPE1d(input, reduced);
  streamWrite(reduced, &reducedHost);

  // Turn that element into a 0/1 flag and fetch the flag.
  isFiniteKernelBRT_TYPE1d(reduced, finiteFlag);
  streamWrite(finiteFlag, &flagHost);

  return (int)flagHost;
}
// Bounds-checked gather: `output` receives value[index] unless `index` is
// below -0.1 or at/after `maxvalue`, in which case it receives `nothing`.
//   value     array gathered from
//   index     position to read
//   output    receives the element or the fallback
//   maxvalue  first position considered out of range
//   nothing   fallback written for out-of-range positions
kernel void valueAtBRT_TYPE1d(BRT_TYPE value[],
                              float index,
                              out BRT_TYPE output<>,
                              float maxvalue,
                              float nothing) {
  if (index < -.1 || index >= maxvalue) {
    output = nothing;
  } else {
    output = value[index];
  }
}
// For each output position, counts how many of {this element, the element
// `sign` positions away} have an infinite .x component; the result is 0, 1 or 2.
// An out-of-range neighbor is read as 0 by valueAtBRT_TYPE1d and so never counts.
kernel void NanToBoolRightBRT_TYPE1d(BRT_TYPE value[],
                                     out float output<>,
                                     float sign,
                                     float maxvalue) {
  float neighborPos = (indexof output).x + sign;
  float selfFlag = isinf(value[indexof(output)].x) ? 1 : 0;
  float neighborFlag;
  BRT_TYPE neighbor;

  valueAtBRT_TYPE1d(value, neighborPos, neighbor, maxvalue, 0);
  neighborFlag = isinf(neighbor.x) ? 1 : 0;
  output = selfFlag + neighborFlag;
}
// For each output position, adds the float at this position to the float
// `twotoi` positions away (read as 0 when out of range) and rounds the sum.
// Operates on float data regardless of BRT_TYPE and calls valueAtfloat1d.
kernel void NanToRightBRT_TYPE1d(float value[],
                                 out float output<>,
                                 float twotoi,
                                 float maxvalue) {
  float neighborPos = (indexof output).x + twotoi;
  float neighbor;
  float total;

  valueAtfloat1d(value, neighborPos, neighbor, maxvalue, 0);
  total = value[indexof(output)] + neighbor;
  output = round(total);
}
// For each output position, adds the element at this position to the element
// `twotoi` positions away (read as 0 when out of range). No rounding is applied.
kernel void CountToRightBRT_TYPE1d(BRT_TYPE value[],
                                   out BRT_TYPE output<>,
                                   float twotoi,
                                   float maxvalue) {
  float neighborPos = (indexof output).x + twotoi;
  BRT_TYPE self = value[indexof(output)];
  BRT_TYPE neighbor;

  valueAtBRT_TYPE1d(value, neighborPos, neighbor, maxvalue, 0);
  output = self + neighbor;
}
// One refinement step of a per-element guess. The kernel probes the position
// sign*halfk away from the output position and compares the scatterindex
// entry found there (0 when out of range) with the current guess `halfk`:
//   entry >  halfk                                   -> halfk + twotologkminusi
//   entry == halfk and value there not infinite (.x)  -> halfk
//   otherwise                                         -> halfk - twotologkminusi
kernel void GatherGuessBRT_TYPE1d(float scatterindex[],
                                  out float output<>,
                                  BRT_TYPE value[],
                                  float twotologkminusi,
                                  float maxvalue,
                                  float halfk,
                                  float sign) {
  float probe = (indexof output).x - sign * halfk;
  float shiftAtProbe;
  BRT_TYPE valueAtProbe;

  // Both gathers are pure reads, so they are fetched up front.
  valueAtfloat1d(scatterindex, probe, shiftAtProbe, maxvalue, 0);
  valueAtBRT_TYPE1d(value, probe, valueAtProbe, maxvalue, 0);

  if (shiftAtProbe > halfk) {
    output = halfk + twotologkminusi;
  } else if (shiftAtProbe == halfk && !isinf(valueAtProbe.x)) {
    output = halfk;
  } else {
    output = halfk - twotologkminusi;
  }
}
// Produces the initial guess: positions whose own scatterindex entry is 0 get
// guess 0; every other position is delegated to GatherGuessBRT_TYPE1d with
// the same arguments.
kernel void EstablishGuessBRT_TYPE1d(float scatterindex[],
                                     out float output<>,
                                     BRT_TYPE value[],
                                     float twotologkminusi,
                                     float maxvalue,
                                     float halfk,
                                     float sign) {
  float ownShift = scatterindex[indexof(output)];

  if (ownShift == 0) {
    output = 0;
  } else {
    GatherGuessBRT_TYPE1d(scatterindex, output, value,
                          twotologkminusi, maxvalue, halfk, sign);
  }
}
// Refines an existing guess: forwards to GatherGuessBRT_TYPE1d, passing the
// per-element previous guess `lastguess` where that kernel expects `halfk`.
kernel void UpdateGuessBRT_TYPE1d(float scatterindex[],
                                  out float output<>,
                                  BRT_TYPE value[],
                                  float twotologkminusi,
                                  float maxvalue,
                                  float lastguess<>,
                                  float sign) {
  GatherGuessBRT_TYPE1d(scatterindex, output, value,
                        twotologkminusi, maxvalue, lastguess, sign);
}
// Relative gather: each output position reads `value` at
// (own position - sign.x * gatherindex[own position]).
// When that source position is out of range, sign.y is written instead.
kernel void RelativeGatherBRT_TYPE1d(out BRT_TYPE output<>,
                                     float gatherindex[],
                                     BRT_TYPE value[],
                                     float2 sign,
                                     float maxvalue) {
  float displacement = sign.x * gatherindex[indexof(output)];
  float sourcePos = (indexof output).x - displacement;

  valueAtBRT_TYPE1d(value, sourcePos, output, maxvalue, sign.y);
}
// Host driver that builds a shorter stream from `list_stream` in four phases:
//   1. per-position counts of entries whose .x is infinite (doubling passes),
//   2. read-back of one count (`maxshift`) from the end selected by `sign`,
//   3. iterative per-position guess refinement (Establish/UpdateGuess),
//   4. relative gather into a new stream of WIDTH - maxshift elements, which is
//      then swapped into *output_stream.
// Parameters:
//   list_stream    input stream
//   output_stream  receives the newly built stream via streamSwap
//   WIDTH          number of elements; also passed to the kernels as maxvalue
//   unused         not referenced in the body
//   sign           direction multiplier; -1 selects index WIDTH-1 for the
//                  read-back, any other value selects index 0
// Returns maxshift.
// NOTE(review): LogNMinusK and i are unsigned, so for WIDTH <= 2 (logN <= 1)
// `logN-2` wraps around, and for WIDTH == 1 `logN-1` wraps as well; the shift
// counts used for EstablishGuess are then invalid. Confirm callers never pass
// such small widths.
float shiftValues1dBRT_TYPE(BRT_TYPE list_stream <>,
BRT_TYPE (*output_stream)<>,
int WIDTH,
int unused,
int sign) {
float tmp_stream<WIDTH>;
float ret_stream<WIDTH>;
float guess_stream<WIDTH>;
unsigned int i;
float maxvalue;
unsigned int logN;
unsigned int LogNMinusK;
float maxshift;
maxvalue=(float)WIDTH;
// Number of doubling passes: ceil(log2(WIDTH)).
logN=(unsigned int)ceil(log((float)WIDTH)/log(2.0f));
debugStreamPrint(list_stream,"Combined...");
// Phase 1: the first pass counts infinite .x in each element and its neighbor at
// distance `sign`; each later pass adds the count found sign*2^i positions away.
NanToBoolRightBRT_TYPE1d (list_stream,ret_stream,(float)sign,maxvalue);
for (i=1;i<logN;++i) {
streamSwap(ret_stream,tmp_stream);
NanToRightBRT_TYPE1d(tmp_stream,ret_stream,(float)sign*(1<<i),maxvalue);
}
debugStreamPrint(ret_stream,"scattering...");
{
// Phase 2: read one accumulated count back to the host; given the passes above
// this appears to be the total number of infinite entries - TODO confirm.
float item<1>;
float index;
if (sign==-1) {
index = (float)(WIDTH-1);
}else {
index=0;
}
kernelReadItem1d(ret_stream,index,item);
streamWrite(item,&maxshift);
}
// Phase 3: initial guess is 2^(logN-1) with step 2^(logN-2).
LogNMinusK=logN-2;
i= logN-1;//could make this k! rather than N
// where k = num elements pushed (N-logN%2?ret_stream,tmp_stream
EstablishGuessBRT_TYPE1d(ret_stream,
guess_stream,
list_stream,
(float)(1 << LogNMinusK),
maxvalue,
(float)(1<<i),
(float)sign);
// Each pass refines the previous guess (moved into tmp_stream by the swap)
// with step 2^(logN-1-i).
for (i=1;i<logN;++i) {
LogNMinusK=logN-1-i;
streamSwap(tmp_stream,guess_stream);
UpdateGuessBRT_TYPE1d (ret_stream,//scatter values
guess_stream,//new guess
list_stream,//actual values
(float)(1<<LogNMinusK),
maxvalue,
tmp_stream, //old guess
(float)sign);
}
debugStreamPrint(guess_stream,"Gather Value");
if (1) {
// Phase 4: the result has WIDTH - maxshift elements.
unsigned int size,width;
size = (unsigned int)WIDTH
- (unsigned int) maxshift;
width = size;
if (1) {
BRT_TYPE proper_output_stream<width>;
// The gather is skipped for an empty result; out-of-range reads are filled with
// 1.0f/absolutezero (presumably infinity - TODO confirm the value of absolutezero).
if (width) {
RelativeGatherBRT_TYPE1d(proper_output_stream,
guess_stream,
list_stream,
float2((float)sign,1.0f/absolutezero),
maxvalue);
}
// Hand the freshly built stream to the caller.
streamSwap(*output_stream,proper_output_stream);
}
debugStreamPrint(*output_stream, "Final Value");
}
return maxshift;
}
// Interleaving kernel with one source array. For each output position:
//   srcIndex = floor(position.x / modulus + 1/32)      (calculateDividedIndex1d)
//   slot     = floor(fmod(position.x, modulus) - offset) (calculateIndexModulus1d)
// When slot is 0 or equals modulus the element input0[srcIndex] is written;
// otherwise the corresponding element of `oldoutput` is passed through.
kernel void BRT_TYPE1dstreamCombine1(BRT_TYPE input0[],
                                     float modulus,
                                     float offset,
                                     BRT_TYPE oldoutput<>,
                                     out BRT_TYPE output<>) {
  float slot;
  float srcIndex;

  calculateIndexModulus1d(indexof(output), modulus, offset, slot);
  calculateDividedIndex1d(indexof(output), modulus, srcIndex);

  if (slot == modulus || slot == 0) {
    output = input0[srcIndex];
  } else {
    output = oldoutput;
  }
}
// Interleaving kernel with two source arrays and a fixed offset of -0.5.
// For each output position:
//   srcIndex = floor(position.x / modulus + 1/32)
//   slot     = floor(fmod(position.x, modulus) + 0.5)
// When slot is 0 or equals modulus the element comes from input0[srcIndex];
// every other slot takes input1[srcIndex].
kernel void BRT_TYPE1dstreamCombine2f(BRT_TYPE input0[],
                                      BRT_TYPE input1[],
                                      float modulus,
                                      out BRT_TYPE output<>) {
  float slot;
  float srcIndex;

  calculateIndexModulus1d(indexof(output), modulus, -0.5f, slot);
  calculateDividedIndex1d(indexof(output), modulus, srcIndex);

  if (slot == modulus || slot == 0) {
    output = input0[srcIndex];
  } else {
    output = input1[srcIndex];
  }
}
// Interleaving kernel with two source arrays and a caller-supplied offset.
// For each output position:
//   srcIndex = floor(position.x / modulus + 1/32)
//   slot     = floor(fmod(position.x, modulus) - offset)
// slot 0 (or slot == modulus) takes input0[srcIndex], slot 1 takes
// input1[srcIndex], and any other slot passes `oldoutput` through.
// The input0 test is evaluated first, so it wins if both conditions hold.
kernel void BRT_TYPE1dstreamCombine2(BRT_TYPE input0[],
                                     BRT_TYPE input1[],
                                     float modulus,
                                     float offset,
                                     BRT_TYPE oldoutput<>,
                                     out BRT_TYPE output<>) {
  float slot;
  float srcIndex;

  calculateIndexModulus1d(indexof(output), modulus, offset, slot);
  calculateDividedIndex1d(indexof(output), modulus, srcIndex);

  if (slot == modulus || slot == 0) {
    output = input0[srcIndex];
  } else if (slot == 1) {
    output = input1[srcIndex];
  } else {
    output = oldoutput;
  }
}
kernel void BRT_TYPE1dstreamCombine3f(BRT_TYPE input0[],
BRT_TYPE input1[],
BRT_TYPE input2[],
float modulus,
out BRT_TYPE output <>) {
float newindex;
float whichmod;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -