📄 vout.br
字号:
}else if (whichmod==5) {
output=input5[newindex];
}else if (whichmod==6) {
output=input6[newindex];
}else {
output=input7[newindex];
}
}
*/
/*
 * combineStreamsHelperBRT_TYPE
 *
 * Merges `num` further input streams into `output`, at most four per pass.
 * Each pass runs the BRT_TYPEstreamCombine{1..4} kernel matching the number
 * of streams handled; if more than four streams are left, the function calls
 * itself on the remainder.
 *
 *   streams           array of pointers to the input streams still to merge
 *   num               number of entries in `streams`
 *   output            stream handed to every kernel as its last two
 *                     arguments (presumably "previous contents" and
 *                     "destination" -- confirm against the kernel definitions)
 *   modulus           forwarded unchanged to the kernels
 *   offset            forwarded to the kernels; increased by 4 for each
 *                     recursive pass (one pass per four streams consumed)
 *   length            forwarded unchanged to the kernels
 *   lengthmodmodulus  forwarded unchanged to the kernels
 *
 * NOTE(review): the meaning of modulus/offset/length inside the kernels is
 * not visible from this function.
 */
void combineStreamsHelperBRT_TYPE (BRT_TYPE (**streams) <>,
                                   unsigned int num,
                                   BRT_TYPE output<>,
                                   float modulus,
                                   float offset,
                                   float length,
                                   float lengthmodmodulus) {
   switch (num) {
   case 0:
      /* Nothing to merge.  Without this case an empty list would reach the
         default branch and dereference streams[0..3]. */
      return;
   case 1:
      BRT_TYPEstreamCombine1(*streams[0],
                             modulus,
                             offset,
                             length,
                             lengthmodmodulus,
                             output,
                             output);
      break;
   case 2:
      BRT_TYPEstreamCombine2(*streams[0],
                             *streams[1],
                             modulus,
                             offset,
                             length,
                             lengthmodmodulus,
                             output,
                             output);
      break;
   case 3:
      BRT_TYPEstreamCombine3(*streams[0],
                             *streams[1],
                             *streams[2],
                             modulus,
                             offset,
                             length,
                             lengthmodmodulus,
                             output,
                             output);
      break;
   case 4:
   default:
      /* Four or more streams: merge the first four now ... */
      BRT_TYPEstreamCombine4(*streams[0],
                             *streams[1],
                             *streams[2],
                             *streams[3],
                             modulus,
                             offset,
                             length,
                             lengthmodmodulus,
                             output,
                             output);
      /* ... and hand whatever is left to another pass. */
      if (num>4) {
         combineStreamsHelperBRT_TYPE(streams+4,
                                      num-4,
                                      output,
                                      modulus,
                                      4+offset,
                                      length,
                                      lengthmodmodulus);
      }
      break;
   }
}
/*
 * combineStreamsBRT_TYPE
 *
 * Combines `num` input streams into *output.
 *
 * *output is first swapped with a newly declared stream of dimensions
 * (num*width) x length.  Then:
 *   num == 0   return immediately (the swap above has already happened)
 *   num == 1   *output is swapped with *streams[0]
 *   num 2..4   the matching BRT_TYPEstreamCombine{2,3,4}f kernel is run
 *   num >= 5   BRT_TYPEstreamCombine5f handles the first five streams and
 *              combineStreamsHelperBRT_TYPE folds in the rest, starting at
 *              offset 4.5
 *
 * A warning is written to stderr when num exceeds MAX_VOUT_STREAMS; the
 * combination is still carried out in that case.
 *
 *   streams  array of pointers to the input streams
 *   num      number of entries in `streams`
 *   width    first dimension of one input; the result uses num*width
 *   length   second dimension of the result; length % num is passed on to
 *            the kernels
 *   output   pointer to the stream that receives the result
 */
void combineStreamsBRT_TYPE (BRT_TYPE (**streams)<>,
                             unsigned int num,
                             unsigned int width,
                             unsigned int length,
                             BRT_TYPE (*output)<>) {
   /* length % num, guarded against num == 0 */
   unsigned int leftover = (num == 0) ? 0 : (length % num);
   BRT_TYPE scratch<(num*width),length>;
   /* The kernels take their scalar arguments as float. */
   float modulusF = (float)num;
   float lengthF = (float)length;
   float leftoverF = (float)leftover;

   /* Give *output the newly sized stream. */
   streamSwap(scratch,*output);

   if (num == 0) {
      return;
   }

   if (num == 1) {
      streamSwap(*output,*streams[0]);
   } else if (num == 2) {
      BRT_TYPEstreamCombine2f(*streams[0],
                              *streams[1],
                              modulusF,
                              lengthF,
                              leftoverF,
                              *output);
   } else if (num == 3) {
      BRT_TYPEstreamCombine3f(*streams[0],
                              *streams[1],
                              *streams[2],
                              modulusF,
                              lengthF,
                              leftoverF,
                              *output);
   } else if (num == 4) {
      BRT_TYPEstreamCombine4f(*streams[0],
                              *streams[1],
                              *streams[2],
                              *streams[3],
                              modulusF,
                              lengthF,
                              leftoverF,
                              *output);
   } else {
      /* Five or more: the first five go through the "f" kernel ... */
      BRT_TYPEstreamCombine5f(*streams[0],
                              *streams[1],
                              *streams[2],
                              *streams[3],
                              *streams[4],
                              modulusF,
                              lengthF,
                              leftoverF,
                              *output);
      if (num>MAX_VOUT_STREAMS) {
         fprintf (stderr,
                  "vout stream not reconstituted properly. %s%d streams",
                  "Too little precision for more than ",MAX_VOUT_STREAMS);
      }
      /* ... and the helper merges the remaining ones into *output. */
      if (num>5) {
         combineStreamsHelperBRT_TYPE(streams+5,
                                      num-5,
                                      *output,
                                      modulusF,
                                      4.5f,
                                      lengthF,
                                      leftoverF);
      }
   }
}
/*
 * combineBRT_TYPE
 *
 * Produces one output stream from two indexed inputs.
 *
 * For every output element the element's own index is compared with
 * startsecond, y component first and x component second:
 *   - index before startsecond:      output = input1 at the same index
 *   - index at or after startsecond: the index is remapped with getIndexAt
 *     into `secondindex`; if secondindex is at or after endsecond the
 *     element is set to `inf`, otherwise output = input2[secondindex]
 *
 *   input1, input2   inputs read by explicit index
 *   output           result stream
 *   startsecond      first output index that is taken from input2
 *   endsecond        first remapped index that no longer reads input2
 *   maxinput2value   passed to getIndexAt as its third argument
 *   inf              value written for elements past endsecond
 *
 * NOTE(review): getIndexAt is defined elsewhere; from its use here it
 * appears to write the resulting index into its last argument -- confirm.
 */
kernel void combineBRT_TYPE (BRT_TYPE input1[][],
BRT_TYPE input2[][],
out BRT_TYPE output<>,
float2 startsecond,
float2 endsecond,
float2 maxinput2value,
float inf){
// startsecond used to be computed here; it is now a parameter:
// getIndexAt(float4(maxinputvalue.x,maxinputvalue.y-1,0,0),
// -shift1,
// maxinput1value,
// startsecond);//moved outside kernel
// Is this element at or after startsecond (y compared first, then x)?
if ((indexof(output)).y>startsecond.y
||((indexof(output)).y==startsecond.y
&&(indexof(output)).x>=startsecond.x)) {
float2 secondindex;
// Remap the output index (y taken relative to startsecond.y, shifted by
// -startsecond.x) into secondindex.
getIndexAt(float4((indexof(output)).x,
(indexof(output)).y-startsecond.y,
0,
0),
-startsecond.x,
maxinput2value,
secondindex);
// endsecond used to be computed here; it is now a parameter:
//getIndexAt(float4(maxinputvalue.x,maxinputvalue.y-1,0,0),
// -shift2,
// maxinput1value,
// endsecond); // moved outside kernel
// At or after endsecond (same y-then-x ordering): write the filler value.
if (secondindex.y>endsecond.y
||(secondindex.y==endsecond.y
&&secondindex.x>=endsecond.x)) {
output=inf;
}else {
output = input2[secondindex];
}
}else {
// Before startsecond: copy straight from input1.
output=input1[indexof(output)];
}
}
/*
 * voutCombineBRT_TYPE
 *
 * Runs the combineBRT_TYPE kernel over two streams and leaves the result in
 * *output.
 *
 * The element count of the result is
 *     LEFTWIDTH*LENGTH + RIGHTWIDTH*LENGTH
 *       - (unsigned int)maxshiftleft - (unsigned int)maxshiftright
 * and the result stream is declared with LENGTH as its second dimension and
 * just enough rows in the first dimension to hold that many elements
 * (rounded up).
 *
 *   outleft_stream   first input of the kernel
 *   maxshiftleft     subtracted from the left element count; its negation is
 *                    passed to cpuGetIndexAt to obtain the kernel's
 *                    `startsecond`
 *   LEFTWIDTH        first dimension of the left input
 *   outright_stream  second input of the kernel
 *   maxshiftright    as maxshiftleft, for the right input / `endsecond`
 *   RIGHTWIDTH       first dimension of the right input
 *   LENGTH           second dimension shared by inputs and result
 *   output           pointer to the stream swapped with the result
 *
 * Returns the element count computed above.
 *
 * NOTE(review): `absolutezero` is defined elsewhere; 1.0f/absolutezero is
 * passed as the kernel's `inf` argument -- presumably a float zero used to
 * produce infinity, confirm at its definition.
 */
unsigned int voutCombineBRT_TYPE(BRT_TYPE outleft_stream<>,
                                 float maxshiftleft,
                                 unsigned int LEFTWIDTH,
                                 BRT_TYPE outright_stream<>,
                                 float maxshiftright,
                                 unsigned int RIGHTWIDTH,
                                 unsigned int LENGTH,
                                 BRT_TYPE (*output)<>){
   float2 leftExtent,rightExtent;
   unsigned int kept;

   /* Extents as (x = LENGTH, y = width) pairs. */
   leftExtent.x=(float)LENGTH;
   leftExtent.y=(float)LEFTWIDTH;
   rightExtent.x=(float)LENGTH;
   rightExtent.y=(float)RIGHTWIDTH;

   kept = (LEFTWIDTH*LENGTH
           +RIGHTWIDTH*LENGTH
           -(unsigned int)maxshiftleft-(unsigned int)maxshiftright);

   /* New scope so the result stream can be declared once its size is known. */
   if (1) {
      /* Rows needed for `kept` elements, rounding up. */
      unsigned int rows = kept/LENGTH+((kept%LENGTH!=0)?1:0);
      BRT_TYPE merged<rows,LENGTH>;
      float2 rightBegin,rightEnd;

      /* Index derived from the left extent and -maxshiftleft. */
      cpuGetIndexAt(float2(leftExtent.x,leftExtent.y-1),
                    -maxshiftleft,
                    leftExtent,
                    &rightBegin);
      /* Index derived from the right extent and -maxshiftright. */
      cpuGetIndexAt(float2(rightExtent.x,rightExtent.y-1),
                    -maxshiftright,
                    rightExtent,
                    &rightEnd);

      combineBRT_TYPE(outleft_stream,
                      outright_stream,
                      merged,
                      rightBegin,
                      rightEnd,
                      rightExtent,
                      1.0f/absolutezero);
      streamSwap (*output,merged);
   }
   return kept;
}
/*
 * totalCombineVoutBRT_TYPE
 *
 * Passes both input streams through shiftValuesBRT_TYPE and then merges the
 * two shifted results with voutCombineBRT_TYPE.
 *
 *   list_stream    first input stream
 *   WIDTH          first dimension of list_stream
 *   list2_stream   second input stream
 *   WIDTH2         first dimension of list2_stream
 *   LENGTH         second dimension shared by both inputs
 *   output         pointer to the stream that receives the merged result
 *
 * Returns whatever voutCombineBRT_TYPE returns.
 *
 * The temporaries are declared with element type BRT_TYPE (they used to be
 * plain `float` streams) so that they match the BRT_TYPE stream parameter of
 * voutCombineBRT_TYPE for every instantiation of this file, not only the
 * float one.
 *
 * NOTE(review): the meaning of the trailing -1 argument of
 * shiftValuesBRT_TYPE is not visible here -- confirm at its definition.
 */
unsigned int totalCombineVoutBRT_TYPE (BRT_TYPE list_stream<>,
                                       unsigned int WIDTH,
                                       BRT_TYPE list2_stream<>,
                                       unsigned int WIDTH2,
                                       unsigned int LENGTH,
                                       BRT_TYPE (*output)<>){
   BRT_TYPE outleft_stream<WIDTH,LENGTH>;
   BRT_TYPE outright_stream<WIDTH2,LENGTH>;
   /* Shift each input; the return values are forwarded as the max shifts. */
   float maxshiftleft =shiftValuesBRT_TYPE(list_stream,
                                           &outleft_stream,
                                           WIDTH,
                                           LENGTH,
                                           -1);
   float maxshiftright=shiftValuesBRT_TYPE(list2_stream,
                                           &outright_stream,
                                           WIDTH2,
                                           LENGTH,
                                           -1);
   return voutCombineBRT_TYPE (outleft_stream,
                               maxshiftleft,
                               WIDTH,
                               outright_stream,
                               maxshiftright,
                               WIDTH2,
                               LENGTH,
                               output);
}
#undef VECTOR_TEMPLATIZED_FUNCTIONS
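/*
 * Code-viewer page residue (not part of vout.br) -- keyboard shortcuts:
 *   Copy code:            Ctrl + C
 *   Search code:          Ctrl + F
 *   Full-screen mode:     F11
 *   Toggle theme:         Ctrl + Shift + D
 *   Show shortcuts:       ?
 *   Increase font size:   Ctrl + =
 *   Decrease font size:   Ctrl + -
 */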