📄 transform.c
字号:
u_p[ 6][0] = -d10 +d13;
/*24*/ u_p[ 8][0] =d3;
u_p[10][0] = -d8 -d9 +d11 -d13;
/*28*/ u_p[12][0] = +d7;
u_p[14][0] = +d15;
/* the other 32 are stored for use with the next granule
*/
u_p = (float (*)[16]) &u[ch][!div][0][start];
/*0*/ u_p[16][0] = d0;
u_p[14][0] = -(+d8 );
/*4*/ u_p[12][0] = -(+d4 );
u_p[10][0] = -(-d8 +d12 );
/*8*/ u_p[ 8][0] = -(+d2 );
u_p[ 6][0] = -(+d8 +d10 -d12 );
/*12*/ u_p[ 4][0] = -(-d4 +d6 );
u_p[ 2][0] = -d14;
u_p[ 0][0] = -d1;
}
{
float c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15;
/* memory separation, second part
*/
/* 2
*/
c0=d16 + d24; c8= (d16 - d24) * b2;
c1=d17 + d25; c9= (d17 - d25) * b6;
c2=d18 + d26; c10= (d18 - d26) * b14;
c3=d19 + d27; c11= (d19 - d27) * b10;
c4=d20 + d28; c12= (d20 - d28) * b30;
c5=d21 + d29; c13= (d21 - d29) * b26;
c6=d22 + d30; c14= (d22 - d30) * b18;
c7=d23 + d31; c15= (d23 - d31) * b22;
/* 3
*/
d16= c0+ c4; d20= (c0 - c4) * b4;
d17= c1+ c5; d21= (c1 - c5) * b12;
d18= c2+ c6; d22= (c2 - c6) * b28;
d19= c3+ c7; d23= (c3 - c7) * b20;
d24= c8+ c12; d28= (c8 - c12) * b4;
d25= c9+ c13; d29= (c9 - c13) * b12;
d26= c10+ c14; d30= (c10 - c14) * b28;
d27= c11+ c15; d31= (c11 - c15) * b20;
/* 4
*/
{
float rb8 = b8;
float rb24 = b24;
/**/ c0= d16+ d18; c2= (d16 - d18) * rb8;
c1= d17+ d19; c3= (d17 - d19) * rb24;
/**/ c4= d20+ d22; c6= (d20 - d22) * rb8;
c5= d21+ d23; c7= (d21 - d23) * rb24;
/**/ c8= d24+ d26; c10= (d24 - d26) * rb8;
c9= d25+ d27; c11= (d25 - d27) * rb24;
/**/ c12= d28+ d30; c14= (d28 - d30) * rb8;
c13= d29+ d31; c15= (d29 - d31) * rb24;
}
/* 5
*/
{
float rb16 = b16;
d16= c0+ c1; d17= (c0 - c1) * rb16;
d18= c2+ c3; d19= (c2 - c3) * rb16;
d20= c4+ c5; d21= (c4 - c5) * rb16;
d20+=d16; d21+=d17;
d22= c6+ c7; d23= (c6 - c7) * rb16;
d22+=d16; d22+=d18;
d23+=d16; d23+=d17; d23+=d19;
d24= c8+ c9; d25= (c8 - c9) * rb16;
d26= c10+ c11; d27= (c10 - c11) * rb16;
d26+=d24;
d27+=d24; d27+=d25;
d28= c12+ c13; d29= (c12 - c13) * rb16;
d28-=d20; d29+=d28; d29-=d21;
d30= c14+ c15; d31= (c14 - c15) * rb16;
d30-=d22;
d31-=d23;
}
/* step 6: final resolving & reordering
* the other 32 are stored for use with the next granule
*/
u_p = (float (*)[16]) &u[ch][!div][0][start];
u_p[ 1][0] = -(+d30 );
u_p[ 3][0] = -(+d22 -d26 );
u_p[ 5][0] = -(-d18 -d20 +d26 );
u_p[ 7][0] = -(+d18 -d28 );
u_p[ 9][0] = -(+d28 );
u_p[11][0] = -(+d20 -d24 );
u_p[13][0] = -(-d16 +d24 );
u_p[15][0] = -(+d16 );
/* the other 32 are stored for use with the next granule
*/
u_p = (float (*)[16]) &u[ch][div][0][start];
u_p[15][0] = +d31;
u_p[13][0] = +d23 -d27;
u_p[11][0] = -d19 -d20 -d21 +d27;
u_p[ 9][0] = +d19 -d29;
u_p[ 7][0] = -d18 +d29;
u_p[ 5][0] = +d18 +d20 +d21 -d25 -d26;
u_p[ 3][0] = -d17 -d22 +d25 +d26;
u_p[ 1][0] = +d17 -d30;
}
}
#if defined(PENTIUM_RDTSC)
__asm__(".byte 0x0f,0x31" : "=a" (cnt3), "=d" (cnt4));
#endif
/* we're doing dewindowing and calculating final samples now
*/
#if defined(ARCH_i586)
/* x86 assembler optimisations. These optimisations are tuned
specifically for Intel Pentiums. */
asm("movl $15,%%eax\n\t"\
"1:\n\t"\
"flds (%0)\n\t"\
"fmuls (%1)\n\t"\
"flds 4(%0)\n\t"\
"fmuls 4(%1)\n\t"\
"flds 8(%0)\n\t"\
"fmuls 8(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 12(%0)\n\t"\
"fmuls 12(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 16(%0)\n\t"\
"fmuls 16(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 20(%0)\n\t"\
"fmuls 20(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 24(%0)\n\t"\
"fmuls 24(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 28(%0)\n\t"\
"fmuls 28(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 32(%0)\n\t"\
"fmuls 32(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 36(%0)\n\t"\
"fmuls 36(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 40(%0)\n\t"\
"fmuls 40(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 44(%0)\n\t"\
"fmuls 44(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 48(%0)\n\t"\
"fmuls 48(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 52(%0)\n\t"\
"fmuls 52(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 56(%0)\n\t"\
"fmuls 56(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 60(%0)\n\t"\
"fmuls 60(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"addl $64,%0\n\t"\
"addl $128,%1\n\t"\
"subl $4,%%esp\n\t"\
"faddp\n\t"\
"fistpl (%%esp)\n\t"\
"popl %%ecx\n\t"\
"cmpl $32767,%%ecx\n\t"\
"jle 2f\n\t"\
"movw $32767,%%cx\n\t"\
"jmp 3f\n\t"\
"2: cmpl $-32768,%%ecx\n\t"\
"jge 3f\n\t"\
"movw $-32768,%%cx\n\t"\
"3: movw %%cx,(%2)\n\t"\
"addl %3,%2\n\t"\
"decl %%eax\n\t"\
"jns 1b\n\t"\
"testb $1,%4\n\t"\
"je 4f\n\t"
"flds (%0)\n\t"\
"fmuls (%1)\n\t"\
"flds 8(%0)\n\t"\
"fmuls 8(%1)\n\t"\
"flds 16(%0)\n\t"\
"fmuls 16(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 24(%0)\n\t"\
"fmuls 24(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 32(%0)\n\t"\
"fmuls 32(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 40(%0)\n\t"\
"fmuls 40(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 48(%0)\n\t"\
"fmuls 48(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 56(%0)\n\t"\
"fmuls 56(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"subl $4,%%esp\n\t"\
"subl $64,%0\n\t"\
"subl $192,%1\n\t"\
"faddp\n\t"\
"fistpl (%%esp)\n\t"\
"popl %%ecx\n\t"\
"cmpl $32767,%%ecx\n\t"\
"jle 2f\n\t"\
"movw $32767,%%cx\n\t"\
"jmp 3f\n\t"\
"2: cmpl $-32768,%%ecx\n\t"\
"jge 3f\n\t"\
"movw $-32768,%%cx\n\t"\
"3: movw %%cx,(%2)\n\t"\
"movl %5,%%ecx\n\t"\
"sall $3,%%ecx\n\t"\
"addl %%ecx,%1\n\t"\
"addl %3,%2\n\t"\
"movl $14,%%eax\n\t"\
"1:flds 4(%0)\n\t"\
"fmuls 56(%1)\n\t"\
"flds (%0)\n\t"\
"fmuls 60(%1)\n\t"\
"flds 12(%0)\n\t"\
"fmuls 48(%1)\n\t"\
"fxch %%st(2)\n\t"\
"fsubp\n\t"\
"flds 8(%0)\n\t"\
"fmuls 52(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 20(%0)\n\t"\
"fmuls 40(%1)\n\t"\
"fxch %%st(2)\n\t"\
"fsubrp\n\t"\
"flds 16(%0)\n\t"\
"fmuls 44(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 28(%0)\n\t"\
"fmuls 32(%1)\n\t"\
"fxch %%st(2)\n\t"\
"fsubrp\n\t"\
"flds 24(%0)\n\t"\
"fmuls 36(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 36(%0)\n\t"\
"fmuls 24(%1)\n\t"\
"fxch %%st(2)\n\t"\
"fsubrp\n\t"\
"flds 32(%0)\n\t"\
"fmuls 28(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 44(%0)\n\t"\
"fmuls 16(%1)\n\t"\
"fxch %%st(2)\n\t"\
"fsubrp\n\t"\
"flds 40(%0)\n\t"\
"fmuls 20(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 52(%0)\n\t"\
"fmuls 8(%1)\n\t"\
"fxch %%st(2)\n\t"\
"fsubrp\n\t"\
"flds 48(%0)\n\t"\
"fmuls 12(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 60(%0)\n\t"\
"fmuls (%1)\n\t"\
"fxch %%st(2)\n\t"\
"fsubrp\n\t"\
"flds 56(%0)\n\t"\
"fmuls 4(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"subl $64,%0\n\t"\
"subl $128,%1\n\t"\
"subl $4,%%esp\n\t"\
"fsubp\n\t"\
"fistpl (%%esp)\n\t"\
"popl %%ecx\n\t"\
"cmpl $32767,%%ecx\n\t"\
"jle 2f\n\t"\
"movw $32767,%%cx\n\t"\
"jmp 3f\n\t"\
"2: cmpl $-32768,%%ecx\n\t"\
"jge 3f\n\t"\
"movw $-32768,%%cx\n\t"\
"3: movw %%cx,(%2)\n\t"\
"addl %3,%2\n\t"\
"decl %%eax\n\t"\
"jns 1b\n\t"\
"jmp 5f\n\t"\
"4:flds 4(%0)\n\t"\
"fmuls 4(%1)\n\t"\
"flds 12(%0)\n\t"\
"fmuls 12(%1)\n\t"\
"flds 20(%0)\n\t"\
"fmuls 20(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 28(%0)\n\t"\
"fmuls 28(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 36(%0)\n\t"\
"fmuls 36(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 44(%0)\n\t"\
"fmuls 44(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 52(%0)\n\t"\
"fmuls 52(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 60(%0)\n\t"\
"fmuls 60(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"subl $4,%%esp\n\t"\
"subl $64,%0\n\t"\
"subl $192,%1\n\t"\
"faddp\n\t"\
"fistpl (%%esp)\n\t"\
"popl %%ecx\n\t"\
"cmpl $32767,%%ecx\n\t"\
"jle 2f\n\t"\
"movw $32767,%%cx\n\t"\
"jmp 3f\n\t"\
"2: cmpl $-32768,%%ecx\n\t"\
"jge 3f\n\t"\
"movw $-32768,%%cx\n\t"\
"3: movw %%cx,(%2)\n\t"\
"movl %5,%%ecx\n\t"\
"sall $3,%%ecx\n\t"\
"addl %%ecx,%1\n\t"\
"addl %3,%2\n\t"\
"movl $14,%%eax\n\t"\
"1:flds (%0)\n\t"\
"fmuls 60(%1)\n\t"\
"flds 4(%0)\n\t"\
"fmuls 56(%1)\n\t"\
"flds 8(%0)\n\t"\
"fmuls 52(%1)\n\t"\
"fxch %%st(2)\n\t"\
"fsubp\n\t"\
"flds 12(%0)\n\t"\
"fmuls 48(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 16(%0)\n\t"\
"fmuls 44(%1)\n\t"\
"fxch %%st(2)\n\t"\
"fsubrp\n\t"\
"flds 20(%0)\n\t"\
"fmuls 40(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 24(%0)\n\t"\
"fmuls 36(%1)\n\t"\
"fxch %%st(2)\n\t"\
"fsubrp\n\t"\
"flds 28(%0)\n\t"\
"fmuls 32(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 32(%0)\n\t"\
"fmuls 28(%1)\n\t"\
"fxch %%st(2)\n\t"\
"fsubrp\n\t"\
"flds 36(%0)\n\t"\
"fmuls 24(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 40(%0)\n\t"\
"fmuls 20(%1)\n\t"\
"fxch %%st(2)\n\t"\
"fsubrp\n\t"\
"flds 44(%0)\n\t"\
"fmuls 16(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 48(%0)\n\t"\
"fmuls 12(%1)\n\t"\
"fxch %%st(2)\n\t"\
"fsubrp\n\t"\
"flds 52(%0)\n\t"\
"fmuls 8(%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"flds 56(%0)\n\t"\
"fmuls 4(%1)\n\t"\
"fxch %%st(2)\n\t"\
"fsubrp\n\t"\
"flds 60(%0)\n\t"\
"fmuls (%1)\n\t"\
"fxch %%st(2)\n\t"\
"faddp\n\t"\
"subl $64,%0\n\t"\
"subl $128,%1\n\t"\
"subl $4,%%esp\n\t"\
"fsubp\n\t"\
"fistpl (%%esp)\n\t"\
"popl %%ecx\n\t"\
"cmpl $32767,%%ecx\n\t"\
"jle 2f\n\t"\
"movw $32767,%%cx\n\t"\
"jmp 3f\n\t"\
"2: cmpl $-32768,%%ecx\n\t"\
"jge 3f\n\t"\
"movw $-32768,%%cx\n\t"\
"3: movw %%cx,(%2)\n\t"\
"addl %3,%2\n\t"\
"decl %%eax\n\t"\
"jns 1b\n\t"\
"5:"\
: : "b" (u[ch][div]), "d" (t_dewindow[0] + 16 - start), "S" (&sample_buffer[f>>(2-nch)][nch==2?0:(f&1?16:0)][ch]), "m" (sizeof(short) * nch), "m" (div), "m" (start)\
: "eax", "ecx", "memory");
#else
{
short *samples = (&sample_buffer[f>>(2-nch)][nch==2?0:(f&1?16:0)][ch]);
int out, j;
#define PUT_SAMPLE(out) \
if (out > 32767) \
*samples = 32767; \
else \
if (out < -32768) \
*samples = -32768; \
else \
*samples = out; \
\
samples += nch;
#if defined(SUPERHACK)
/* This is a simple implementation which should be nicer to the
cache; computation of the samples is done in one pass rather than
two. However, for various reasons which I do not have time to
investigate, it runs quite a lot slower than the two-pass
computation. If you have time, you are welcome to look into
it. */
{
float (*u_ptr)[16] = u[ch][div];
const float *dewindow2 = t_dewindow[0] + start;
{
float outf1, outf2, outf3, outf4;
outf1 = u_ptr[0][ 0] * dewindow[0x0];
outf2 = u_ptr[0][ 1] * dewindow[0x1];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -