📄 transform.c
字号:
outf3 = u_ptr[0][ 2] * dewindow[0x2];
outf4 = u_ptr[0][ 3] * dewindow[0x3];
outf1 += u_ptr[0][ 4] * dewindow[0x4];
outf2 += u_ptr[0][ 5] * dewindow[0x5];
outf3 += u_ptr[0][ 6] * dewindow[0x6];
outf4 += u_ptr[0][ 7] * dewindow[0x7];
outf1 += u_ptr[0][ 8] * dewindow[0x8];
outf2 += u_ptr[0][ 9] * dewindow[0x9];
outf3 += u_ptr[0][10] * dewindow[0xa];
outf4 += u_ptr[0][11] * dewindow[0xb];
outf1 += u_ptr[0][12] * dewindow[0xc];
outf2 += u_ptr[0][13] * dewindow[0xd];
outf3 += u_ptr[0][14] * dewindow[0xe];
outf4 += u_ptr[0][15] * dewindow[0xf];
out = outf1 + outf2 + outf3 + outf4;
dewindow += 32;
dewindow2 += 32;
u_ptr++;
if (out > 32767)
samples[0] = 32767;
else
if (out < -32768)
samples[0] = -32768;
else
samples[0] = out;
}
if (div & 0x1) {
for (j = 1; j < 16; ++j) {
float outf1, outf2, outf3, outf4;
outf1 = u_ptr[0][ 0] * dewindow[0x0];
outf3 = u_ptr[0][ 0] * dewindow2[0xf];
outf2 = u_ptr[0][ 1] * dewindow[0x1];
outf4 = u_ptr[0][ 1] * dewindow2[0xe];
outf1 += u_ptr[0][ 2] * dewindow[0x2];
outf3 += u_ptr[0][ 2] * dewindow2[0xd];
outf2 += u_ptr[0][ 3] * dewindow[0x3];
outf4 += u_ptr[0][ 3] * dewindow2[0xc];
outf1 += u_ptr[0][ 4] * dewindow[0x4];
outf3 += u_ptr[0][ 4] * dewindow2[0xb];
outf2 += u_ptr[0][ 5] * dewindow[0x5];
outf4 += u_ptr[0][ 5] * dewindow2[0xa];
outf1 += u_ptr[0][ 6] * dewindow[0x6];
outf3 += u_ptr[0][ 6] * dewindow2[0x9];
outf2 += u_ptr[0][ 7] * dewindow[0x7];
outf4 += u_ptr[0][ 7] * dewindow2[0x8];
outf1 += u_ptr[0][ 8] * dewindow[0x8];
outf3 += u_ptr[0][ 8] * dewindow2[0x7];
outf2 += u_ptr[0][ 9] * dewindow[0x9];
outf4 += u_ptr[0][ 9] * dewindow2[0x6];
outf1 += u_ptr[0][10] * dewindow[0xa];
outf3 += u_ptr[0][10] * dewindow2[0x5];
outf2 += u_ptr[0][11] * dewindow[0xb];
outf4 += u_ptr[0][11] * dewindow2[0x4];
outf1 += u_ptr[0][12] * dewindow[0xc];
outf3 += u_ptr[0][12] * dewindow2[0x3];
outf2 += u_ptr[0][13] * dewindow[0xd];
outf4 += u_ptr[0][13] * dewindow2[0x2];
outf1 += u_ptr[0][14] * dewindow[0xe];
outf3 += u_ptr[0][14] * dewindow2[0x1];
outf2 += u_ptr[0][15] * dewindow[0xf];
outf4 += u_ptr[0][15] * dewindow2[0x0];
dewindow += 32;
dewindow2 += 32;
u_ptr++;
out = outf1 + outf2;
if (out > 32767)
samples[j * 2] = 32767;
else
if (out < -32768)
samples[j * 2] = -32768;
else
samples[j * 2] = out;
out = outf4 - outf3;
if (out > 32767)
samples[64 - (j * 2)] = 32767;
else
if (out < -32768)
samples[64 - (j * 2)] = -32768;
else
samples[64 - (j * 2)] = out;
}
{
float outf2, outf4;
outf2 = u_ptr[0][ 0] * dewindow[0x0];
outf4 = u_ptr[0][ 2] * dewindow[0x2];
outf2 += u_ptr[0][ 4] * dewindow[0x4];
outf4 += u_ptr[0][ 6] * dewindow[0x6];
outf2 += u_ptr[0][ 8] * dewindow[0x8];
outf4 += u_ptr[0][10] * dewindow[0xa];
outf2 += u_ptr[0][12] * dewindow[0xc];
outf4 += u_ptr[0][14] * dewindow[0xe];
out = outf2 + outf4;
if (out > 32767)
samples[16 * 2] = 32767;
else
if (out < -32768)
samples[16 * 2] = -32768;
else
samples[16 * 2] = out;
}
} else {
for (j = 1; j < 16; ++j) {
float outf1, outf2, outf3, outf4;
outf1 = u_ptr[0][ 0] * dewindow[0x0];
outf3 = u_ptr[0][ 0] * dewindow2[0xf];
outf2 = u_ptr[0][ 1] * dewindow[0x1];
outf4 = u_ptr[0][ 1] * dewindow2[0xe];
outf1 += u_ptr[0][ 2] * dewindow[0x2];
outf3 += u_ptr[0][ 2] * dewindow2[0xd];
outf2 += u_ptr[0][ 3] * dewindow[0x3];
outf4 += u_ptr[0][ 3] * dewindow2[0xc];
outf1 += u_ptr[0][ 4] * dewindow[0x4];
outf3 += u_ptr[0][ 4] * dewindow2[0xb];
outf2 += u_ptr[0][ 5] * dewindow[0x5];
outf4 += u_ptr[0][ 5] * dewindow2[0xa];
outf1 += u_ptr[0][ 6] * dewindow[0x6];
outf3 += u_ptr[0][ 6] * dewindow2[0x9];
outf2 += u_ptr[0][ 7] * dewindow[0x7];
outf4 += u_ptr[0][ 7] * dewindow2[0x8];
outf1 += u_ptr[0][ 8] * dewindow[0x8];
outf3 += u_ptr[0][ 8] * dewindow2[0x7];
outf2 += u_ptr[0][ 9] * dewindow[0x9];
outf4 += u_ptr[0][ 9] * dewindow2[0x6];
outf1 += u_ptr[0][10] * dewindow[0xa];
outf3 += u_ptr[0][10] * dewindow2[0x5];
outf2 += u_ptr[0][11] * dewindow[0xb];
outf4 += u_ptr[0][11] * dewindow2[0x4];
outf1 += u_ptr[0][12] * dewindow[0xc];
outf3 += u_ptr[0][12] * dewindow2[0x3];
outf2 += u_ptr[0][13] * dewindow[0xd];
outf4 += u_ptr[0][13] * dewindow2[0x2];
outf1 += u_ptr[0][14] * dewindow[0xe];
outf3 += u_ptr[0][14] * dewindow2[0x1];
outf2 += u_ptr[0][15] * dewindow[0xf];
outf4 += u_ptr[0][15] * dewindow2[0x0];
dewindow += 32;
dewindow2 += 32;
u_ptr++;
out = outf1 + outf2;
if (out > 32767)
samples[j * 2] = 32767;
else
if (out < -32768)
samples[j * 2] = -32768;
else
samples[j * 2] = out;
out = outf3 - outf4;
if (out > 32767)
samples[64 - (j * 2)] = 32767;
else
if (out < -32768)
samples[64 - (j * 2)] = -32768;
else
samples[64 - (j * 2)] = out;
}
{
float outf2, outf4;
outf2 = u_ptr[0][ 1] * dewindow[0x1];
outf4 = u_ptr[0][ 3] * dewindow[0x3];
outf2 += u_ptr[0][ 5] * dewindow[0x5];
outf4 += u_ptr[0][ 7] * dewindow[0x7];
outf2 += u_ptr[0][ 9] * dewindow[0x9];
outf4 += u_ptr[0][11] * dewindow[0xb];
outf2 += u_ptr[0][13] * dewindow[0xd];
outf4 += u_ptr[0][15] * dewindow[0xf];
out = outf2 + outf4;
if (out > 32767)
samples[16 * 2] = 32767;
else
if (out < -32768)
samples[16 * 2] = -32768;
else
samples[16 * 2] = out;
}
}
}
#elif defined(HAS_AUTOINCREMENT)
const float *dewindow = t_dewindow[0] + 15 - start;
/* This is tuned specifically for architectures with
autoincrement and -decrement. */
{
float *u_ptr = (float*) u[ch][div];
u_ptr--;
for (j = 0; j < 16; ++j) {
float outf1, outf2, outf3, outf4;
outf1 = *++u_ptr * *++dewindow;
outf2 = *++u_ptr * *++dewindow;
outf3 = *++u_ptr * *++dewindow;
outf4 = *++u_ptr * *++dewindow;
outf1 += *++u_ptr * *++dewindow;
outf2 += *++u_ptr * *++dewindow;
outf3 += *++u_ptr * *++dewindow;
outf4 += *++u_ptr * *++dewindow;
outf1 += *++u_ptr * *++dewindow;
outf2 += *++u_ptr * *++dewindow;
outf3 += *++u_ptr * *++dewindow;
outf4 += *++u_ptr * *++dewindow;
outf1 += *++u_ptr * *++dewindow;
outf2 += *++u_ptr * *++dewindow;
outf3 += *++u_ptr * *++dewindow;
outf4 += *++u_ptr * *++dewindow;
out = outf1 + outf2 + outf3 + outf4;
dewindow += 16;
PUT_SAMPLE(out)
}
if (div & 0x1) {
{
float outf2, outf4;
outf2 = u_ptr[ 1] * dewindow[0x1];
outf4 = u_ptr[ 3] * dewindow[0x3];
outf2 += u_ptr[ 5] * dewindow[0x5];
outf4 += u_ptr[ 7] * dewindow[0x7];
outf2 += u_ptr[ 9] * dewindow[0x9];
outf4 += u_ptr[11] * dewindow[0xb];
outf2 += u_ptr[13] * dewindow[0xd];
outf4 += u_ptr[15] * dewindow[0xf];
out = outf2 + outf4;
PUT_SAMPLE(out)
}
dewindow -= 31;
dewindow += start;
dewindow += start;
u_ptr -= 16;
for (; j < 31; ++j) {
float outf1, outf2, outf3, outf4;
outf1 = *++u_ptr * *--dewindow;
outf2 = *++u_ptr * *--dewindow;
outf3 = *++u_ptr * *--dewindow;
outf4 = *++u_ptr * *--dewindow;
outf1 += *++u_ptr * *--dewindow;
outf2 += *++u_ptr * *--dewindow;
outf3 += *++u_ptr * *--dewindow;
outf4 += *++u_ptr * *--dewindow;
outf1 += *++u_ptr * *--dewindow;
outf2 += *++u_ptr * *--dewindow;
outf3 += *++u_ptr * *--dewindow;
outf4 += *++u_ptr * *--dewindow;
outf1 += *++u_ptr * *--dewindow;
outf2 += *++u_ptr * *--dewindow;
outf3 += *++u_ptr * *--dewindow;
outf4 += *++u_ptr * *--dewindow;
out = outf2 - outf1 + outf4 - outf3;
dewindow -= 16;
u_ptr -= 32;
PUT_SAMPLE(out)
}
} else {
{
float outf2, outf4;
outf2 = u_ptr[ 2] * dewindow[ 0x2];
outf4 = u_ptr[ 4] * dewindow[ 0x4];
outf2 += u_ptr[ 6] * dewindow[ 0x6];
outf4 += u_ptr[ 8] * dewindow[ 0x8];
outf2 += u_ptr[10] * dewindow[ 0xa];
outf4 += u_ptr[12] * dewindow[ 0xc];
outf2 += u_ptr[14] * dewindow[ 0xe];
outf4 += u_ptr[16] * dewindow[0x10];
out = outf2 + outf4;
PUT_SAMPLE(out)
}
dewindow -= 31;
dewindow += start;
dewindow += start;
u_ptr -= 16;
for (; j < 31; ++j) {
float outf1, outf2, outf3, outf4;
outf1 = *++u_ptr * *--dewindow;
outf2 = *++u_ptr * *--dewindow;
outf3 = *++u_ptr * *--dewindow;
outf4 = *++u_ptr * *--dewindow;
outf1 += *++u_ptr * *--dewindow;
outf2 += *++u_ptr * *--dewindow;
outf3 += *++u_ptr * *--dewindow;
outf4 += *++u_ptr * *--dewindow;
outf1 += *++u_ptr * *--dewindow;
outf2 += *++u_ptr * *--dewindow;
outf3 += *++u_ptr * *--dewindow;
outf4 += *++u_ptr * *--dewindow;
outf1 += *++u_ptr * *--dewindow;
outf2 += *++u_ptr * *--dewindow;
outf3 += *++u_ptr * *--dewindow;
outf4 += *++u_ptr * *--dewindow;
out = outf1 - outf2 + outf3 - outf4;
dewindow -= 16;
u_ptr -= 32;
PUT_SAMPLE(out)
}
}
}
#else
const float *dewindow = t_dewindow[0] + 16 - start;
/* These optimisations are tuned specifically for architectures
without autoincrement and -decrement. */
{
float (*u_ptr)[16] = u[ch][div];
for (j = 0; j < 16; ++j) {
float outf1, outf2, outf3, outf4;
outf1 = u_ptr[0][ 0] * dewindow[0x0];
outf2 = u_ptr[0][ 1] * dewindow[0x1];
outf3 = u_ptr[0][ 2] * dewindow[0x2];
outf4 = u_ptr[0][ 3] * dewindow[0x3];
outf1 += u_ptr[0][ 4] * dewindow[0x4];
outf2 += u_ptr[0][ 5] * dewindow[0x5];
outf3 += u_ptr[0][ 6] * dewindow[0x6];
outf4 += u_ptr[0][ 7] * dewindow[0x7];
outf1 += u_ptr[0][ 8] * dewindow[0x8];
outf2 += u_ptr[0][ 9] * dewindow[0x9];
outf3 += u_ptr[0][10] * dewindow[0xa];
outf4 += u_ptr[0][11] * dewindow[0xb];
outf1 += u_ptr[0][12] * dewindow[0xc];
outf2 += u_ptr[0][13] * dewindow[0xd];
outf3 += u_ptr[0][14] * dewindow[0xe];
outf4 += u_ptr[0][15] * dewindow[0xf];
out = outf1 + outf2 + outf3 + outf4;
dewindow += 32;
u_ptr++;
PUT_SAMPLE(out)
}
if (div & 0x1) {
{
float outf2, outf4;
outf2 = u_ptr[0][ 0] * dewindow[0x0];
outf4 = u_ptr[0][ 2] * dewindow[0x2];
outf2 += u_ptr[0][ 4] * dewindow[0x4];
outf4 += u_ptr[0][ 6] * dewindow[0x6];
outf2 += u_ptr[0][ 8] * dewindow[0x8];
outf4 += u_ptr[0][10] * dewindow[0xa];
outf2 += u_ptr[0][12] * dewindow[0xc];
outf4 += u_ptr[0][14] * dewindow[0xe];
out = outf2 + outf4;
PUT_SAMPLE(out)
}
dewindow -= 48;
dewindow += start;
dewindow += start;
for (; j < 31; ++j) {
float outf1, outf2, outf3, outf4;
--u_ptr;
outf1 = u_ptr[0][ 0] * dewindow[0xf];
outf2 = u_ptr[0][ 1] * dewindow[0xe];
outf3 = u_ptr[0][ 2] * dewindow[0xd];
outf4 = u_ptr[0][ 3] * dewindow[0xc];
outf1 += u_ptr[0][ 4] * dewindow[0xb];
outf2 += u_ptr[0][ 5] * dewindow[0xa];
outf3 += u_ptr[0][ 6] * dewindow[0x9];
outf4 += u_ptr[0][ 7] * dewindow[0x8];
outf1 += u_ptr[0][ 8] * dewindow[0x7];
outf2 += u_ptr[0][ 9] * dewindow[0x6];
outf3 += u_ptr[0][10] * dewindow[0x5];
outf4 += u_ptr[0][11] * dewindow[0x4];
outf1 += u_ptr[0][12] * dewindow[0x3];
outf2 += u_ptr[0][13] * dewindow[0x2];
outf3 += u_ptr[0][14] * dewindow[0x1];
outf4 += u_ptr[0][15] * dewindow[0x0];
out = -outf1 + outf2 - outf3 + outf4;
dewindow -= 32;
PUT_SAMPLE(out)
}
} else {
{
float outf2, outf4;
outf2 = u_ptr[0][ 1] * dewindow[0x1];
outf4 = u_ptr[0][ 3] * dewindow[0x3];
outf2 += u_ptr[0][ 5] * dewindow[0x5];
outf4 += u_ptr[0][ 7] * dewindow[0x7];
outf2 += u_ptr[0][ 9] * dewindow[0x9];
outf4 += u_ptr[0][11] * dewindow[0xb];
outf2 += u_ptr[0][13] * dewindow[0xd];
outf4 += u_ptr[0][15] * dewindow[0xf];
out = outf2 + outf4;
PUT_SAMPLE(out)
}
dewindow -= 48;
dewindow += start;
dewindow += start;
for (; j < 31; ++j) {
float outf1, outf2, outf3, outf4;
--u_ptr;
outf1 = u_ptr[0][ 0] * dewindow[0xf];
outf2 = u_ptr[0][ 1] * dewindow[0xe];
outf3 = u_ptr[0][ 2] * dewindow[0xd];
outf4 = u_ptr[0][ 3] * dewindow[0xc];
outf1 += u_ptr[0][ 4] * dewindow[0xb];
outf2 += u_ptr[0][ 5] * dewindow[0xa];
outf3 += u_ptr[0][ 6] * dewindow[0x9];
outf4 += u_ptr[0][ 7] * dewindow[0x8];
outf1 += u_ptr[0][ 8] * dewindow[0x7];
outf2 += u_ptr[0][ 9] * dewindow[0x6];
outf3 += u_ptr[0][10] * dewindow[0x5];
outf4 += u_ptr[0][11] * dewindow[0x4];
outf1 += u_ptr[0][12] * dewindow[0x3];
outf2 += u_ptr[0][13] * dewindow[0x2];
outf3 += u_ptr[0][14] * dewindow[0x1];
outf4 += u_ptr[0][15] * dewindow[0x0];
out = outf1 - outf2 + outf3 - outf4;
dewindow -= 32;
PUT_SAMPLE(out)
}
}
}
#endif
}
#endif
--u_start[ch];
u_start[ch] &= 0xf;
u_div[ch]=u_div[ch] ? 0 : 1;
#if defined(PENTIUM_RDTSC)
__asm__(".byte 0x0f,0x31" : "=a" (cnt2), "=d" (cnt4));
if (cnt2-cnt1 < min_cycles) {
min_cycles = cnt2-cnt1;
printf("%d, %d cycles, %d\n", cnt3-cnt1, min_cycles, start);
}
#endif
}
/*
 * Scale the t_dewindow coefficient table in place.
 *
 * Every entry in the first 17 rows of 32 columns is multiplied by
 * 16383.5f.  The table is modified directly, so each call applies the
 * factor again; nothing here guards against repeated invocation.
 */
void premultiply()
{
	enum { DEWINDOW_ROWS = 17, DEWINDOW_COLS = 32 };
	const float scale = 16383.5f;
	int row = 0;

	while (row < DEWINDOW_ROWS) {
		int col;

		for (col = 0; col < DEWINDOW_COLS; col++) {
			t_dewindow[row][col] *= scale;
		}
		row++;
	}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -