📄 downmix.c
字号:
asm volatile(
"movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t"
"movl $-1024, %%esi \n\t"
".balign 16\n\t"
"1: \n\t"
"movq 1024(%0, %%esi), %%mm0 \n\t"
"movq 1032(%0, %%esi), %%mm1 \n\t"
"pfadd %%mm7, %%mm0 \n\t" //common
"pfadd %%mm7, %%mm1 \n\t" //common
"movq (%0, %%esi), %%mm2 \n\t"
"movq 8(%0, %%esi), %%mm3 \n\t"
"movq 2048(%0, %%esi), %%mm4 \n\t"
"movq 2056(%0, %%esi), %%mm5 \n\t"
"pfadd %%mm0, %%mm2 \n\t"
"pfadd %%mm1, %%mm3 \n\t"
"pfadd %%mm0, %%mm4 \n\t"
"pfadd %%mm1, %%mm5 \n\t"
"movq %%mm2, (%0, %%esi) \n\t"
"movq %%mm3, 8(%0, %%esi) \n\t"
"movq %%mm4, 1024(%0, %%esi) \n\t"
"movq %%mm5, 1032(%0, %%esi) \n\t"
"addl $16, %%esi \n\t"
" jnz 1b \n\t"
:: "r" (samples+256), "m" (bias)
: "%esi"
);
}
/*
 * Add the biased 256-sample block that follows `right` to both `left`
 * and `right`, in place.  For i in 0..255:
 *
 *   common   = right[256 + i] + bias
 *   left[i]  = left[i]  + common
 *   right[i] = right[i] + common
 *
 * Both base pointers are advanced by 256 samples and indexed with a
 * negative byte offset in %esi that runs from -1024 up to 0; each
 * iteration handles four samples (two 64-bit MMX registers).  The byte
 * offsets assume sample_t is a 4-byte float.
 *
 * No femms/emms is issued here, so the MMX/3DNow! state is still active
 * on return.
 */
static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
{
    __asm__ volatile(
    /* mm7 = { bias, bias } */
    "movd %2, %%mm7 \n\t"
    "punpckldq %2, %%mm7 \n\t"
    "movl $-1024, %%esi \n\t"
    ".balign 16\n\t"
    "1: \n\t"
    "movq 1024(%1, %%esi), %%mm0 \n\t"
    "movq 1032(%1, %%esi), %%mm1 \n\t"
    "pfadd %%mm7, %%mm0 \n\t" //common
    "pfadd %%mm7, %%mm1 \n\t" //common
    "movq (%0, %%esi), %%mm2 \n\t"
    "movq 8(%0, %%esi), %%mm3 \n\t"
    "movq (%1, %%esi), %%mm4 \n\t"
    "movq 8(%1, %%esi), %%mm5 \n\t"
    "pfadd %%mm0, %%mm2 \n\t"
    "pfadd %%mm1, %%mm3 \n\t"
    "pfadd %%mm0, %%mm4 \n\t"
    "pfadd %%mm1, %%mm5 \n\t"
    "movq %%mm2, (%0, %%esi) \n\t"
    "movq %%mm3, 8(%0, %%esi) \n\t"
    "movq %%mm4, (%1, %%esi) \n\t"
    "movq %%mm5, 8(%1, %%esi) \n\t"
    "addl $16, %%esi \n\t"
    " jnz 1b \n\t"
    :: "r" (left+256), "r" (right+256), "m" (bias)
    /* The sample buffers are read and written through the pointer
     * operands, which the compiler cannot see; the "memory" clobber keeps
     * it from caching or reordering loads/stores across this statement. */
    : "%esi", "memory"
    );
}
/*
 * In-place mix of three consecutive 256-sample blocks in `samples` down
 * to two, subtracting the third block from the first and adding it to
 * the second.  For i in 0..255:
 *
 *   surround         = samples[512 + i]
 *   samples[i]       = (samples[i]       + bias) - surround
 *   samples[256 + i] = (samples[256 + i] + bias) + surround
 *
 * The base pointer is samples+256 and %esi is a negative byte offset
 * running from -1024 up to 0, so displacement 0 addresses block 0,
 * 1024 block 1 and 2048 block 2 (assuming sample_t is a 4-byte float).
 * Four samples are processed per iteration.
 *
 * No femms/emms is issued here, so the MMX/3DNow! state is still active
 * on return.
 */
static void mix21toS_3dnow (sample_t * samples, sample_t bias)
{
    __asm__ volatile(
    /* mm7 = { bias, bias } */
    "movd %1, %%mm7 \n\t"
    "punpckldq %1, %%mm7 \n\t"
    "movl $-1024, %%esi \n\t"
    ".balign 16\n\t"
    "1: \n\t"
    "movq 2048(%0, %%esi), %%mm0 \n\t" // surround
    "movq 2056(%0, %%esi), %%mm1 \n\t" // surround
    "movq (%0, %%esi), %%mm2 \n\t"
    "movq 8(%0, %%esi), %%mm3 \n\t"
    "movq 1024(%0, %%esi), %%mm4 \n\t"
    "movq 1032(%0, %%esi), %%mm5 \n\t"
    "pfadd %%mm7, %%mm2 \n\t"
    "pfadd %%mm7, %%mm3 \n\t"
    "pfadd %%mm7, %%mm4 \n\t"
    "pfadd %%mm7, %%mm5 \n\t"
    "pfsub %%mm0, %%mm2 \n\t"
    "pfsub %%mm1, %%mm3 \n\t"
    "pfadd %%mm0, %%mm4 \n\t"
    "pfadd %%mm1, %%mm5 \n\t"
    "movq %%mm2, (%0, %%esi) \n\t"
    "movq %%mm3, 8(%0, %%esi) \n\t"
    "movq %%mm4, 1024(%0, %%esi) \n\t"
    "movq %%mm5, 1032(%0, %%esi) \n\t"
    "addl $16, %%esi \n\t"
    " jnz 1b \n\t"
    :: "r" (samples+256), "m" (bias)
    /* The sample buffer is read and written through the pointer operand,
     * which the compiler cannot see; the "memory" clobber keeps it from
     * caching or reordering loads/stores across this statement. */
    : "%esi", "memory"
    );
}
/*
 * In-place mix of four consecutive 256-sample blocks in `samples` down
 * to two.  For i in 0..255:
 *
 *   common           = (samples[256 + i] + samples[768 + i]) + bias
 *   samples[i]       = samples[i]       + common
 *   samples[256 + i] = samples[512 + i] + common
 *
 * The base pointer is samples+256 and %esi is a negative byte offset
 * running from -1024 up to 0, so displacements 0/1024/2048/3072 address
 * blocks 0..3 (assuming sample_t is a 4-byte float).  Four samples are
 * processed per iteration.
 *
 * No femms/emms is issued here, so the MMX/3DNow! state is still active
 * on return.
 */
static void mix31to2_3dnow (sample_t * samples, sample_t bias)
{
    __asm__ volatile(
    /* mm7 = { bias, bias } */
    "movd %1, %%mm7 \n\t"
    "punpckldq %1, %%mm7 \n\t"
    "movl $-1024, %%esi \n\t"
    ".balign 16\n\t"
    "1: \n\t"
    "movq 1024(%0, %%esi), %%mm0 \n\t"
    "movq 1032(%0, %%esi), %%mm1 \n\t"
    "pfadd 3072(%0, %%esi), %%mm0 \n\t"
    "pfadd 3080(%0, %%esi), %%mm1 \n\t"
    "pfadd %%mm7, %%mm0 \n\t" // common
    "pfadd %%mm7, %%mm1 \n\t" // common
    "movq (%0, %%esi), %%mm2 \n\t"
    "movq 8(%0, %%esi), %%mm3 \n\t"
    "movq 2048(%0, %%esi), %%mm4 \n\t"
    "movq 2056(%0, %%esi), %%mm5 \n\t"
    "pfadd %%mm0, %%mm2 \n\t"
    "pfadd %%mm1, %%mm3 \n\t"
    "pfadd %%mm0, %%mm4 \n\t"
    "pfadd %%mm1, %%mm5 \n\t"
    "movq %%mm2, (%0, %%esi) \n\t"
    "movq %%mm3, 8(%0, %%esi) \n\t"
    "movq %%mm4, 1024(%0, %%esi) \n\t"
    "movq %%mm5, 1032(%0, %%esi) \n\t"
    "addl $16, %%esi \n\t"
    " jnz 1b \n\t"
    :: "r" (samples+256), "m" (bias)
    /* The sample buffer is read and written through the pointer operand,
     * which the compiler cannot see; the "memory" clobber keeps it from
     * caching or reordering loads/stores across this statement. */
    : "%esi", "memory"
    );
}
/*
 * In-place mix of four consecutive 256-sample blocks in `samples` down
 * to two, with the fourth block subtracted from the first output and
 * added to the second.  For i in 0..255:
 *
 *   common           = samples[256 + i] + bias
 *   surround         = samples[768 + i]
 *   samples[i]       = (samples[i]       + common) - surround
 *   samples[256 + i] = (samples[512 + i] + common) + surround
 *
 * The base pointer is samples+256 and %esi is a negative byte offset
 * running from -1024 up to 0, so displacements 0/1024/2048/3072 address
 * blocks 0..3 (assuming sample_t is a 4-byte float).  Four samples are
 * processed per iteration.
 *
 * No femms/emms is issued here, so the MMX/3DNow! state is still active
 * on return.
 */
static void mix31toS_3dnow (sample_t * samples, sample_t bias)
{
    __asm__ volatile(
    /* mm7 = { bias, bias } */
    "movd %1, %%mm7 \n\t"
    "punpckldq %1, %%mm7 \n\t"
    "movl $-1024, %%esi \n\t"
    ".balign 16\n\t"
    "1: \n\t"
    "movq 1024(%0, %%esi), %%mm0 \n\t"
    "movq 1032(%0, %%esi), %%mm1 \n\t"
    "pfadd %%mm7, %%mm0 \n\t" // common
    "pfadd %%mm7, %%mm1 \n\t" // common
    "movq (%0, %%esi), %%mm2 \n\t"
    "movq 8(%0, %%esi), %%mm3 \n\t"
    "movq 2048(%0, %%esi), %%mm4 \n\t"
    "movq 2056(%0, %%esi), %%mm5 \n\t"
    "pfadd %%mm0, %%mm2 \n\t"
    "pfadd %%mm1, %%mm3 \n\t"
    "pfadd %%mm0, %%mm4 \n\t"
    "pfadd %%mm1, %%mm5 \n\t"
    /* mm0/mm1 are free again once common has been added; reuse them. */
    "movq 3072(%0, %%esi), %%mm0 \n\t" // surround
    "movq 3080(%0, %%esi), %%mm1 \n\t" // surround
    "pfsub %%mm0, %%mm2 \n\t"
    "pfsub %%mm1, %%mm3 \n\t"
    "pfadd %%mm0, %%mm4 \n\t"
    "pfadd %%mm1, %%mm5 \n\t"
    "movq %%mm2, (%0, %%esi) \n\t"
    "movq %%mm3, 8(%0, %%esi) \n\t"
    "movq %%mm4, 1024(%0, %%esi) \n\t"
    "movq %%mm5, 1032(%0, %%esi) \n\t"
    "addl $16, %%esi \n\t"
    " jnz 1b \n\t"
    :: "r" (samples+256), "m" (bias)
    /* The sample buffer is read and written through the pointer operand,
     * which the compiler cannot see; the "memory" clobber keeps it from
     * caching or reordering loads/stores across this statement. */
    : "%esi", "memory"
    );
}
/*
 * In-place mix of four consecutive 256-sample blocks in `samples` down
 * to two, with the sum of the last two blocks subtracted from the first
 * output and added to the second.  For i in 0..255:
 *
 *   surround         = samples[512 + i] + samples[768 + i]
 *   samples[i]       = (samples[i]       + bias) - surround
 *   samples[256 + i] = (samples[256 + i] + bias) + surround
 *
 * The base pointer is samples+256 and %esi is a negative byte offset
 * running from -1024 up to 0, so displacements 0/1024/2048/3072 address
 * blocks 0..3 (assuming sample_t is a 4-byte float).  Four samples are
 * processed per iteration.
 *
 * No femms/emms is issued here, so the MMX/3DNow! state is still active
 * on return.
 */
static void mix22toS_3dnow (sample_t * samples, sample_t bias)
{
    __asm__ volatile(
    /* mm7 = { bias, bias } */
    "movd %1, %%mm7 \n\t"
    "punpckldq %1, %%mm7 \n\t"
    "movl $-1024, %%esi \n\t"
    ".balign 16\n\t"
    "1: \n\t"
    "movq 2048(%0, %%esi), %%mm0 \n\t"
    "movq 2056(%0, %%esi), %%mm1 \n\t"
    "pfadd 3072(%0, %%esi), %%mm0 \n\t" // surround
    "pfadd 3080(%0, %%esi), %%mm1 \n\t" // surround
    "movq (%0, %%esi), %%mm2 \n\t"
    "movq 8(%0, %%esi), %%mm3 \n\t"
    "movq 1024(%0, %%esi), %%mm4 \n\t"
    "movq 1032(%0, %%esi), %%mm5 \n\t"
    "pfadd %%mm7, %%mm2 \n\t"
    "pfadd %%mm7, %%mm3 \n\t"
    "pfadd %%mm7, %%mm4 \n\t"
    "pfadd %%mm7, %%mm5 \n\t"
    "pfsub %%mm0, %%mm2 \n\t"
    "pfsub %%mm1, %%mm3 \n\t"
    "pfadd %%mm0, %%mm4 \n\t"
    "pfadd %%mm1, %%mm5 \n\t"
    "movq %%mm2, (%0, %%esi) \n\t"
    "movq %%mm3, 8(%0, %%esi) \n\t"
    "movq %%mm4, 1024(%0, %%esi) \n\t"
    "movq %%mm5, 1032(%0, %%esi) \n\t"
    "addl $16, %%esi \n\t"
    " jnz 1b \n\t"
    :: "r" (samples+256), "m" (bias)
    /* The sample buffer is read and written through the pointer operand,
     * which the compiler cannot see; the "memory" clobber keeps it from
     * caching or reordering loads/stores across this statement. */
    : "%esi", "memory"
    );
}
/*
 * In-place mix of five consecutive 256-sample blocks in `samples` down
 * to two.  For i in 0..255:
 *
 *   common           = samples[256 + i] + bias
 *   samples[i]       = (common + samples[i])       + samples[768 + i]
 *   samples[256 + i] = (common + samples[512 + i]) + samples[1024 + i]
 *
 * The base pointer is samples+256 and %esi is a negative byte offset
 * running from -1024 up to 0, so displacements 0/1024/2048/3072/4096
 * address blocks 0..4 (assuming sample_t is a 4-byte float).  Four
 * samples are processed per iteration.
 *
 * No femms/emms is issued here, so the MMX/3DNow! state is still active
 * on return.
 */
static void mix32to2_3dnow (sample_t * samples, sample_t bias)
{
    __asm__ volatile(
    /* mm7 = { bias, bias } */
    "movd %1, %%mm7 \n\t"
    "punpckldq %1, %%mm7 \n\t"
    "movl $-1024, %%esi \n\t"
    ".balign 16\n\t"
    "1: \n\t"
    "movq 1024(%0, %%esi), %%mm0 \n\t"
    "movq 1032(%0, %%esi), %%mm1 \n\t"
    "pfadd %%mm7, %%mm0 \n\t" // common
    "pfadd %%mm7, %%mm1 \n\t" // common
    "movq %%mm0, %%mm2 \n\t" // common
    "movq %%mm1, %%mm3 \n\t" // common
    "pfadd (%0, %%esi), %%mm0 \n\t"
    "pfadd 8(%0, %%esi), %%mm1 \n\t"
    "pfadd 2048(%0, %%esi), %%mm2 \n\t"
    "pfadd 2056(%0, %%esi), %%mm3 \n\t"
    "pfadd 3072(%0, %%esi), %%mm0 \n\t"
    "pfadd 3080(%0, %%esi), %%mm1 \n\t"
    "pfadd 4096(%0, %%esi), %%mm2 \n\t"
    "pfadd 4104(%0, %%esi), %%mm3 \n\t"
    "movq %%mm0, (%0, %%esi) \n\t"
    "movq %%mm1, 8(%0, %%esi) \n\t"
    "movq %%mm2, 1024(%0, %%esi) \n\t"
    "movq %%mm3, 1032(%0, %%esi) \n\t"
    "addl $16, %%esi \n\t"
    " jnz 1b \n\t"
    :: "r" (samples+256), "m" (bias)
    /* The sample buffer is read and written through the pointer operand,
     * which the compiler cannot see; the "memory" clobber keeps it from
     * caching or reordering loads/stores across this statement. */
    : "%esi", "memory"
    );
}
/*
 * In-place mix of five consecutive 256-sample blocks in `samples` down
 * to two, with the sum of the last two blocks subtracted from the first
 * output and added to the second.  For i in 0..255:
 *
 *   common           = samples[256 + i] + bias
 *   surround         = samples[768 + i] + samples[1024 + i]
 *   samples[i]       = (samples[i] - surround)         + common
 *   samples[256 + i] = (surround + samples[512 + i])   + common
 *
 * The base pointer is samples+256 and %esi is a negative byte offset
 * running from -1024 up to 0, so displacements 0/1024/2048/3072/4096
 * address blocks 0..4 (assuming sample_t is a 4-byte float).  Four
 * samples are processed per iteration.
 *
 * The second output is accumulated directly in the surround registers
 * (mm4/mm5) once the first output no longer needs them.  That leaves mm7
 * free to hold the bias for the whole loop, so the bias is loaded once
 * before the loop instead of on every iteration.  The only arithmetic
 * difference from loading block 2 into its own register is the operand
 * order of one addition (surround + x instead of x + surround).
 *
 * No femms/emms is issued here, so the MMX/3DNow! state is still active
 * on return.
 */
static void mix32toS_3dnow (sample_t * samples, sample_t bias)
{
    __asm__ volatile(
    /* mm7 = { bias, bias }, loop-invariant */
    "movd %1, %%mm7 \n\t"
    "punpckldq %1, %%mm7 \n\t"
    "movl $-1024, %%esi \n\t"
    ".balign 16\n\t"
    "1: \n\t"
    "movq 1024(%0, %%esi), %%mm0 \n\t"
    "movq 1032(%0, %%esi), %%mm1 \n\t"
    "movq 3072(%0, %%esi), %%mm4 \n\t"
    "movq 3080(%0, %%esi), %%mm5 \n\t"
    "pfadd %%mm7, %%mm0 \n\t" // common
    "pfadd %%mm7, %%mm1 \n\t" // common
    "pfadd 4096(%0, %%esi), %%mm4 \n\t" // surround
    "pfadd 4104(%0, %%esi), %%mm5 \n\t" // surround
    "movq (%0, %%esi), %%mm2 \n\t"
    "movq 8(%0, %%esi), %%mm3 \n\t"
    "pfsub %%mm4, %%mm2 \n\t"
    "pfsub %%mm5, %%mm3 \n\t"
    /* surround is no longer needed on its own: fold block 2 into it */
    "pfadd 2048(%0, %%esi), %%mm4 \n\t"
    "pfadd 2056(%0, %%esi), %%mm5 \n\t"
    "pfadd %%mm0, %%mm2 \n\t"
    "pfadd %%mm1, %%mm3 \n\t"
    "pfadd %%mm0, %%mm4 \n\t"
    "pfadd %%mm1, %%mm5 \n\t"
    "movq %%mm2, (%0, %%esi) \n\t"
    "movq %%mm3, 8(%0, %%esi) \n\t"
    "movq %%mm4, 1024(%0, %%esi) \n\t"
    "movq %%mm5, 1032(%0, %%esi) \n\t"
    "addl $16, %%esi \n\t"
    " jnz 1b \n\t"
    :: "r" (samples+256), "m" (bias)
    /* The sample buffer is read and written through the pointer operand,
     * which the compiler cannot see; the "memory" clobber keeps it from
     * caching or reordering loads/stores across this statement. */
    : "%esi", "memory"
    );
}
/*
 * Sum two consecutive 256-sample blocks starting at `src`, add `bias`,
 * and store the result in the 256-sample block at `dest`.
 * For i in 0..255:
 *
 *   dest[i] = (src[i] + src[256 + i]) + bias
 *
 * Both base pointers are advanced by 256 samples and share the negative
 * byte offset in %esi, which runs from -1024 up to 0 in steps of 32, so
 * eight samples (four 64-bit MMX registers) are handled per iteration.
 * The byte offsets assume sample_t is a 4-byte float.
 *
 * No femms/emms is issued here, so the MMX/3DNow! state is still active
 * on return.
 */
static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias)
{
    __asm__ volatile(
    /* mm7 = { bias, bias } */
    "movd %2, %%mm7 \n\t"
    "punpckldq %2, %%mm7 \n\t"
    "movl $-1024, %%esi \n\t"
    ".balign 16\n\t"
    "1: \n\t"
    "movq (%0, %%esi), %%mm0 \n\t"
    "movq 8(%0, %%esi), %%mm1 \n\t"
    "movq 16(%0, %%esi), %%mm2 \n\t"
    "movq 24(%0, %%esi), %%mm3 \n\t"
    "pfadd 1024(%0, %%esi), %%mm0 \n\t"
    "pfadd 1032(%0, %%esi), %%mm1 \n\t"
    "pfadd 1040(%0, %%esi), %%mm2 \n\t"
    "pfadd 1048(%0, %%esi), %%mm3 \n\t"
    "pfadd %%mm7, %%mm0 \n\t"
    "pfadd %%mm7, %%mm1 \n\t"
    "pfadd %%mm7, %%mm2 \n\t"
    "pfadd %%mm7, %%mm3 \n\t"
    "movq %%mm0, (%1, %%esi) \n\t"
    "movq %%mm1, 8(%1, %%esi) \n\t"
    "movq %%mm2, 16(%1, %%esi) \n\t"
    "movq %%mm3, 24(%1, %%esi) \n\t"
    "addl $32, %%esi \n\t"
    " jnz 1b \n\t"
    :: "r" (src+256), "r" (dest+256), "m" (bias)
    /* `src` is read and `dest` is written through pointer operands the
     * compiler cannot see; the "memory" clobber keeps it from caching or
     * reordering loads/stores across this statement. */
    : "%esi", "memory"
    );
}
/*
 * 3DNow! downmix dispatcher.
 *
 * Selects the conversion from CONVERT (acmod, output & A52_CHANNEL_MASK)
 * and applies it in place to `samples`, which is addressed as consecutive
 * blocks of 256 samples (samples, samples + 256, samples + 512, ...).
 * Conversions not listed below leave `samples` untouched.
 *
 * samples  buffer of 256-sample blocks, modified in place
 * acmod    source channel configuration
 * output   requested output configuration; only the bits selected by
 *          A52_CHANNEL_MASK take part in the dispatch
 * bias     passed through to the mixing helpers
 * clev     not used by this function
 * slev     only tested against zero: when it is zero, several
 *          conversions use a cheaper helper or do nothing at all
 *
 * A femms instruction is always executed before returning.
 */
static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
sample_t clev, sample_t slev)
{
    /* Evaluated once; every slev test below is this same comparison. */
    const int no_surround = (slev == 0);

    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {

    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
        memcpy (samples, samples + 256, 256 * sizeof (sample_t));
        break;

    /* ---- mono output ---- */

    case CONVERT (A52_CHANNEL, A52_MONO):
    case CONVERT (A52_STEREO, A52_MONO):
        mix2to1_3dnow (samples, samples + 256, bias);
        break;

    case CONVERT (A52_2F1R, A52_MONO):
        if (no_surround)
            mix2to1_3dnow (samples, samples + 256, bias);
        else
            mix3to1_3dnow (samples, bias);
        break;

    case CONVERT (A52_3F, A52_MONO):
        mix3to1_3dnow (samples, bias);
        break;

    case CONVERT (A52_3F1R, A52_MONO):
        if (no_surround)
            mix3to1_3dnow (samples, bias);
        else
            mix4to1_3dnow (samples, bias);
        break;

    case CONVERT (A52_2F2R, A52_MONO):
        if (no_surround)
            mix2to1_3dnow (samples, samples + 256, bias);
        else
            mix4to1_3dnow (samples, bias);
        break;

    case CONVERT (A52_3F2R, A52_MONO):
        if (no_surround)
            mix3to1_3dnow (samples, bias);
        else
            mix5to1_3dnow (samples, bias);
        break;

    /* ---- stereo / Dolby output ---- */

    case CONVERT (A52_MONO, A52_DOLBY):
        memcpy (samples + 256, samples, 256 * sizeof (sample_t));
        break;

    case CONVERT (A52_3F, A52_STEREO):
    case CONVERT (A52_3F, A52_DOLBY):
        mix3to2_3dnow (samples, bias);
        break;

    case CONVERT (A52_2F1R, A52_STEREO):
        if (!no_surround)
            mix21to2_3dnow (samples, samples + 256, bias);
        break;

    case CONVERT (A52_2F1R, A52_DOLBY):
        mix21toS_3dnow (samples, bias);
        break;

    case CONVERT (A52_3F1R, A52_STEREO):
        if (no_surround)
            mix3to2_3dnow (samples, bias);
        else
            mix31to2_3dnow (samples, bias);
        break;

    case CONVERT (A52_3F1R, A52_DOLBY):
        mix31toS_3dnow (samples, bias);
        break;

    case CONVERT (A52_2F2R, A52_STEREO):
        if (!no_surround) {
            mix2to1_3dnow (samples, samples + 512, bias);
            mix2to1_3dnow (samples + 256, samples + 768, bias);
        }
        break;

    case CONVERT (A52_2F2R, A52_DOLBY):
        mix22toS_3dnow (samples, bias);
        break;

    case CONVERT (A52_3F2R, A52_STEREO):
        if (no_surround)
            mix3to2_3dnow (samples, bias);
        else
            mix32to2_3dnow (samples, bias);
        break;

    case CONVERT (A52_3F2R, A52_DOLBY):
        mix32toS_3dnow (samples, bias);
        break;

    /* ---- three front channels output ---- */

    case CONVERT (A52_3F1R, A52_3F):
        if (!no_surround)
            mix21to2_3dnow (samples, samples + 512, bias);
        break;

    case CONVERT (A52_3F2R, A52_3F):
        if (!no_surround) {
            mix2to1_3dnow (samples, samples + 768, bias);
            mix2to1_3dnow (samples + 512, samples + 1024, bias);
        }
        break;

    /* ---- outputs that keep rear channel(s) ---- */

    case CONVERT (A52_3F1R, A52_2F1R):
        mix3to2_3dnow (samples, bias);
        memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
        break;

    case CONVERT (A52_2F2R, A52_2F1R):
        mix2to1_3dnow (samples + 512, samples + 768, bias);
        break;

    case CONVERT (A52_3F2R, A52_2F1R):
        /* FIXME: possible bug? (output doesn't seem to be used) */
        mix3to2_3dnow (samples, bias);
        move2to1_3dnow (samples + 768, samples + 512, bias);
        break;

    case CONVERT (A52_3F2R, A52_3F1R):
        mix2to1_3dnow (samples + 768, samples + 1024, bias);
        break;

    case CONVERT (A52_2F1R, A52_2F2R):
        memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
        break;

    case CONVERT (A52_3F1R, A52_2F2R):
        mix3to2_3dnow (samples, bias);
        memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
        break;

    case CONVERT (A52_3F2R, A52_2F2R):
        mix3to2_3dnow (samples, bias);
        memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
        memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
        break;

    case CONVERT (A52_3F1R, A52_3F2R):
        memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
        break;

    default:
        /* No conversion for this combination: leave samples as they are. */
        break;
    }

    /* Executed on every path, including conversions that ran no mixing helper. */
    __asm __volatile("femms":::"memory");
}
#endif //ARCH_X86
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -