⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 downmix.c

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 C
📖 第 1 页 / 共 4 页
字号:
	asm volatile(
	"movd  %1, %%mm7	\n\t"
	"punpckldq %1, %%mm7	\n\t"
	"movl $-1024, %%esi	\n\t"
	".balign 16\n\t"
	"1:			\n\t"
	"movq   1024(%0, %%esi), %%mm0	\n\t"
	"movq   1032(%0, %%esi), %%mm1	\n\t"
	"pfadd  %%mm7, %%mm0		\n\t" //common
	"pfadd  %%mm7, %%mm1		\n\t" //common
	"movq   (%0, %%esi), %%mm2	\n\t"
	"movq   8(%0, %%esi), %%mm3	\n\t"
	"movq   2048(%0, %%esi), %%mm4	\n\t"
	"movq   2056(%0, %%esi), %%mm5	\n\t"
	"pfadd  %%mm0, %%mm2		\n\t"
	"pfadd  %%mm1, %%mm3		\n\t"
	"pfadd  %%mm0, %%mm4		\n\t"
	"pfadd  %%mm1, %%mm5		\n\t"
	"movq   %%mm2, (%0, %%esi)	\n\t"
	"movq   %%mm3, 8(%0, %%esi)	\n\t"
	"movq   %%mm4, 1024(%0, %%esi)	\n\t"
	"movq   %%mm5, 1032(%0, %%esi)	\n\t"
	"addl $16, %%esi		\n\t"
	" jnz 1b			\n\t"
	:: "r" (samples+256), "m" (bias)
	: "%esi"
	);
}

/*
 * 3DNow! kernel: add a biased "common" plane into two output planes.
 *
 * For each i in [0, 256):
 *     common   = right[256 + i] + bias
 *     left[i]  = left[i]  + common
 *     right[i] = right[i] + common
 *
 * Assumes sample_t is a 4-byte float (the packed-float pfadd and the
 * -1024 byte start offset paired with the +256 element base imply it).
 * Leaves the MMX state dirty: no femms/emms is executed here, so the
 * caller has to issue one before using x87 code.
 */
static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
{
	asm volatile(
		/* Broadcast bias into both halves of mm7. */
		"movd  %2, %%mm7	\n\t"
		"punpckldq %2, %%mm7	\n\t"
		/* esi is a negative byte offset counting up to zero. */
		"movl $-1024, %%esi	\n\t"
		".balign 16\n\t"
		"1:			\n\t"
		"movq  1024(%1, %%esi), %%mm0	\n\t"
		"movq  1032(%1, %%esi), %%mm1	\n\t"
		"pfadd %%mm7, %%mm0		\n\t" //common
		"pfadd %%mm7, %%mm1		\n\t" //common
		"movq  (%0, %%esi), %%mm2	\n\t"
		"movq  8(%0, %%esi), %%mm3	\n\t"
		"movq  (%1, %%esi), %%mm4	\n\t"
		"movq  8(%1, %%esi), %%mm5	\n\t"
		"pfadd %%mm0, %%mm2		\n\t"
		"pfadd %%mm1, %%mm3		\n\t"
		"pfadd %%mm0, %%mm4		\n\t"
		"pfadd %%mm1, %%mm5		\n\t"
		"movq  %%mm2, (%0, %%esi)	\n\t"
		"movq  %%mm3, 8(%0, %%esi)	\n\t"
		"movq  %%mm4, (%1, %%esi)	\n\t"
		"movq  %%mm5, 8(%1, %%esi)	\n\t"
		/* 16 bytes (four samples) per iteration. */
		"addl $16, %%esi		\n\t"
		" jnz 1b			\n\t"
	:: "r" (left+256), "r" (right+256), "m" (bias)
	/* "memory": the asm reads and writes the sample buffers through
	   the pointer operands, which the compiler cannot see otherwise. */
	: "%esi", "memory"
	);
}

/*
 * 3DNow! kernel: fold one surround plane into two planes with opposite sign.
 *
 * For each i in [0, 256):
 *     s                = samples[512 + i]
 *     samples[i]       = samples[i]       + bias - s
 *     samples[256 + i] = samples[256 + i] + bias + s
 *
 * Assumes sample_t is a 4-byte float (the packed-float pfadd/pfsub and the
 * -1024 byte start offset paired with the +256 element base imply it).
 * Leaves the MMX state dirty: no femms/emms is executed here.
 */
static void mix21toS_3dnow (sample_t * samples, sample_t bias)
{
	asm volatile(
		/* Broadcast bias into both halves of mm7. */
		"movd  %1, %%mm7	\n\t"
		"punpckldq %1, %%mm7	\n\t"
		/* esi is a negative byte offset counting up to zero. */
		"movl $-1024, %%esi	\n\t"
		".balign 16\n\t"
		"1:			\n\t"
		"movq  2048(%0, %%esi), %%mm0	\n\t"  // surround
		"movq  2056(%0, %%esi), %%mm1	\n\t"  // surround
		"movq  (%0, %%esi), %%mm2	\n\t"
		"movq  8(%0, %%esi), %%mm3	\n\t"
		"movq  1024(%0, %%esi), %%mm4	\n\t"
		"movq  1032(%0, %%esi), %%mm5	\n\t"
		"pfadd %%mm7, %%mm2		\n\t"
		"pfadd %%mm7, %%mm3		\n\t"
		"pfadd %%mm7, %%mm4		\n\t"
		"pfadd %%mm7, %%mm5		\n\t"
		"pfsub %%mm0, %%mm2		\n\t"
		"pfsub %%mm1, %%mm3		\n\t"
		"pfadd %%mm0, %%mm4		\n\t"
		"pfadd %%mm1, %%mm5		\n\t"
		"movq  %%mm2, (%0, %%esi)	\n\t"
		"movq  %%mm3, 8(%0, %%esi)	\n\t"
		"movq  %%mm4, 1024(%0, %%esi)	\n\t"
		"movq  %%mm5, 1032(%0, %%esi)	\n\t"
		/* 16 bytes (four samples) per iteration. */
		"addl $16, %%esi		\n\t"
		" jnz 1b			\n\t"
	:: "r" (samples+256), "m" (bias)
	/* "memory": the asm reads and writes the sample buffer through
	   the pointer operand, which the compiler cannot see otherwise. */
	: "%esi", "memory"
	);
}

/*
 * 3DNow! kernel: mix four planes down to two.
 *
 * For each i in [0, 256):
 *     common           = samples[256 + i] + samples[768 + i] + bias
 *     samples[i]       = samples[i]       + common
 *     samples[256 + i] = samples[512 + i] + common
 *
 * Assumes sample_t is a 4-byte float (the packed-float pfadd and the
 * -1024 byte start offset paired with the +256 element base imply it).
 * Leaves the MMX state dirty: no femms/emms is executed here.
 */
static void mix31to2_3dnow (sample_t * samples, sample_t bias)
{
	asm volatile(
		/* Broadcast bias into both halves of mm7. */
		"movd  %1, %%mm7	\n\t"
		"punpckldq %1, %%mm7	\n\t"
		/* esi is a negative byte offset counting up to zero. */
		"movl $-1024, %%esi	\n\t"
		".balign 16\n\t"
		"1:			\n\t"
		"movq  1024(%0, %%esi), %%mm0	\n\t"
		"movq  1032(%0, %%esi), %%mm1	\n\t"
		"pfadd 3072(%0, %%esi), %%mm0	\n\t"
		"pfadd 3080(%0, %%esi), %%mm1	\n\t"
		"pfadd %%mm7, %%mm0		\n\t" // common
		"pfadd %%mm7, %%mm1		\n\t" // common
		"movq  (%0, %%esi), %%mm2	\n\t"
		"movq  8(%0, %%esi), %%mm3	\n\t"
		"movq  2048(%0, %%esi), %%mm4	\n\t"
		"movq  2056(%0, %%esi), %%mm5	\n\t"
		"pfadd %%mm0, %%mm2		\n\t"
		"pfadd %%mm1, %%mm3		\n\t"
		"pfadd %%mm0, %%mm4		\n\t"
		"pfadd %%mm1, %%mm5		\n\t"
		"movq  %%mm2, (%0, %%esi)	\n\t"
		"movq  %%mm3, 8(%0, %%esi)	\n\t"
		"movq  %%mm4, 1024(%0, %%esi)	\n\t"
		"movq  %%mm5, 1032(%0, %%esi)	\n\t"
		/* 16 bytes (four samples) per iteration. */
		"addl $16, %%esi		\n\t"
		" jnz 1b			\n\t"
	:: "r" (samples+256), "m" (bias)
	/* "memory": the asm reads and writes the sample buffer through
	   the pointer operand, which the compiler cannot see otherwise. */
	: "%esi", "memory"
	);
}

/*
 * 3DNow! kernel: mix four planes down to two, with the fourth plane
 * subtracted from the first output and added to the second.
 *
 * For each i in [0, 256):
 *     common           = samples[256 + i] + bias
 *     s                = samples[768 + i]
 *     samples[i]       = samples[i]       + common - s
 *     samples[256 + i] = samples[512 + i] + common + s
 *
 * Assumes sample_t is a 4-byte float (the packed-float pfadd/pfsub and the
 * -1024 byte start offset paired with the +256 element base imply it).
 * Leaves the MMX state dirty: no femms/emms is executed here.
 */
static void mix31toS_3dnow (sample_t * samples, sample_t bias)
{
	asm volatile(
		/* Broadcast bias into both halves of mm7. */
		"movd  %1, %%mm7	\n\t"
		"punpckldq %1, %%mm7	\n\t"
		/* esi is a negative byte offset counting up to zero. */
		"movl $-1024, %%esi	\n\t"
		".balign 16\n\t"
		"1:			\n\t"
		"movq   1024(%0, %%esi), %%mm0	\n\t"
		"movq   1032(%0, %%esi), %%mm1	\n\t"
		"pfadd  %%mm7, %%mm0		\n\t" // common
		"pfadd  %%mm7, %%mm1		\n\t" // common
		"movq   (%0, %%esi), %%mm2	\n\t"
		"movq   8(%0, %%esi), %%mm3	\n\t"
		"movq   2048(%0, %%esi), %%mm4	\n\t"
		"movq   2056(%0, %%esi), %%mm5	\n\t"
		"pfadd  %%mm0, %%mm2		\n\t"
		"pfadd  %%mm1, %%mm3		\n\t"
		"pfadd  %%mm0, %%mm4		\n\t"
		"pfadd  %%mm1, %%mm5		\n\t"
		/* mm0/mm1 are reused for the surround plane from here on. */
		"movq   3072(%0, %%esi), %%mm0	\n\t" // surround
		"movq   3080(%0, %%esi), %%mm1	\n\t" // surround
		"pfsub  %%mm0, %%mm2		\n\t"
		"pfsub  %%mm1, %%mm3		\n\t"
		"pfadd  %%mm0, %%mm4		\n\t"
		"pfadd  %%mm1, %%mm5		\n\t"
		"movq   %%mm2, (%0, %%esi)	\n\t"
		"movq   %%mm3, 8(%0, %%esi)	\n\t"
		"movq   %%mm4, 1024(%0, %%esi)	\n\t"
		"movq   %%mm5, 1032(%0, %%esi)	\n\t"
		/* 16 bytes (four samples) per iteration. */
		"addl $16, %%esi		\n\t"
		" jnz 1b			\n\t"
	:: "r" (samples+256), "m" (bias)
	/* "memory": the asm reads and writes the sample buffer through
	   the pointer operand, which the compiler cannot see otherwise. */
	: "%esi", "memory"
	);
}

/*
 * 3DNow! kernel: fold the sum of two surround planes into two planes
 * with opposite sign.
 *
 * For each i in [0, 256):
 *     s                = samples[512 + i] + samples[768 + i]
 *     samples[i]       = samples[i]       + bias - s
 *     samples[256 + i] = samples[256 + i] + bias + s
 *
 * Assumes sample_t is a 4-byte float (the packed-float pfadd/pfsub and the
 * -1024 byte start offset paired with the +256 element base imply it).
 * Leaves the MMX state dirty: no femms/emms is executed here.
 */
static void mix22toS_3dnow (sample_t * samples, sample_t bias)
{
	asm volatile(
		/* Broadcast bias into both halves of mm7. */
		"movd  %1, %%mm7	\n\t"
		"punpckldq %1, %%mm7	\n\t"
		/* esi is a negative byte offset counting up to zero. */
		"movl $-1024, %%esi	\n\t"
		".balign 16\n\t"
		"1:			\n\t"
		"movq  2048(%0, %%esi), %%mm0	\n\t"
		"movq  2056(%0, %%esi), %%mm1	\n\t"
		"pfadd 3072(%0, %%esi), %%mm0	\n\t" // surround
		"pfadd 3080(%0, %%esi), %%mm1	\n\t" // surround
		"movq  (%0, %%esi), %%mm2	\n\t"
		"movq  8(%0, %%esi), %%mm3	\n\t"
		"movq  1024(%0, %%esi), %%mm4	\n\t"
		"movq  1032(%0, %%esi), %%mm5	\n\t"
		"pfadd %%mm7, %%mm2		\n\t"
		"pfadd %%mm7, %%mm3		\n\t"
		"pfadd %%mm7, %%mm4		\n\t"
		"pfadd %%mm7, %%mm5		\n\t"
		"pfsub %%mm0, %%mm2		\n\t"
		"pfsub %%mm1, %%mm3		\n\t"
		"pfadd %%mm0, %%mm4		\n\t"
		"pfadd %%mm1, %%mm5		\n\t"
		"movq  %%mm2, (%0, %%esi)	\n\t"
		"movq  %%mm3, 8(%0, %%esi)	\n\t"
		"movq  %%mm4, 1024(%0, %%esi)	\n\t"
		"movq  %%mm5, 1032(%0, %%esi)	\n\t"
		/* 16 bytes (four samples) per iteration. */
		"addl $16, %%esi		\n\t"
		" jnz 1b			\n\t"
	:: "r" (samples+256), "m" (bias)
	/* "memory": the asm reads and writes the sample buffer through
	   the pointer operand, which the compiler cannot see otherwise. */
	: "%esi", "memory"
	);
}

/*
 * 3DNow! kernel: mix five planes down to two.
 *
 * For each i in [0, 256):
 *     common           = samples[256 + i] + bias
 *     samples[i]       = common + samples[i]       + samples[768 + i]
 *     samples[256 + i] = common + samples[512 + i] + samples[1024 + i]
 *
 * Assumes sample_t is a 4-byte float (the packed-float pfadd and the
 * -1024 byte start offset paired with the +256 element base imply it).
 * Leaves the MMX state dirty: no femms/emms is executed here.
 */
static void mix32to2_3dnow (sample_t * samples, sample_t bias)
{
	asm volatile(
	/* Broadcast bias into both halves of mm7. */
	"movd  %1, %%mm7	\n\t"
	"punpckldq %1, %%mm7	\n\t"
	/* esi is a negative byte offset counting up to zero. */
	"movl $-1024, %%esi	\n\t"
	".balign 16\n\t"
	"1:			\n\t"
	"movq   1024(%0, %%esi), %%mm0	\n\t"
	"movq   1032(%0, %%esi), %%mm1	\n\t"
	"pfadd  %%mm7, %%mm0		\n\t" // common
	"pfadd  %%mm7, %%mm1		\n\t" // common
	"movq   %%mm0, %%mm2		\n\t" // common
	"movq   %%mm1, %%mm3		\n\t" // common
	"pfadd  (%0, %%esi), %%mm0	\n\t"
	"pfadd  8(%0, %%esi), %%mm1	\n\t"
	"pfadd  2048(%0, %%esi), %%mm2	\n\t"
	"pfadd  2056(%0, %%esi), %%mm3	\n\t"
	"pfadd  3072(%0, %%esi), %%mm0	\n\t"
	"pfadd  3080(%0, %%esi), %%mm1	\n\t"
	"pfadd  4096(%0, %%esi), %%mm2	\n\t"
	"pfadd  4104(%0, %%esi), %%mm3	\n\t"
	"movq   %%mm0, (%0, %%esi)	\n\t"
	"movq   %%mm1, 8(%0, %%esi)	\n\t"
	"movq   %%mm2, 1024(%0, %%esi)	\n\t"
	"movq   %%mm3, 1032(%0, %%esi)	\n\t"
	/* 16 bytes (four samples) per iteration. */
	"addl $16, %%esi		\n\t"
	" jnz 1b			\n\t"
	:: "r" (samples+256), "m" (bias)
	/* "memory": the asm reads and writes the sample buffer through
	   the pointer operand, which the compiler cannot see otherwise. */
	: "%esi", "memory"
	);
}

/*
 * 3DNow! kernel: mix five planes down to two, with the summed surround
 * planes subtracted from the first output and added to the second.
 *
 * For each i in [0, 256):
 *     common           = samples[256 + i] + bias
 *     s                = samples[768 + i] + samples[1024 + i]
 *     samples[i]       = samples[i]       - s + common
 *     samples[256 + i] = samples[512 + i] + s + common
 *
 * Assumes sample_t is a 4-byte float (the packed-float pfadd/pfsub and the
 * -1024 byte start offset paired with the +256 element base imply it).
 * Leaves the MMX state dirty: no femms/emms is executed here.
 *
 * TODO: should be optimized better. All eight MMX registers are in use,
 * so mm7 doubles as a data register and bias is reloaded every iteration.
 */
static void mix32toS_3dnow (sample_t * samples, sample_t bias)
{
	asm volatile(
	/* esi is a negative byte offset counting up to zero. */
	"movl $-1024, %%esi	\n\t"
	".balign 16\n\t"
	"1:			\n\t"
	/* Reload bias: mm7 is overwritten further down in the loop body. */
	"movd  %1, %%mm7		\n\t"
	"punpckldq %1, %%mm7		\n\t"
	"movq  1024(%0, %%esi), %%mm0	\n\t"
	"movq  1032(%0, %%esi), %%mm1	\n\t"
	"movq  3072(%0, %%esi), %%mm4	\n\t"
	"movq  3080(%0, %%esi), %%mm5	\n\t"
	"pfadd %%mm7, %%mm0		\n\t" // common
	"pfadd %%mm7, %%mm1		\n\t" // common
	"pfadd 4096(%0, %%esi), %%mm4	\n\t" // surround
	"pfadd 4104(%0, %%esi), %%mm5	\n\t" // surround
	"movq  (%0, %%esi), %%mm2	\n\t"
	"movq  8(%0, %%esi), %%mm3	\n\t"
	"movq  2048(%0, %%esi), %%mm6	\n\t"
	/* From here on mm7 holds sample data, not bias. */
	"movq  2056(%0, %%esi), %%mm7	\n\t"
	"pfsub %%mm4, %%mm2		\n\t"
	"pfsub %%mm5, %%mm3		\n\t"
	"pfadd %%mm4, %%mm6		\n\t"
	"pfadd %%mm5, %%mm7		\n\t"
	"pfadd %%mm0, %%mm2		\n\t"
	"pfadd %%mm1, %%mm3		\n\t"
	"pfadd %%mm0, %%mm6		\n\t"
	"pfadd %%mm1, %%mm7		\n\t"
	"movq  %%mm2, (%0, %%esi)	\n\t"
	"movq  %%mm3, 8(%0, %%esi)	\n\t"
	"movq  %%mm6, 1024(%0, %%esi)	\n\t"
	"movq  %%mm7, 1032(%0, %%esi)	\n\t"
	/* 16 bytes (four samples) per iteration. */
	"addl $16, %%esi		\n\t"
	" jnz 1b			\n\t"
	:: "r" (samples+256), "m" (bias)
	/* "memory": the asm reads and writes the sample buffer through
	   the pointer operand, which the compiler cannot see otherwise. */
	: "%esi", "memory"
	);
}

/*
 * 3DNow! kernel: sum two adjacent source planes plus bias into dest.
 *
 * For each i in [0, 256):
 *     dest[i] = src[i] + src[256 + i] + bias
 *
 * Assumes sample_t is a 4-byte float (the packed-float pfadd and the
 * -1024 byte start offset paired with the +256 element base imply it).
 * Leaves the MMX state dirty: no femms/emms is executed here.
 */
static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias)
{
	asm volatile(
		/* Broadcast bias into both halves of mm7. */
		"movd  %2, %%mm7	\n\t"
		"punpckldq %2, %%mm7	\n\t"
		/* esi is a negative byte offset counting up to zero. */
		"movl $-1024, %%esi	\n\t"
		".balign 16\n\t"
		"1:			\n\t"
		"movq  (%0, %%esi), %%mm0	\n\t"
		"movq  8(%0, %%esi), %%mm1	\n\t"
		"movq  16(%0, %%esi), %%mm2	\n\t"
		"movq  24(%0, %%esi), %%mm3	\n\t"
		"pfadd 1024(%0, %%esi), %%mm0	\n\t"
		"pfadd 1032(%0, %%esi), %%mm1	\n\t"
		"pfadd 1040(%0, %%esi), %%mm2	\n\t"
		"pfadd 1048(%0, %%esi), %%mm3	\n\t"
		"pfadd %%mm7, %%mm0		\n\t"
		"pfadd %%mm7, %%mm1		\n\t"
		"pfadd %%mm7, %%mm2		\n\t"
		"pfadd %%mm7, %%mm3		\n\t"
		"movq  %%mm0, (%1, %%esi)	\n\t"
		"movq  %%mm1, 8(%1, %%esi)	\n\t"
		"movq  %%mm2, 16(%1, %%esi)	\n\t"
		"movq  %%mm3, 24(%1, %%esi)	\n\t"
		/* 32 bytes (eight samples) per iteration. */
		"addl $32, %%esi		\n\t"
		" jnz 1b			\n\t"
	:: "r" (src+256), "r" (dest+256), "m" (bias)
	/* "memory": the asm reads src and writes dest through the pointer
	   operands, which the compiler cannot see otherwise. */
	: "%esi", "memory"
	);
}

/*
 * Downmix one block of samples in place using the 3DNow! kernels.
 *
 * The kernel (or plain memcpy) is selected by
 * CONVERT (acmod, output & A52_CHANNEL_MASK); combinations that are not
 * listed leave the samples untouched. Planes are addressed in steps of
 * 256 samples from `samples`.
 *
 * samples: sample buffer, modified in place.
 * acmod:   source channel configuration (A52_* constant).
 * output:  requested output configuration; only the A52_CHANNEL_MASK
 *          bits take part in the selection.
 * bias:    value forwarded to every mixing kernel.
 * clev:    not used by this function.
 * slev:    when it compares equal to 0, the cases below that test it
 *          pick a smaller kernel or do nothing.
 *
 * femms is always executed before returning, which clears the MMX state
 * left behind by the kernels.
 */
static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
	      sample_t clev, sample_t slev)
{
    /* Evaluate the surround-level test once; every case reuses it. */
    const int no_surround = (slev == 0);

    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {

    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
	memcpy (samples, samples + 256, 256 * sizeof (sample_t));
	break;

    case CONVERT (A52_CHANNEL, A52_MONO):
    case CONVERT (A52_STEREO, A52_MONO):
	mix2to1_3dnow (samples, samples + 256, bias);
	break;

    case CONVERT (A52_2F1R, A52_MONO):
	if (no_surround)
	    mix2to1_3dnow (samples, samples + 256, bias);
	else
	    mix3to1_3dnow (samples, bias);
	break;

    case CONVERT (A52_3F, A52_MONO):
	mix3to1_3dnow (samples, bias);
	break;

    case CONVERT (A52_3F1R, A52_MONO):
	if (no_surround)
	    mix3to1_3dnow (samples, bias);
	else
	    mix4to1_3dnow (samples, bias);
	break;

    case CONVERT (A52_2F2R, A52_MONO):
	if (no_surround)
	    mix2to1_3dnow (samples, samples + 256, bias);
	else
	    mix4to1_3dnow (samples, bias);
	break;

    case CONVERT (A52_3F2R, A52_MONO):
	if (no_surround)
	    mix3to1_3dnow (samples, bias);
	else
	    mix5to1_3dnow (samples, bias);
	break;

    case CONVERT (A52_MONO, A52_DOLBY):
	memcpy (samples + 256, samples, 256 * sizeof (sample_t));
	break;

    case CONVERT (A52_3F, A52_STEREO):
    case CONVERT (A52_3F, A52_DOLBY):
	mix3to2_3dnow (samples, bias);
	break;

    case CONVERT (A52_2F1R, A52_STEREO):
	if (!no_surround)
	    mix21to2_3dnow (samples, samples + 256, bias);
	break;

    case CONVERT (A52_2F1R, A52_DOLBY):
	mix21toS_3dnow (samples, bias);
	break;

    case CONVERT (A52_3F1R, A52_STEREO):
	if (no_surround)
	    mix3to2_3dnow (samples, bias);
	else
	    mix31to2_3dnow (samples, bias);
	break;

    case CONVERT (A52_3F1R, A52_DOLBY):
	mix31toS_3dnow (samples, bias);
	break;

    case CONVERT (A52_2F2R, A52_STEREO):
	if (!no_surround) {
	    mix2to1_3dnow (samples, samples + 512, bias);
	    mix2to1_3dnow (samples + 256, samples + 768, bias);
	}
	break;

    case CONVERT (A52_2F2R, A52_DOLBY):
	mix22toS_3dnow (samples, bias);
	break;

    case CONVERT (A52_3F2R, A52_STEREO):
	if (no_surround)
	    mix3to2_3dnow (samples, bias);
	else
	    mix32to2_3dnow (samples, bias);
	break;

    case CONVERT (A52_3F2R, A52_DOLBY):
	mix32toS_3dnow (samples, bias);
	break;

    case CONVERT (A52_3F1R, A52_3F):
	if (!no_surround)
	    mix21to2_3dnow (samples, samples + 512, bias);
	break;

    case CONVERT (A52_3F2R, A52_3F):
	if (!no_surround) {
	    mix2to1_3dnow (samples, samples + 768, bias);
	    mix2to1_3dnow (samples + 512, samples + 1024, bias);
	}
	break;

    case CONVERT (A52_3F1R, A52_2F1R):
	mix3to2_3dnow (samples, bias);
	memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
	break;

    case CONVERT (A52_2F2R, A52_2F1R):
	mix2to1_3dnow (samples + 512, samples + 768, bias);
	break;

    case CONVERT (A52_3F2R, A52_2F1R):
	/* FIXME possible bug? (output doesn't seem to be used) */
	mix3to2_3dnow (samples, bias);
	move2to1_3dnow (samples + 768, samples + 512, bias);
	break;

    case CONVERT (A52_3F2R, A52_3F1R):
	mix2to1_3dnow (samples + 768, samples + 1024, bias);
	break;

    case CONVERT (A52_2F1R, A52_2F2R):
	memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
	break;

    case CONVERT (A52_3F1R, A52_2F2R):
	mix3to2_3dnow (samples, bias);
	memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
	break;

    case CONVERT (A52_3F2R, A52_2F2R):
	mix3to2_3dnow (samples, bias);
	memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
	memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
	break;

    case CONVERT (A52_3F1R, A52_3F2R):
	memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
	break;

    default:
	/* Any other combination: nothing to mix. */
	break;
    }

    /* Clear the MMX state; the "memory" clobber also keeps the compiler
       from moving memory accesses across this point. */
    __asm __volatile("femms":::"memory");
}

#endif //ARCH_X86

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -