⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dct64_k7.c

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 C
📖 第 1 页 / 共 2 页
字号:
/*
* This code was taken from http://www.mpg123.org
* See ChangeLog of mpg123-0.59s-pre.1 for details
* Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
* Partial 3dnowex-DSP! optimization by Nick Kurshev
*
* TODO: optimize scalar 3dnow! code
* Warning: Phases 7 & 8 are not tested
*/
#define real float /* ugly - but only way */

#include "../config.h"
#include "../mangle.h"

/*
 * 64-bit mask with only bit 63 set. The Phase 6 code loads it into mm7 with a
 * single movq and XORs it into registers holding two packed floats, which
 * flips the sign bit of the upper 32-bit lane and leaves the lower lane
 * unchanged. Declared with 8-byte alignment.
 * NOTE(review): attribute_used is not defined in this file - presumably a
 * config.h macro that keeps the symbol from being discarded, since in the
 * visible code it is referenced only by name from the inline-asm string
 * (via MANGLE). Confirm against config.h.
 */
static unsigned long long int attribute_used __attribute__((aligned(8))) x_plus_minus_3dnow = 0x8000000000000000ULL;
/*
 * The constant 1.0. Phase 6 loads it with movd into the low lane of mm6 and
 * then unpacks the value at 120(%ebx) into the high lane.
 */
static float attribute_used plus_1f = 1.0;

void dct64_MMX_3dnowex(short *a,short *b,real *c)
{
  char tmp[256];
    __asm __volatile(
"	movl %2,%%eax\n\t"

"	leal 128+%3,%%edx\n\t"
"	movl %0,%%esi\n\t"
"	movl %1,%%edi\n\t"
"	movl $"MANGLE(costab_mmx)",%%ebx\n\t"
"	leal %3,%%ecx\n\t"

/* Phase 1*/
"	movq	(%%eax), %%mm0\n\t"
"	movq	8(%%eax), %%mm4\n\t"
"	movq	%%mm0, %%mm3\n\t"
"	movq	%%mm4, %%mm7\n\t"
"	pswapd	120(%%eax), %%mm1\n\t"
"	pswapd	112(%%eax), %%mm5\n\t"
"	pfadd	%%mm1, %%mm0\n\t"
"	pfadd	%%mm5, %%mm4\n\t"
"	movq	%%mm0, (%%edx)\n\t"
"	movq	%%mm4, 8(%%edx)\n\t"
"	pfsub	%%mm1, %%mm3\n\t"
"	pfsub	%%mm5, %%mm7\n\t"
"	pfmul	(%%ebx), %%mm3\n\t"
"	pfmul	8(%%ebx), %%mm7\n\t"
"	pswapd	%%mm3, %%mm3\n\t"
"	pswapd	%%mm7, %%mm7\n\t"
"	movq	%%mm3, 120(%%edx)\n\t"
"	movq	%%mm7, 112(%%edx)\n\t"

"	movq	16(%%eax), %%mm0\n\t"
"	movq	24(%%eax), %%mm4\n\t"
"	movq	%%mm0, %%mm3\n\t"
"	movq	%%mm4, %%mm7\n\t"
"	pswapd	104(%%eax), %%mm1\n\t"
"	pswapd	96(%%eax), %%mm5\n\t"
"	pfadd	%%mm1, %%mm0\n\t"
"	pfadd	%%mm5, %%mm4\n\t"
"	movq	%%mm0, 16(%%edx)\n\t"
"	movq	%%mm4, 24(%%edx)\n\t"
"	pfsub	%%mm1, %%mm3\n\t"
"	pfsub	%%mm5, %%mm7\n\t"
"	pfmul	16(%%ebx), %%mm3\n\t"
"	pfmul	24(%%ebx), %%mm7\n\t"
"	pswapd	%%mm3, %%mm3\n\t"
"	pswapd	%%mm7, %%mm7\n\t"
"	movq	%%mm3, 104(%%edx)\n\t"
"	movq	%%mm7, 96(%%edx)\n\t"

"	movq	32(%%eax), %%mm0\n\t"
"	movq	40(%%eax), %%mm4\n\t"
"	movq	%%mm0, %%mm3\n\t"
"	movq	%%mm4, %%mm7\n\t"
"	pswapd	88(%%eax), %%mm1\n\t"
"	pswapd	80(%%eax), %%mm5\n\t"
"	pfadd	%%mm1, %%mm0\n\t"
"	pfadd	%%mm5, %%mm4\n\t"
"	movq	%%mm0, 32(%%edx)\n\t"
"	movq	%%mm4, 40(%%edx)\n\t"
"	pfsub	%%mm1, %%mm3\n\t"
"	pfsub	%%mm5, %%mm7\n\t"
"	pfmul	32(%%ebx), %%mm3\n\t"
"	pfmul	40(%%ebx), %%mm7\n\t"
"	pswapd	%%mm3, %%mm3\n\t"
"	pswapd	%%mm7, %%mm7\n\t"
"	movq	%%mm3, 88(%%edx)\n\t"
"	movq	%%mm7, 80(%%edx)\n\t"

"	movq	48(%%eax), %%mm0\n\t"
"	movq	56(%%eax), %%mm4\n\t"
"	movq	%%mm0, %%mm3\n\t"
"	movq	%%mm4, %%mm7\n\t"
"	pswapd	72(%%eax), %%mm1\n\t"
"	pswapd	64(%%eax), %%mm5\n\t"
"	pfadd	%%mm1, %%mm0\n\t"
"	pfadd	%%mm5, %%mm4\n\t"
"	movq	%%mm0, 48(%%edx)\n\t"
"	movq	%%mm4, 56(%%edx)\n\t"
"	pfsub	%%mm1, %%mm3\n\t"
"	pfsub	%%mm5, %%mm7\n\t"
"	pfmul	48(%%ebx), %%mm3\n\t"
"	pfmul	56(%%ebx), %%mm7\n\t"
"	pswapd	%%mm3, %%mm3\n\t"
"	pswapd	%%mm7, %%mm7\n\t"
"	movq	%%mm3, 72(%%edx)\n\t"
"	movq	%%mm7, 64(%%edx)\n\t"

/* Phase 2*/

"	movq	(%%edx), %%mm0\n\t"
"	movq	8(%%edx), %%mm4\n\t"
"	movq	%%mm0, %%mm3\n\t"
"	movq	%%mm4, %%mm7\n\t"
"	pswapd	56(%%edx), %%mm1\n\t"
"	pswapd	48(%%edx), %%mm5\n\t"
"	pfadd	%%mm1, %%mm0\n\t"
"	pfadd	%%mm5, %%mm4\n\t"
"	movq	%%mm0, (%%ecx)\n\t"
"	movq	%%mm4, 8(%%ecx)\n\t"
"	pfsub	%%mm1, %%mm3\n\t"
"	pfsub	%%mm5, %%mm7\n\t"
"	pfmul	64(%%ebx), %%mm3\n\t"
"	pfmul	72(%%ebx), %%mm7\n\t"
"	pswapd	%%mm3, %%mm3\n\t"
"	pswapd	%%mm7, %%mm7\n\t"
"	movq	%%mm3, 56(%%ecx)\n\t"
"	movq	%%mm7, 48(%%ecx)\n\t"

"	movq	16(%%edx), %%mm0\n\t"
"	movq	24(%%edx), %%mm4\n\t"
"	movq	%%mm0, %%mm3\n\t"
"	movq	%%mm4, %%mm7\n\t"
"	pswapd	40(%%edx), %%mm1\n\t"
"	pswapd	32(%%edx), %%mm5\n\t"
"	pfadd	%%mm1, %%mm0\n\t"
"	pfadd	%%mm5, %%mm4\n\t"
"	movq	%%mm0, 16(%%ecx)\n\t"
"	movq	%%mm4, 24(%%ecx)\n\t"
"	pfsub	%%mm1, %%mm3\n\t"
"	pfsub	%%mm5, %%mm7\n\t"
"	pfmul	80(%%ebx), %%mm3\n\t"
"	pfmul	88(%%ebx), %%mm7\n\t"
"	pswapd	%%mm3, %%mm3\n\t"
"	pswapd	%%mm7, %%mm7\n\t"
"	movq	%%mm3, 40(%%ecx)\n\t"
"	movq	%%mm7, 32(%%ecx)\n\t"

/* Phase 3*/

"	movq	64(%%edx), %%mm0\n\t"
"	movq	72(%%edx), %%mm4\n\t"
"	movq	%%mm0, %%mm3\n\t"
"	movq	%%mm4, %%mm7\n\t"
"	pswapd	120(%%edx), %%mm1\n\t"
"	pswapd	112(%%edx), %%mm5\n\t"
"	pfadd	%%mm1, %%mm0\n\t"
"	pfadd	%%mm5, %%mm4\n\t"
"	movq	%%mm0, 64(%%ecx)\n\t"
"	movq	%%mm4, 72(%%ecx)\n\t"
"	pfsubr	%%mm1, %%mm3\n\t"
"	pfsubr	%%mm5, %%mm7\n\t"
"	pfmul	64(%%ebx), %%mm3\n\t"
"	pfmul	72(%%ebx), %%mm7\n\t"
"	pswapd	%%mm3, %%mm3\n\t"
"	pswapd	%%mm7, %%mm7\n\t"
"	movq	%%mm3, 120(%%ecx)\n\t"
"	movq	%%mm7, 112(%%ecx)\n\t"

"	movq	80(%%edx), %%mm0\n\t"
"	movq	88(%%edx), %%mm4\n\t"
"	movq	%%mm0, %%mm3\n\t"
"	movq	%%mm4, %%mm7\n\t"
"	pswapd	104(%%edx), %%mm1\n\t"
"	pswapd	96(%%edx), %%mm5\n\t"
"	pfadd	%%mm1, %%mm0\n\t"
"	pfadd	%%mm5, %%mm4\n\t"
"	movq	%%mm0, 80(%%ecx)\n\t"
"	movq	%%mm4, 88(%%ecx)\n\t"
"	pfsubr	%%mm1, %%mm3\n\t"
"	pfsubr	%%mm5, %%mm7\n\t"
"	pfmul	80(%%ebx), %%mm3\n\t"
"	pfmul	88(%%ebx), %%mm7\n\t"
"	pswapd	%%mm3, %%mm3\n\t"
"	pswapd	%%mm7, %%mm7\n\t"
"	movq	%%mm3, 104(%%ecx)\n\t"
"	movq	%%mm7, 96(%%ecx)\n\t"

/* Phase 4*/

"	movq	96(%%ebx), %%mm2\n\t"
"	movq	104(%%ebx), %%mm6\n\t"

"	movq	(%%ecx), %%mm0\n\t"
"	movq	8(%%ecx), %%mm4\n\t"
"	movq	%%mm0, %%mm3\n\t"
"	movq	%%mm4, %%mm7\n\t"
"	pswapd	24(%%ecx), %%mm1\n\t"
"	pswapd	16(%%ecx), %%mm5\n\t"
"	pfadd	%%mm1, %%mm0\n\t"
"	pfadd	%%mm5, %%mm4\n\t"
"	movq	%%mm0, (%%edx)\n\t"
"	movq	%%mm4, 8(%%edx)\n\t"
"	pfsub	%%mm1, %%mm3\n\t"
"	pfsub	%%mm5, %%mm7\n\t"
"	pfmul	%%mm2, %%mm3\n\t"
"	pfmul	%%mm6, %%mm7\n\t"
"	pswapd	%%mm3, %%mm3\n\t"
"	pswapd	%%mm7, %%mm7\n\t"
"	movq	%%mm3, 24(%%edx)\n\t"
"	movq	%%mm7, 16(%%edx)\n\t"

"	movq	32(%%ecx), %%mm0\n\t"
"	movq	40(%%ecx), %%mm4\n\t"
"	movq	%%mm0, %%mm3\n\t"
"	movq	%%mm4, %%mm7\n\t"
"	pswapd	56(%%ecx), %%mm1\n\t"
"	pswapd	48(%%ecx), %%mm5\n\t"
"	pfadd	%%mm1, %%mm0\n\t"
"	pfadd	%%mm5, %%mm4\n\t"
"	movq	%%mm0, 32(%%edx)\n\t"
"	movq	%%mm4, 40(%%edx)\n\t"
"	pfsubr	%%mm1, %%mm3\n\t"
"	pfsubr	%%mm5, %%mm7\n\t"
"	pfmul	%%mm2, %%mm3\n\t"
"	pfmul	%%mm6, %%mm7\n\t"
"	pswapd	%%mm3, %%mm3\n\t"
"	pswapd	%%mm7, %%mm7\n\t"
"	movq	%%mm3, 56(%%edx)\n\t"
"	movq	%%mm7, 48(%%edx)\n\t"

"	movq	64(%%ecx), %%mm0\n\t"
"	movq	72(%%ecx), %%mm4\n\t"
"	movq	%%mm0, %%mm3\n\t"
"	movq	%%mm4, %%mm7\n\t"
"	pswapd	88(%%ecx), %%mm1\n\t"
"	pswapd	80(%%ecx), %%mm5\n\t"
"	pfadd	%%mm1, %%mm0\n\t"
"	pfadd	%%mm5, %%mm4\n\t"
"	movq	%%mm0, 64(%%edx)\n\t"
"	movq	%%mm4, 72(%%edx)\n\t"
"	pfsub	%%mm1, %%mm3\n\t"
"	pfsub	%%mm5, %%mm7\n\t"
"	pfmul	%%mm2, %%mm3\n\t"
"	pfmul	%%mm6, %%mm7\n\t"
"	pswapd	%%mm3, %%mm3\n\t"
"	pswapd	%%mm7, %%mm7\n\t"
"	movq	%%mm3, 88(%%edx)\n\t"
"	movq	%%mm7, 80(%%edx)\n\t"

"	movq	96(%%ecx), %%mm0\n\t"
"	movq	104(%%ecx), %%mm4\n\t"
"	movq	%%mm0, %%mm3\n\t"
"	movq	%%mm4, %%mm7\n\t"
"	pswapd	120(%%ecx), %%mm1\n\t"
"	pswapd	112(%%ecx), %%mm5\n\t"
"	pfadd	%%mm1, %%mm0\n\t"
"	pfadd	%%mm5, %%mm4\n\t"
"	movq	%%mm0, 96(%%edx)\n\t"
"	movq	%%mm4, 104(%%edx)\n\t"
"	pfsubr	%%mm1, %%mm3\n\t"
"	pfsubr	%%mm5, %%mm7\n\t"
"	pfmul	%%mm2, %%mm3\n\t"
"	pfmul	%%mm6, %%mm7\n\t"
"	pswapd	%%mm3, %%mm3\n\t"
"	pswapd	%%mm7, %%mm7\n\t"
"	movq	%%mm3, 120(%%edx)\n\t"
"	movq	%%mm7, 112(%%edx)\n\t"

/* Phase 5 */

"	movq	112(%%ebx), %%mm2\n\t"

"	movq	(%%edx), %%mm0\n\t"
"	movq	16(%%edx), %%mm4\n\t"
"	movq	%%mm0, %%mm3\n\t"
"	movq	%%mm4, %%mm7\n\t"
"	pswapd	8(%%edx), %%mm1\n\t"
"	pswapd	24(%%edx), %%mm5\n\t"
"	pfadd	%%mm1, %%mm0\n\t"
"	pfadd	%%mm5, %%mm4\n\t"
"	movq	%%mm0, (%%ecx)\n\t"
"	movq	%%mm4, 16(%%ecx)\n\t"
"	pfsub	%%mm1, %%mm3\n\t"
"	pfsubr	%%mm5, %%mm7\n\t"
"	pfmul	%%mm2, %%mm3\n\t"
"	pfmul	%%mm2, %%mm7\n\t"
"	pswapd	%%mm3, %%mm3\n\t"
"	pswapd	%%mm7, %%mm7\n\t"
"	movq	%%mm3, 8(%%ecx)\n\t"
"	movq	%%mm7, 24(%%ecx)\n\t"

"	movq	32(%%edx), %%mm0\n\t"
"	movq	48(%%edx), %%mm4\n\t"
"	movq	%%mm0, %%mm3\n\t"
"	movq	%%mm4, %%mm7\n\t"
"	pswapd	40(%%edx), %%mm1\n\t"
"	pswapd	56(%%edx), %%mm5\n\t"
"	pfadd	%%mm1, %%mm0\n\t"
"	pfadd	%%mm5, %%mm4\n\t"
"	movq	%%mm0, 32(%%ecx)\n\t"
"	movq	%%mm4, 48(%%ecx)\n\t"
"	pfsub	%%mm1, %%mm3\n\t"
"	pfsubr	%%mm5, %%mm7\n\t"
"	pfmul	%%mm2, %%mm3\n\t"
"	pfmul	%%mm2, %%mm7\n\t"
"	pswapd	%%mm3, %%mm3\n\t"
"	pswapd	%%mm7, %%mm7\n\t"
"	movq	%%mm3, 40(%%ecx)\n\t"
"	movq	%%mm7, 56(%%ecx)\n\t"

"	movq	64(%%edx), %%mm0\n\t"
"	movq	80(%%edx), %%mm4\n\t"
"	movq	%%mm0, %%mm3\n\t"
"	movq	%%mm4, %%mm7\n\t"
"	pswapd	72(%%edx), %%mm1\n\t"
"	pswapd	88(%%edx), %%mm5\n\t"
"	pfadd	%%mm1, %%mm0\n\t"
"	pfadd	%%mm5, %%mm4\n\t"
"	movq	%%mm0, 64(%%ecx)\n\t"
"	movq	%%mm4, 80(%%ecx)\n\t"
"	pfsub	%%mm1, %%mm3\n\t"
"	pfsubr	%%mm5, %%mm7\n\t"
"	pfmul	%%mm2, %%mm3\n\t"
"	pfmul	%%mm2, %%mm7\n\t"
"	pswapd	%%mm3, %%mm3\n\t"
"	pswapd	%%mm7, %%mm7\n\t"
"	movq	%%mm3, 72(%%ecx)\n\t"
"	movq	%%mm7, 88(%%ecx)\n\t"

"	movq	96(%%edx), %%mm0\n\t"
"	movq	112(%%edx), %%mm4\n\t"
"	movq	%%mm0, %%mm3\n\t"
"	movq	%%mm4, %%mm7\n\t"
"	pswapd	104(%%edx), %%mm1\n\t"
"	pswapd	120(%%edx), %%mm5\n\t"
"	pfadd	%%mm1, %%mm0\n\t"
"	pfadd	%%mm5, %%mm4\n\t"
"	movq	%%mm0, 96(%%ecx)\n\t"
"	movq	%%mm4, 112(%%ecx)\n\t"
"	pfsub	%%mm1, %%mm3\n\t"
"	pfsubr	%%mm5, %%mm7\n\t"
"	pfmul	%%mm2, %%mm3\n\t"
"	pfmul	%%mm2, %%mm7\n\t"
"	pswapd	%%mm3, %%mm3\n\t"
"	pswapd	%%mm7, %%mm7\n\t"
"	movq	%%mm3, 104(%%ecx)\n\t"
"	movq	%%mm7, 120(%%ecx)\n\t"


/* Phase 6. This is the end of easy road. */
/* Code below is coded in scalar mode. Should be optimized */

"	movd	"MANGLE(plus_1f)", %%mm6\n\t"
"	punpckldq 120(%%ebx), %%mm6\n\t"      /* mm6 = 1.0 | 120(%%ebx)*/
"	movq	"MANGLE(x_plus_minus_3dnow)", %%mm7\n\t" /* mm7 = +1 | -1 */

"	movq	32(%%ecx), %%mm0\n\t"
"	movq	64(%%ecx), %%mm2\n\t"
"	movq	%%mm0, %%mm1\n\t"
"	movq	%%mm2, %%mm3\n\t"
"	pxor	%%mm7, %%mm1\n\t"
"	pxor	%%mm7, %%mm3\n\t"
"	pfacc	%%mm1, %%mm0\n\t"
"	pfacc	%%mm3, %%mm2\n\t"
"	pfmul	%%mm6, %%mm0\n\t"
"	pfmul	%%mm6, %%mm2\n\t"
"	movq	%%mm0, 32(%%edx)\n\t"
"	movq	%%mm2, 64(%%edx)\n\t"

"	movd	44(%%ecx), %%mm0\n\t"
"	movd	40(%%ecx), %%mm2\n\t"
"	movd	120(%%ebx), %%mm3\n\t"
"	punpckldq 76(%%ecx), %%mm0\n\t"
"	punpckldq 72(%%ecx), %%mm2\n\t"
"	punpckldq %%mm3, %%mm3\n\t"
"	movq	%%mm0, %%mm4\n\t"
"	movq	%%mm2, %%mm5\n\t"
"	pfsub	%%mm2, %%mm0\n\t"
"	pfmul	%%mm3, %%mm0\n\t"
"	movq	%%mm0, %%mm1\n\t"
"	pfadd	%%mm5, %%mm0\n\t"
"	pfadd	%%mm4, %%mm0\n\t"
"	movq	%%mm0, %%mm2\n\t"
"	punpckldq %%mm1, %%mm0\n\t"
"	punpckhdq %%mm1, %%mm2\n\t"

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -