📄 dct64_3dnow.c
字号:
/*
* This code was taken from http://www.mpg123.org
* See ChangeLog of mpg123-0.59s-pre.1 for details
* Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
* Partial 3dnow! optimization by Nick Kurshev
*
* TODO: optimize scalar 3dnow! code
* Warning: Phases 7 & 8 are not tested
*/
#define real float /* ugly - but only way */
#include "../config.h"
#include "../mangle.h"
/*
 * 64-bit constant, 8-byte aligned, with only bit 63 set (all other bits zero).
 * NOTE(review): when a quadword is viewed as two packed 32-bit floats, bit 63
 * is the sign bit of the upper float, so this is presumably a sign-flip mask
 * for that lane. No use of it appears in the part of dct64_MMX_3dnow that was
 * reviewed - confirm against the later phases of the routine.
 * attribute_used is not defined in this file; presumably it is a macro from
 * one of the included project headers that keeps the otherwise unreferenced
 * static from being discarded by the compiler - TODO confirm.
 */
static unsigned long long int attribute_used __attribute__((aligned(8))) x_plus_minus_3dnow = 0x8000000000000000ULL;
/*
 * Single-precision constant 1.0.
 * NOTE(review): presumably referenced by name from inline assembly later in
 * the file (hence attribute_used); no use appears in the part reviewed -
 * confirm.
 */
static float attribute_used plus_1f = 1.0;
void dct64_MMX_3dnow(short *a,short *b,real *c)
{
char tmp[256];
__asm __volatile(
" movl %2,%%eax\n\t"
" leal 128+%3,%%edx\n\t"
" movl %0,%%esi\n\t"
" movl %1,%%edi\n\t"
" movl $"MANGLE(costab_mmx)",%%ebx\n\t"
" leal %3,%%ecx\n\t"
/* Phase 1*/
" movq (%%eax), %%mm0\n\t"
" movq 8(%%eax), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 120(%%eax), %%mm1\n\t"
" movq 112(%%eax), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, (%%edx)\n\t"
" movq %%mm4, 8(%%edx)\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsub %%mm5, %%mm7\n\t"
" pfmul (%%ebx), %%mm3\n\t"
" pfmul 8(%%ebx), %%mm7\n\t"
" movd %%mm3, 124(%%edx)\n\t"
" movd %%mm7, 116(%%edx)\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 120(%%edx)\n\t"
" movd %%mm7, 112(%%edx)\n\t"
" movq 16(%%eax), %%mm0\n\t"
" movq 24(%%eax), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 104(%%eax), %%mm1\n\t"
" movq 96(%%eax), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 16(%%edx)\n\t"
" movq %%mm4, 24(%%edx)\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsub %%mm5, %%mm7\n\t"
" pfmul 16(%%ebx), %%mm3\n\t"
" pfmul 24(%%ebx), %%mm7\n\t"
" movd %%mm3, 108(%%edx)\n\t"
" movd %%mm7, 100(%%edx)\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 104(%%edx)\n\t"
" movd %%mm7, 96(%%edx)\n\t"
" movq 32(%%eax), %%mm0\n\t"
" movq 40(%%eax), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 88(%%eax), %%mm1\n\t"
" movq 80(%%eax), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 32(%%edx)\n\t"
" movq %%mm4, 40(%%edx)\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsub %%mm5, %%mm7\n\t"
" pfmul 32(%%ebx), %%mm3\n\t"
" pfmul 40(%%ebx), %%mm7\n\t"
" movd %%mm3, 92(%%edx)\n\t"
" movd %%mm7, 84(%%edx)\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 88(%%edx)\n\t"
" movd %%mm7, 80(%%edx)\n\t"
" movq 48(%%eax), %%mm0\n\t"
" movq 56(%%eax), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 72(%%eax), %%mm1\n\t"
" movq 64(%%eax), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 48(%%edx)\n\t"
" movq %%mm4, 56(%%edx)\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsub %%mm5, %%mm7\n\t"
" pfmul 48(%%ebx), %%mm3\n\t"
" pfmul 56(%%ebx), %%mm7\n\t"
" movd %%mm3, 76(%%edx)\n\t"
" movd %%mm7, 68(%%edx)\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 72(%%edx)\n\t"
" movd %%mm7, 64(%%edx)\n\t"
/* Phase 2*/
" movq (%%edx), %%mm0\n\t"
" movq 8(%%edx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 56(%%edx), %%mm1\n\t"
" movq 48(%%edx), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, (%%ecx)\n\t"
" movq %%mm4, 8(%%ecx)\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsub %%mm5, %%mm7\n\t"
" pfmul 64(%%ebx), %%mm3\n\t"
" pfmul 72(%%ebx), %%mm7\n\t"
" movd %%mm3, 60(%%ecx)\n\t"
" movd %%mm7, 52(%%ecx)\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 56(%%ecx)\n\t"
" movd %%mm7, 48(%%ecx)\n\t"
" movq 16(%%edx), %%mm0\n\t"
" movq 24(%%edx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 40(%%edx), %%mm1\n\t"
" movq 32(%%edx), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 16(%%ecx)\n\t"
" movq %%mm4, 24(%%ecx)\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsub %%mm5, %%mm7\n\t"
" pfmul 80(%%ebx), %%mm3\n\t"
" pfmul 88(%%ebx), %%mm7\n\t"
" movd %%mm3, 44(%%ecx)\n\t"
" movd %%mm7, 36(%%ecx)\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 40(%%ecx)\n\t"
" movd %%mm7, 32(%%ecx)\n\t"
/* Phase 3*/
" movq 64(%%edx), %%mm0\n\t"
" movq 72(%%edx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 120(%%edx), %%mm1\n\t"
" movq 112(%%edx), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 64(%%ecx)\n\t"
" movq %%mm4, 72(%%ecx)\n\t"
" pfsubr %%mm1, %%mm3\n\t"
" pfsubr %%mm5, %%mm7\n\t"
" pfmul 64(%%ebx), %%mm3\n\t"
" pfmul 72(%%ebx), %%mm7\n\t"
" movd %%mm3, 124(%%ecx)\n\t"
" movd %%mm7, 116(%%ecx)\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 120(%%ecx)\n\t"
" movd %%mm7, 112(%%ecx)\n\t"
" movq 80(%%edx), %%mm0\n\t"
" movq 88(%%edx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 104(%%edx), %%mm1\n\t"
" movq 96(%%edx), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 80(%%ecx)\n\t"
" movq %%mm4, 88(%%ecx)\n\t"
" pfsubr %%mm1, %%mm3\n\t"
" pfsubr %%mm5, %%mm7\n\t"
" pfmul 80(%%ebx), %%mm3\n\t"
" pfmul 88(%%ebx), %%mm7\n\t"
" movd %%mm3, 108(%%ecx)\n\t"
" movd %%mm7, 100(%%ecx)\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 104(%%ecx)\n\t"
" movd %%mm7, 96(%%ecx)\n\t"
/* Phase 4*/
" movq (%%ecx), %%mm0\n\t"
" movq 8(%%ecx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 24(%%ecx), %%mm1\n\t"
" movq 16(%%ecx), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, (%%edx)\n\t"
" movq %%mm4, 8(%%edx)\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsub %%mm5, %%mm7\n\t"
" pfmul 96(%%ebx), %%mm3\n\t"
" pfmul 104(%%ebx), %%mm7\n\t"
" movd %%mm3, 28(%%edx)\n\t"
" movd %%mm7, 20(%%edx)\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 24(%%edx)\n\t"
" movd %%mm7, 16(%%edx)\n\t"
" movq 32(%%ecx), %%mm0\n\t"
" movq 40(%%ecx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 56(%%ecx), %%mm1\n\t"
" movq 48(%%ecx), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 32(%%edx)\n\t"
" movq %%mm4, 40(%%edx)\n\t"
" pfsubr %%mm1, %%mm3\n\t"
" pfsubr %%mm5, %%mm7\n\t"
" pfmul 96(%%ebx), %%mm3\n\t"
" pfmul 104(%%ebx), %%mm7\n\t"
" movd %%mm3, 60(%%edx)\n\t"
" movd %%mm7, 52(%%edx)\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 56(%%edx)\n\t"
" movd %%mm7, 48(%%edx)\n\t"
" movq 64(%%ecx), %%mm0\n\t"
" movq 72(%%ecx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 88(%%ecx), %%mm1\n\t"
" movq 80(%%ecx), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 64(%%edx)\n\t"
" movq %%mm4, 72(%%edx)\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsub %%mm5, %%mm7\n\t"
" pfmul 96(%%ebx), %%mm3\n\t"
" pfmul 104(%%ebx), %%mm7\n\t"
" movd %%mm3, 92(%%edx)\n\t"
" movd %%mm7, 84(%%edx)\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 88(%%edx)\n\t"
" movd %%mm7, 80(%%edx)\n\t"
" movq 96(%%ecx), %%mm0\n\t"
" movq 104(%%ecx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 120(%%ecx), %%mm1\n\t"
" movq 112(%%ecx), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 96(%%edx)\n\t"
" movq %%mm4, 104(%%edx)\n\t"
" pfsubr %%mm1, %%mm3\n\t"
" pfsubr %%mm5, %%mm7\n\t"
" pfmul 96(%%ebx), %%mm3\n\t"
" pfmul 104(%%ebx), %%mm7\n\t"
" movd %%mm3, 124(%%edx)\n\t"
" movd %%mm7, 116(%%edx)\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 120(%%edx)\n\t"
" movd %%mm7, 112(%%edx)\n\t"
/* Phase 5 */
" movq (%%edx), %%mm0\n\t"
" movq 16(%%edx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 8(%%edx), %%mm1\n\t"
" movq 24(%%edx), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, (%%ecx)\n\t"
" movq %%mm4, 16(%%ecx)\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsubr %%mm5, %%mm7\n\t"
" pfmul 112(%%ebx), %%mm3\n\t"
" pfmul 112(%%ebx), %%mm7\n\t"
" movd %%mm3, 12(%%ecx)\n\t"
" movd %%mm7, 28(%%ecx)\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 8(%%ecx)\n\t"
" movd %%mm7, 24(%%ecx)\n\t"
" movq 32(%%edx), %%mm0\n\t"
" movq 48(%%edx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 40(%%edx), %%mm1\n\t"
" movq 56(%%edx), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 32(%%ecx)\n\t"
" movq %%mm4, 48(%%ecx)\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsubr %%mm5, %%mm7\n\t"
" pfmul 112(%%ebx), %%mm3\n\t"
" pfmul 112(%%ebx), %%mm7\n\t"
" movd %%mm3, 44(%%ecx)\n\t"
" movd %%mm7, 60(%%ecx)\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 40(%%ecx)\n\t"
" movd %%mm7, 56(%%ecx)\n\t"
" movq 64(%%edx), %%mm0\n\t"
" movq 80(%%edx), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 72(%%edx), %%mm1\n\t"
" movq 88(%%edx), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 64(%%ecx)\n\t"
" movq %%mm4, 80(%%ecx)\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsubr %%mm5, %%mm7\n\t"
" pfmul 112(%%ebx), %%mm3\n\t"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -