📄 dct.cpp
字号:
* appear in this code nor any responsibility to update it. * * * Other brands and names are the property of their respective * owners. * * Copyright (c) 1997, Intel Corporation. All rights reserved. ***************************************************************************//* This implementation is based on an algorithm described in * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. * The primary algorithm described there uses 11 multiplies and 29 adds. * We use their alternate method with 12 multiplies and 32 adds. * The advantage of this method is that no data path contains more than one * multiplication; this allows a very simple and accurate implementation in * scaled fixed-point arithmetic, with a minimal number of shifts. */#if MMX_DCT_ENABLED/* This version will compile with the GNU compiler */#ifdef __GNUC__void domidct8x8llmW(short *inptr, short *quantptr, int *wsptr, u_char *outptr, int stride){ static u_int64_t fix_029_n089n196 __asm__("fix_029_n089n196") = 0x098ea46e098ea46eLL; static u_int64_t fix_n196_n089 __asm__("fix_n196_n089") = 0xc13be333c13be333LL; static u_int64_t fix_205_n256n039 __asm__("fix_205_n256n039") = 0x41b3a18141b3a181LL; static u_int64_t fix_n039_n256 __asm__("fix_n039_n256") = 0xf384adfdf384adfdLL; static u_int64_t fix_307n256_n196 __asm__("fix_307n256_n196") = 0x1051c13b1051c13bLL; static u_int64_t fix_n256_n196 __asm__("fix_n256_n196") = 0xadfdc13badfdc13bLL; static u_int64_t fix_150_n089n039 __asm__("fix_150_n089n039") = 0x300bd6b7300bd6b7LL; static u_int64_t fix_n039_n089 __asm__("fix_n039_n089") = 0xf384e333f384e333LL; static u_int64_t fix_117_117 __asm__("fix_117_117") = 0x25a125a125a125a1LL; static u_int64_t fix_054_054p076 __asm__("fix_054_054p076") = 0x115129cf115129cfLL; static u_int64_t fix_054n184_054 __asm__("fix_054n184_054") = 0xd6301151d6301151LL; static u_int64_t fix_054n184 __asm__("fix_054n184") = 0xd630d630d630d630LL; static u_int64_t fix_054 __asm__("fix_054") = 0x1151115111511151LL; static u_int64_t fix_054p076 __asm__("fix_054p076") = 0x29cf29cf29cf29cfLL; static u_int64_t fix_n196p307n256 __asm__("fix_n196p307n256") = 0xd18cd18cd18cd18cLL; static u_int64_t fix_n089n039p150 __asm__("fix_n089n039p150") = 0x06c206c206c206c2LL; static u_int64_t fix_n256 __asm__("fix_n256") = 0xadfdadfdadfdadfdLL; static u_int64_t fix_n039 __asm__("fix_n039") = 0xf384f384f384f384LL; static u_int64_t fix_n256n039p205 __asm__("fix_n256n039p205") = 0xe334e334e334e334LL; static u_int64_t fix_n196 __asm__("fix_n196") = 0xc13bc13bc13bc13bLL; static u_int64_t fix_n089 __asm__("fix_n089") = 0xe333e333e333e333LL; static u_int64_t fixn089n196p029 __asm__("fixn089n196p029") = 0xadfcadfcadfcadfcLL; static u_int64_t const_0x2xx8 __asm__("const_0x2xx8") = 0x0000010000000100LL; static u_int64_t const_0x0808 __asm__("const_0x0808") = 0x0808080808080808LL; __asm__ __volatile__( "#quantptr is in %%edi\n" "#inptr is in %%ebx\n" "#wsptr is in %%esi\n" "#outptr is in 20(%%ebp)\n" "#stride is in 24(%%ebp)\n" "addl $0x07,%%esi #align wsptr to qword\n" "andl $0xfffffff8,%%esi #align wsptr to qword\n" "movl %%esi,%%eax\n" "movq 8*4(%%ebx),%%mm0 #p1(1,0)\n" "pmullw 8*4(%%edi),%%mm0 #p1(1,1)\n" "movq 8*12(%%ebx),%%mm1 #p1(2,0)\n" "pmullw 8*12(%%edi),%%mm1 #p1(2,1)\n" "movq 8*0(%%ebx),%%mm6 #p1(5,0)\n" "pmullw 8*0(%%edi),%%mm6 #p1(5,1)\n" "movq %%mm0,%%mm2 #p1(3,0)\n" "movq 8*8(%%ebx),%%mm7 #p1(6,0)\n" "punpcklwd %%mm1,%%mm0 #p1(3,1)\n" "pmullw 8*8(%%edi),%%mm7 #p1(6,1)\n" "movq %%mm0,%%mm4 #p1(3,2)\n" "punpckhwd %%mm1,%%mm2 #p1(3,4)\n" "pmaddwd fix_054n184_054,%%mm0 #p1(3,3)\n" "movq %%mm2,%%mm5 #p1(3,5)\n" "pmaddwd fix_054n184_054,%%mm2 #p1(3,6)\n" "pxor %%mm1,%%mm1 #p1(7,0)\n" "pmaddwd fix_054_054p076,%%mm4 #p1(4,0)\n" "punpcklwd %%mm6,%%mm1 #p1(7,1)\n" "pmaddwd fix_054_054p076,%%mm5 #p1(4,1)\n" "psrad $3,%%mm1 #p1(7,2)\n" "pxor %%mm3,%%mm3 #p1(7,3)\n" "punpcklwd %%mm7,%%mm3 #p1(7,4)\n" "psrad $3,%%mm3 #p1(7,5)\n" "paddd %%mm3,%%mm1 #p1(7,6)\n" "movq %%mm1,%%mm3 #p1(7,7)\n" "paddd %%mm4,%%mm1 #p1(7,8)\n" "psubd %%mm4,%%mm3 #p1(7,9)\n" "movq %%mm1,8*16(%%esi) #p1(7,10)\n" "pxor %%mm4,%%mm4 #p1(7,12)\n" "movq %%mm3,8*22(%%esi) #p1(7,11)\n" "punpckhwd %%mm6,%%mm4 #p1(7,13)\n" "psrad $3,%%mm4 #p1(7,14)\n" "pxor %%mm1,%%mm1 #p1(7,15)\n" "punpckhwd %%mm7,%%mm1 #p1(7,16)\n" "psrad $3,%%mm1 #p1(7,17)\n" "paddd %%mm1,%%mm4 #p1(7,18)\n" "movq %%mm4,%%mm3 #p1(7,19)\n" "pxor %%mm1,%%mm1 #p1(8,0)\n" "paddd %%mm5,%%mm3 #p1(7,20)\n" "punpcklwd %%mm6,%%mm1 #p1(8,1)\n" "psubd %%mm5,%%mm4 #p1(7,21)\n" "psrad $3,%%mm1 #p1(8,2)\n" "movq %%mm3,8*17(%%esi) #p1(7,22)\n" "pxor %%mm5,%%mm5 #p1(8,3)\n" "movq %%mm4,8*23(%%esi) #p1(7,23)\n" "punpcklwd %%mm7,%%mm5 #p1(8,4)\n" "psrad $3,%%mm5 #p1(8,5)\n" "pxor %%mm4,%%mm4 #p1(8,12)\n" "psubd %%mm5,%%mm1 #p1(8,6)\n" "punpckhwd %%mm6,%%mm4 #p1(8,13)\n" "movq %%mm1,%%mm3 #p1(8,7)\n" "psrad $3,%%mm4 #p1(8,14)\n" "paddd %%mm0,%%mm1 #p1(8,8)\n" "pxor %%mm5,%%mm5 #p1(8,15)\n" "psubd %%mm0,%%mm3 #p1(8,9)\n" "movq 8*14(%%ebx),%%mm0 #p1(9,0)\n" "punpckhwd %%mm7,%%mm5 #p1(8,16)\n" "pmullw 8*14(%%edi),%%mm0 #p1(9,1)\n" "movq %%mm1,8*18(%%esi) #p1(8,10)\n" "psrad $3,%%mm5 #p1(8,17)\n" "movq %%mm3,8*20(%%esi) #p1(8,11)\n" "psubd %%mm5,%%mm4 #p1(8,18)\n" "movq %%mm4,%%mm3 #p1(8,19)\n" "movq 8*6(%%ebx),%%mm1 #p1(10,0)\n" "paddd %%mm2,%%mm3 #p1(8,20)\n" "pmullw 8*6(%%edi),%%mm1 #p1(10,1)\n" "psubd %%mm2,%%mm4 #p1(8,21)\n" "movq %%mm0,%%mm5 #p1(11,1)\n" "movq %%mm4,8*21(%%esi) #p1(8,23)\n" "movq %%mm3,8*19(%%esi) #p1(8,22)\n" "movq %%mm0,%%mm4 #p1(11,0)\n" "punpcklwd %%mm1,%%mm4 #p1(11,2)\n" "movq 8*10(%%ebx),%%mm2 #p1(12,0)\n" "punpckhwd %%mm1,%%mm5 #p1(11,4)\n" "pmullw 8*10(%%edi),%%mm2 #p1(12,1)\n" "movq 8*2(%%ebx),%%mm3 #p1(13,0)\n" "pmullw 8*2(%%edi),%%mm3 #p1(13,1)\n" "movq %%mm2,%%mm6 #p1(14,0)\n" "pmaddwd fix_117_117,%%mm4 #p1(11,3)\n" "movq %%mm2,%%mm7 #p1(14,1)\n" "pmaddwd fix_117_117,%%mm5 #p1(11,5)\n" "punpcklwd %%mm3,%%mm6 #p1(14,2)\n" "pmaddwd fix_117_117,%%mm6 #p1(14,3)\n" "punpckhwd %%mm3,%%mm7 #p1(14,4)\n" "pmaddwd fix_117_117,%%mm7 #p1(14,5)\n" "paddd %%mm6,%%mm4 #p1(15,0)\n" "paddd %%mm7,%%mm5 #p1(15,1)\n" "movq %%mm4,8*24(%%esi) #p1(15,2)\n" "movq %%mm5,8*25(%%esi) #p1(15,3)\n" "movq %%mm0,%%mm6 #p1(16,0)\n" "movq %%mm3,%%mm7 #p1(16,3)\n" "punpcklwd %%mm2,%%mm6 #p1(16,1)\n" "punpcklwd %%mm3,%%mm7 #p1(16,4)\n" "pmaddwd fix_n039_n089,%%mm6 #p1(16,2)\n" "pmaddwd fix_150_n089n039,%%mm7 #p1(16,5)\n" "movq %%mm0,%%mm4 #p1(16,12)\n" "paddd 8*24(%%esi),%%mm6 #p1(16,6)\n" "punpckhwd %%mm2,%%mm4 #p1(16,13)\n" "paddd %%mm7,%%mm6 #p1(16,7)\n" "pmaddwd fix_n039_n089,%%mm4 #p1(16,14)\n" "movq %%mm6,%%mm7 #p1(16,8)\n" "paddd 8*25(%%esi),%%mm4 #p1(16,18)\n" "movq %%mm3,%%mm5 #p1(16,15)\n" "paddd 8*16(%%esi),%%mm6 #p1(16,9)\n" "punpckhwd %%mm3,%%mm5 #p1(16,16)\n" "paddd const_0x2xx8,%%mm6 #p1(16,10)\n" "psrad $9,%%mm6 #p1(16,11)\n" "pmaddwd fix_150_n089n039,%%mm5 #p1(16,17)\n" "paddd %%mm5,%%mm4 #p1(16,19)\n" "movq %%mm4,%%mm5 #p1(16,20)\n" "paddd 8*17(%%esi),%%mm4 #p1(16,21)\n" "paddd const_0x2xx8,%%mm4 #p1(16,22)\n" "psrad $9,%%mm4 #p1(16,23)\n" "packssdw %%mm4,%%mm6 #p1(16,24)\n" "movq %%mm6,8*0(%%esi) #p1(16,25)\n" "movq 8*16(%%esi),%%mm4 #p1(16,26)\n" "psubd %%mm7,%%mm4 #p1(16,27)\n" "movq 8*17(%%esi),%%mm6 #p1(16,30)\n" "paddd const_0x2xx8,%%mm4 #p1(16,28)\n" "movq %%mm1,%%mm7 #p1(17,3)\n" "psrad $9,%%mm4 #p1(16,29)\n" "psubd %%mm5,%%mm6 #p1(16,31)\n" "paddd const_0x2xx8,%%mm6 #p1(16,32)\n" "punpcklwd %%mm1,%%mm7 #p1(17,4)\n" "pmaddwd fix_307n256_n196,%%mm7 #p1(17,5)\n" "psrad $9,%%mm6 #p1(16,33)\n" "packssdw %%mm6,%%mm4 #p1(16,34)\n" "movq %%mm4,8*14(%%esi) #p1(16,35)\n" "movq %%mm0,%%mm6 #p1(17,0)\n" "movq %%mm0,%%mm4 #p1(17,12)\n" "punpcklwd %%mm2,%%mm6 #p1(17,1)\n" "punpckhwd %%mm2,%%mm4 #p1(17,13)\n" "pmaddwd fix_n256_n196,%%mm6 #p1(17,2)\n" "movq %%mm1,%%mm5 #p1(17,15)\n" "paddd 8*24(%%esi),%%mm6 #p1(17,6)\n" "punpckhwd %%mm1,%%mm5 #p1(17,16)\n" "paddd %%mm7,%%mm6 #p1(17,7)\n" "pmaddwd fix_n256_n196,%%mm4 #p1(17,14)\n" "movq %%mm6,%%mm7 #p1(17,8)\n" "pmaddwd fix_307n256_n196,%%mm5 #p1(17,17)\n" "paddd 8*18(%%esi),%%mm6 #p1(17,9)\n" "paddd const_0x2xx8,%%mm6 #p1(17,10)\n" "psrad $9,%%mm6 #p1(17,11)\n" "paddd 8*25(%%esi),%%mm4 #p1(17,18)\n" "paddd %%mm5,%%mm4 #p1(17,19)\n" "movq %%mm4,%%mm5 #p1(17,20)\n" "paddd 8*19(%%esi),%%mm4 #p1(17,21)\n" "paddd const_0x2xx8,%%mm4 #p1(17,22)\n" "psrad $9,%%mm4 #p1(17,23)\n" "packssdw %%mm4,%%mm6 #p1(17,24)\n" "movq %%mm6,8*2(%%esi) #p1(17,25)\n" "movq 8*18(%%esi),%%mm4 #p1(17,26)\n" "movq 8*19(%%esi),%%mm6 #p1(17,30)\n" "psubd %%mm7,%%mm4 #p1(17,27)\n" "paddd const_0x2xx8,%%mm4 #p1(17,28)\n" "psubd %%mm5,%%mm6 #p1(17,31)\n" "psrad $9,%%mm4 #p1(17,29)\n" "paddd const_0x2xx8,%%mm6 #p1(17,32)\n" "psrad $9,%%mm6 #p1(17,33)\n" "movq %%mm2,%%mm7 #p1(18,3)\n" "packssdw %%mm6,%%mm4 #p1(17,34)\n" "movq %%mm4,8*12(%%esi) #p1(17,35)\n" "movq %%mm1,%%mm6 #p1(18,0)\n" "punpcklwd %%mm2,%%mm7 #p1(18,4)\n" "punpcklwd %%mm3,%%mm6 #p1(18,1)\n" "pmaddwd fix_205_n256n039,%%mm7 #p1(18,5)\n" "pmaddwd fix_n039_n256,%%mm6 #p1(18,2)\n" "movq %%mm1,%%mm4 #p1(18,12)\n" "paddd 8*24(%%esi),%%mm6 #p1(18,6)\n" "punpckhwd %%mm3,%%mm4 #p1(18,13)\n" "paddd %%mm7,%%mm6 #p1(18,7)\n" "pmaddwd fix_n039_n256,%%mm4 #p1(18,14)\n" "movq %%mm6,%%mm7 #p1(18,8)\n" "movq %%mm2,%%mm5 #p1(18,15)\n" "paddd 8*20(%%esi),%%mm6 #p1(18,9)\n" "punpckhwd %%mm2,%%mm5 #p1(18,16)\n" "paddd const_0x2xx8,%%mm6 #p1(18,10)\n" "psrad $9,%%mm6 #p1(18,11)\n" "pmaddwd fix_205_n256n039,%%mm5 #p1(18,17)\n" "paddd 8*25(%%esi),%%mm4 #p1(18,18)\n" "paddd %%mm5,%%mm4 #p1(18,19)\n" "movq %%mm4,%%mm5 #p1(18,20)\n" "paddd 8*21(%%esi),%%mm4 #p1(18,21)\n" "paddd const_0x2xx8,%%mm4 #p1(18,22)\n" "psrad $9,%%mm4 #p1(18,23)\n" "packssdw %%mm4,%%mm6 #p1(18,24)\n" "movq %%mm6,8*4(%%esi) #p1(18,25)\n" "movq 8*20(%%esi),%%mm4 #p1(18,26)\n" "psubd %%mm7,%%mm4 #p1(18,27)\n" "paddd const_0x2xx8,%%mm4 #p1(18,28)\n" "movq %%mm0,%%mm7 #p1(19,3)\n" "psrad $9,%%mm4 #p1(18,29)\n" "movq 8*21(%%esi),%%mm6 #p1(18,30)\n" "psubd %%mm5,%%mm6 #p1(18,31)\n" "punpcklwd %%mm0,%%mm7 #p1(19,4)\n" "paddd const_0x2xx8,%%mm6 #p1(18,32)\n" "psrad $9,%%mm6 #p1(18,33)\n" "pmaddwd fix_029_n089n196,%%mm7 #p1(19,5)\n" "packssdw %%mm6,%%mm4 #p1(18,34)\n" "movq %%mm4,8*10(%%esi) #p1(18,35)\n" "movq %%mm3,%%mm6 #p1(19,0)\n" "punpcklwd %%mm1,%%mm6 #p1(19,1)\n" "movq %%mm0,%%mm5 #p1(19,15)\n" "pmaddwd fix_n196_n089,%%mm6 #p1(19,2)\n" "punpckhwd %%mm0,%%mm5 #p1(19,16)\n" "paddd 8*24(%%esi),%%mm6 #p1(19,6)\n" "movq %%mm3,%%mm4 #p1(19,12)\n" "paddd %%mm7,%%mm6 #p1(19,7)\n" "punpckhwd %%mm1,%%mm4 #p1(19,13)\n"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -