⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 vdec_idctmmx.s

📁 vlc stand 0.1.99 ist sehr einfach
💻 S
📖 第 1 页 / 共 3 页
字号:
/*****************************************************************************
 * vdec_idctmmx.S : MMX IDCT implementation
 *****************************************************************************
 * Copyright (C) 1999, 2000 VideoLAN
 *
 * Authors:
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/

/*
 * The input data is transposed and each 16 bit element in the 8x8 matrix
 * is left aligned:
 * for example in 11...1110000 format
 * If the iDCT is of I macroblock then 0.5 needs to be added to the DC Component
 * (element[0][0] of the matrix)
 */

/*
 * Syntax: GNU as, AT&T operand order (source, destination), 32-bit x86 + MMX.
 * One statement per line; every label sits on its own line so that the
 * .type/.size directives keep their operands.
 */

.data

/*
 * preSC: 8 rows x 8 signed 16-bit words (128 bytes).
 * vdec_IDCT loads 8-byte groups from this table through %ecx and combines
 * them with pmulhw (high 16 bits of the signed product) with the coefficient
 * block at the same byte offset, i.e. one factor per coefficient.
 * The values are kept verbatim.  NOTE(review): rows 5-7 are not a plain
 * mirror of columns 5-7 of rows 0-4 (e.g. row 6 col 0 is 17734 while
 * row 0 col 6 is 8867) -- presumably extra scaling is folded in; confirm
 * against the algorithm before touching any entry.
 */
        .align 16
        .type    preSC,@object
preSC:
        .short  16384,22725,21407,19266,16384,12873,8867,4520
        .short  22725,31521,29692,26722,22725,17855,12299,6270
        .short  21407,29692,27969,25172,21407,16819,11585,5906
        .short  19266,26722,25172,22654,19266,15137,10426,5315
        .short  16384,22725,21407,19266,16384,12873,8867,4520
        .short  12873,17855,16819,15137,25746,20228,13933,7103
        .short  17734,24598,23170,20853,17734,13933,9597,4892
        .short  18081,25080,23624,21261,18081,14206,9785,4988
        .size    preSC,128

/*
 * 64-bit packed constants.  Each symbol name spells the quadword value in
 * hex, most significant word first; the two .long operands are stored low
 * dword first.
 */

/* words (low to high): 1, 1, 2, 5 -- not referenced in this part of the file */
        .align 8
        .type   x0005000200010001,@object
        .size   x0005000200010001,8
x0005000200010001:
        .long   0x00010001,0x00050002

/* only the top word is non-zero (0x0040) -- not referenced in this part of the file */
        .align 8
        .type   x0040000000000000,@object
        .size   x0040000000000000,8
x0040000000000000:
        .long   0, 0x00400000

/* 4 x 0x5a82 (23170); pmulhw operand in vdec_IDCT */
        .align 8
        .type   x5a825a825a825a82,@object
        .size   x5a825a825a825a82,8
x5a825a825a825a82:
        .long   0x5a825a82, 0x5a825a82

/* 4 x 0x539f (21407); pmulhw operand in vdec_IDCT */
        .align 8
        .type   x539f539f539f539f,@object
        .size   x539f539f539f539f,8
x539f539f539f539f:
        .long   0x539f539f,0x539f539f

/* 4 x 0x4546 (17734); pmulhw operand in vdec_IDCT */
        .align 8
        .type   x4546454645464546,@object
        .size   x4546454645464546,8
x4546454645464546:
        .long   0x45464546,0x45464546

/* 4 x 0x61f8 (25080); pmulhw operand in vdec_IDCT */
        .align 8
        .type   x61f861f861f861f8,@object
        .size   x61f861f861f861f8,8
x61f861f861f861f8:
        .long   0x61f861f8,0x61f861f8

/*
 * Zero-initialised 8-byte cells in writable .data.
 * They are not touched in the first part of vdec_IDCT; presumably they are
 * spill slots for MMX registers later on.  NOTE(review): file-level scratch
 * storage would make the routine non-reentrant if it is written -- confirm
 * against the rest of the function.
 */
        .align 8
        .type    scratch1,@object
        .size    scratch1,8
scratch1:
        .long 0,0

        .align 8
        .type    scratch3,@object
        .size    scratch3,8
scratch3:
        .long 0,0

        .align 8
        .type    scratch5,@object
        .size    scratch5,8
scratch5:
        .long 0,0

        .align 8
        .type    scratch7,@object
        .size    scratch7,8
scratch7:
        .long 0,0

/*
 * 8 zero bytes.  The explicit .align 8 matches the sibling constants; it adds
 * no padding because the preceding object is 8 bytes long and 8-byte aligned.
 */
        .align 8
        .type    x0,@object
        .size    x0,8
x0:
        .long 0,0
        .align 8

.text
        .align 4
.globl vdec_IDCT
        .type    vdec_IDCT,@function
/*
 * vdec_IDCT -- entry point (the body continues beyond this part of the file).
 * Stack-argument convention with an %ebp frame: the coefficient block pointer
 * is read from 12(%ebp), i.e. the second 32-bit stack argument.
 * Saves %ebx, %ecx, %edx, %esi and %edi on entry.
 * Register roles set up here: %ecx = address of preSC, %esi = coefficient block.
 */
vdec_IDCT:
        pushl %ebp
        movl %esp,%ebp
        pushl %ebx
        pushl %ecx
        pushl %edx
        pushl %esi
        pushl %edi
        leal preSC, %ecx                /* %ecx = base of the prescale table */
        movl 12(%ebp),%esi              /* %esi = second argument: coefficient block */
/* Shift every 16-bit coefficient left by 4, in place; bytes 0..63 first. */
        movq (%esi), %mm0
        movq 8(%esi), %mm1
        movq 16(%esi), %mm2
        movq 24(%esi), %mm3
        movq 32(%esi), %mm4
        movq 40(%esi), %mm5
        movq 48(%esi), %mm6
        movq 56(%esi), %mm7
        psllw $4, %mm0
        psllw $4, %mm1
        psllw $4, %mm2
        psllw $4, %mm3
        psllw $4, %mm4
        psllw $4, %mm5
        psllw $4, %mm6
        psllw $4, %mm7
        movq %mm0,  (%esi)
        movq %mm1, 8(%esi)
        movq %mm2,16(%esi)
        movq %mm3,24(%esi)
        movq %mm4,32(%esi)
  movq %mm5,40(%esi)        movq %mm6,48(%esi)        movq %mm7,56(%esi)        movq 64(%esi), %mm0        movq 72(%esi), %mm1        movq 80(%esi), %mm2        movq 88(%esi), %mm3        movq 96(%esi), %mm4        movq 104(%esi), %mm5        movq 112(%esi), %mm6        movq 120(%esi), %mm7        psllw $4, %mm0        psllw $4, %mm1        psllw $4, %mm2        psllw $4, %mm3        psllw $4, %mm4        psllw $4, %mm5        psllw $4, %mm6        psllw $4, %mm7        movq %mm0,64(%esi)        movq %mm1,72(%esi)        movq %mm2,80(%esi)        movq %mm3,88(%esi)        movq %mm4,96(%esi)        movq %mm5,104(%esi)        movq %mm6,112(%esi)        movq %mm7,120(%esi)/* column 0: even part * use V4, V12, V0, V8 to produce V22..V25 */        movq 8*12(%ecx), %mm0   /* maybe the first mul can be done together */                                /* with the dequantization in iHuff module */        pmulhw 8*12(%esi), %mm0         /* V12 */        movq 8*4(%ecx), %mm1        pmulhw 8*4(%esi), %mm1		/* V4 */        movq (%ecx), %mm3        psraw $1, %mm0			/* t64=t66 */        pmulhw (%esi), %mm3		/* V0 */        movq 8*8(%ecx), %mm5		/* duplicate V4 */        movq %mm1, %mm2			/* added 11/1/96 */        pmulhw 8*8(%esi),%mm5		/* V8 */        psubsw %mm0, %mm1		/* V16 */        pmulhw x5a825a825a825a82, %mm1	/* 23170 ->V18 */        paddsw %mm0, %mm2		/* V17 */        movq %mm2, %mm0			/* duplicate V17 */        psraw $1, %mm2			/* t75=t82 */        psraw $2, %mm0			/* t72 */        movq %mm3, %mm4			/* duplicate V0 */        paddsw %mm5, %mm3		/* V19 */        psubsw %mm5, %mm4		/* V20 ;mm5 free *//* moved from the block below */        movq 8*10(%ecx), %mm7        psraw $1, %mm3			/* t74=t81 */        movq %mm3, %mm6			/* duplicate t74=t81 */        psraw $2, %mm4			/* t77=t79 */        psubsw %mm0, %mm1		/* V21 ; mm0 free */        paddsw %mm2, %mm3		/* V22 */        movq %mm1, %mm5			/* duplicate V21 */        paddsw %mm4, %mm1		/* V23 */        movq %mm3, 8*4(%esi)		
/* V22 */        psubsw %mm5, %mm4		/* V24; mm5 free */        movq %mm1, 8*12(%esi)		/* V23 */        psubsw %mm2, %mm6		/* V25; mm2 free */        movq %mm4, (%esi)		/* V24 *//* keep mm6 alive all along the next block */        /* movq %mm6, 8*8(%esi) 	V25 *//* column 0: odd part * use V2, V6, V10, V14 to produce V31, V39, V40, V41 *//* moved above: movq 8*10(%ecx), %mm7 */        pmulhw 8*10(%esi), %mm7		/* V10 */        movq 8*6(%ecx), %mm0        pmulhw 8*6(%esi), %mm0		/* V6 */        movq 8*2(%ecx), %mm5        movq %mm7, %mm3			/* duplicate V10 */        pmulhw 8*2(%esi), %mm5		/* V2 */        movq 8*14(%ecx), %mm4        psubsw %mm0, %mm7		/* V26 */        pmulhw 8*14(%esi), %mm4		/* V14 */        paddsw %mm0, %mm3		/* V29 ; free mm0 */        movq %mm7, %mm1			/* duplicate V26 */        psraw $1, %mm3			/* t91=t94 */        pmulhw x539f539f539f539f,%mm7	/* V33 */        psraw $1, %mm1			/* t96 */        movq %mm5, %mm0			/* duplicate V2 */        psraw $2, %mm4			/* t85=t87 */        paddsw %mm4,%mm5		/* V27 */        psubsw %mm4, %mm0		/* V28 ; free mm4 */        movq %mm0, %mm2			/* duplicate V28 */        psraw $1, %mm5			/* t90=t93 */        pmulhw x4546454645464546,%mm0	/* V35 */        psraw $1, %mm2			/* t97 */        movq %mm5, %mm4			/* duplicate t90=t93 */        psubsw %mm2, %mm1		/* V32 ; free mm2 */        pmulhw x61f861f861f861f8,%mm1	/* V36 */        psllw $1, %mm7			/* t107 */        paddsw %mm3, %mm5		/* V31 */        psubsw %mm3, %mm4		/* V30 ; free mm3 */        pmulhw x5a825a825a825a82,%mm4	/* V34 */        nop        psubsw %mm1, %mm0		/* V38 */        psubsw %mm7, %mm1		/* V37 ; free mm7 */        psllw $1, %mm1			/* t114 *//* move from the next block */        movq %mm6, %mm3			/* duplicate V25 *//* move from the next block */        movq 8*4(%esi), %mm7		/* V22 */        psllw $1, %mm0			/* t110 */        psubsw %mm5, %mm0		/* V39 (mm5 needed for next block) */        psllw $2, %mm4			/* t112 *//* moved from the next block */       
 movq 8*12(%esi), %mm2		/* V23 */        psubsw %mm0, %mm4		/* V40 */        paddsw %mm4, %mm1		/* V41; free mm0 *//* moved from the next block */        psllw $1, %mm2			/* t117=t125 *//* column 0: output butterfly *//* moved above: * movq %mm6, %mm3			duplicate V25 * movq 8*4(%esi), %mm7			V22 * movq 8*12(%esi), %mm2		V23 * psllw $1, %mm2			t117=t125 */        psubsw %mm1, %mm6		/* tm6 */        paddsw %mm1, %mm3		/* tm8; free mm1 */        movq %mm7, %mm1			/* duplicate V22 */        paddsw %mm5, %mm7		/* tm0 */        movq %mm3, 8*8(%esi)		/* tm8; free mm3 */        psubsw %mm5, %mm1		/* tm14; free mm5 */        movq %mm6, 8*6(%esi)		/* tm6; free mm6 */        movq %mm2, %mm3			/* duplicate t117=t125 */        movq (%esi), %mm6		/* V24 */        paddsw %mm0, %mm2		/* tm2 */        movq %mm7, (%esi)		/* tm0; free mm7 */        psubsw %mm0, %mm3		/* tm12; free mm0 */        movq %mm1, 8*14(%esi)		/* tm14; free mm1 */        psllw $1, %mm6			/* t119=t123 */        movq %mm2, 8*2(%esi)		/* tm2; free mm2 */        movq %mm6, %mm0			/* duplicate t119=t123 */        movq %mm3, 8*12(%esi)		/* tm12; free mm3 */        paddsw %mm4, %mm6		/* tm4 *//* moved from next block */        movq 8*5(%ecx), %mm1

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -