⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 vdec_idctmmx.s

📁 vlc stand 0.1.99 ist sehr einfach
💻 S
📖 第 1 页 / 共 3 页
字号:
        /*
         * Fragment from the interior of an MMX inverse-DCT routine.  The
         * routine's label and prologue, and the definitions of the x....
         * constants and of scratch7, are outside this excerpt.
         *
         * Conventions visible in this fragment:
         *  - All arithmetic is on packed 16-bit words (paddsw/psubsw/pmulhw/
         *    psraw/psllw).
         *  - Each first-pass input Vn is formed by loading the quadword at
         *    8*n(%ecx) and taking pmulhw of it with the quadword at
         *    8*n(%esi); intermediate and transposed values are stored back
         *    at 8*n(%esi).
         *    NOTE(review): which pointer is the coefficient block and which
         *    is a scale table is not visible here -- confirm in the prologue.
         *  - Vnn / tnn / tmNN / tmtNN are the author's names for intermediate
         *    values; "free mmN" is the author's register-liveness note.
         *  - "moved from/to/up ..." comments record instructions that were
         *    hand-scheduled into a neighbouring logical section; the
         *    commented-out copies show where they logically belong.  Do not
         *    reorder instructions without re-checking every register note.
         */
        psubsw %mm4, %mm0               /* tm10; free mm4 */
/* moved from next block */
        pmulhw 8*5(%esi), %mm1          /* V5 */
        movq %mm6, 8*4(%esi)            /* tm4; free mm6 */
        movq %mm0, 8*10(%esi)           /* tm10; free mm0 */

/* column 1: even part
 * use V5, V13, V1, V9 to produce V56..V59
 */
/* moved to prev block:
 *      movq 8*5(%ecx), %mm1
 *      pmulhw 8*5(%esi), %mm1          V5
 */
        movq 8*13(%ecx), %mm7
        psllw $1, %mm1                  /* t128=t130 */
        pmulhw 8*13(%esi), %mm7         /* V13 */
        movq %mm1, %mm2                 /* duplicate t128=t130 */
        movq 8(%ecx), %mm3
        pmulhw 8(%esi), %mm3            /* V1 */
        movq 8*9(%ecx), %mm5
        psubsw %mm7, %mm1               /* V50 */
        pmulhw 8*9(%esi), %mm5          /* V9 */
        paddsw %mm7, %mm2               /* V51 */
        pmulhw x5a825a825a825a82, %mm1  /* 23170 -> V52 */
        movq %mm2, %mm6                 /* duplicate V51 */
        psraw $1, %mm2                  /* t138=t144 */
        movq %mm3, %mm4                 /* duplicate V1 */
        psraw $2, %mm6                  /* t136 */
        paddsw %mm5, %mm3               /* V53 */
        psubsw %mm5, %mm4               /* V54; mm5 free */
        movq %mm3, %mm7                 /* duplicate V53 */
/* moved from next block */
        movq 8*11(%ecx), %mm0
        psraw $1, %mm4                  /* t140=t142 */
        psubsw %mm6, %mm1               /* V55; mm6 free */
        paddsw %mm2, %mm3               /* V56 */
        movq %mm4, %mm5                 /* duplicate t140=t142 */
        paddsw %mm1, %mm4               /* V57 */
        movq %mm3, 8*5(%esi)            /* V56 */
        psubsw %mm1, %mm5               /* V58; mm1 free */
        movq %mm4, 8*13(%esi)           /* V57 */
        psubsw %mm2, %mm7               /* V59; mm2 free */
        movq %mm5, 8*9(%esi)            /* V58 */

/* V59 is not stored: keep mm7 alive all along the next block
 *      movq %mm7, 8(%esi)              V59
 * moved above
 *      movq 8*11(%ecx), %mm0
 */
        pmulhw 8*11(%esi), %mm0         /* V11 */
        movq 8*7(%ecx), %mm6
        pmulhw 8*7(%esi), %mm6          /* V7 */
        movq 8*15(%ecx), %mm4
        movq %mm0, %mm3                 /* duplicate V11 */
        pmulhw 8*15(%esi), %mm4         /* V15 */
        movq 8*3(%ecx), %mm5
        psllw $1, %mm6                  /* t146=t152 */
        pmulhw 8*3(%esi), %mm5          /* V3 */
        paddsw %mm6, %mm0               /* V63 */

/* Note that the V15 computation has a correction step:
 * this is a 'magic' constant that rebiases the results to be closer to the
 * expected result.  This magic constant can be refined to reduce the error
 * even more by doing the correction step in a later stage, when the number
 * is actually multiplied by 16.
 */
        paddw x0005000200010001, %mm4   /* rebias V15 (plain wrapping add, not saturating) */
        psubsw %mm6, %mm3               /* V60; free mm6 */
        psraw $1, %mm0                  /* t154=t156 */
        movq %mm3, %mm1                 /* duplicate V60 */
        pmulhw x539f539f539f539f, %mm1  /* 21407 -> V67 */
        movq %mm5, %mm6                 /* duplicate V3 */
        psraw $2, %mm4                  /* t148=t150 */
        paddsw %mm4, %mm5               /* V61 */
        psubsw %mm4, %mm6               /* V62; free mm4 */
        movq %mm5, %mm4                 /* duplicate V61 */
        psllw $1, %mm1                  /* t169 */
        paddsw %mm0, %mm5               /* V65 -> result */
        psubsw %mm0, %mm4               /* V64; free mm0 */
        pmulhw x5a825a825a825a82, %mm4  /* 23170 -> V68 */
        psraw $1, %mm3                  /* t158 */
        psubsw %mm6, %mm3               /* V66 */
        movq %mm5, %mm2                 /* duplicate V65 */
        pmulhw x61f861f861f861f8, %mm3  /* 25080 -> V70 */
        psllw $1, %mm6                  /* t165 */
        pmulhw x4546454645464546, %mm6  /* 17734 -> V69 */
        psraw $1, %mm2                  /* t172 */
/* moved from next block */
        movq 8*5(%esi), %mm0            /* V56 */
        psllw $1, %mm4                  /* t174 */
/* moved from next block */
        psraw $1, %mm0                  /* t177=t188 */
        nop                             /* scheduling filler; no architectural effect */
        psubsw %mm3, %mm6               /* V72 */
        psubsw %mm1, %mm3               /* V71; free mm1 */
        psubsw %mm2, %mm6               /* V73; free mm2 */
/* moved from next block */
        psraw $1, %mm5                  /* t178=t189 */
        psubsw %mm6, %mm4               /* V74 */
/* moved from next block */
        movq %mm0, %mm1                 /* duplicate t177=t188 */
        paddsw %mm4, %mm3               /* V75 */
/* moved from next block */
        paddsw %mm5, %mm0               /* tm1 */

/* location (buffer slot relative to %esi, or register, holding each value)
 *  5 - V56
 * 13 - V57
 *  9 - V58
 *  X - V59, mm7
 *  X - V65, mm5
 *  X - V73, mm6
 *  X - V74, mm4
 *  X - V75, mm3
 * free mm0, mm1 & mm2
 * moved above (the live copies above shift right with psraw):
 *      movq 8*5(%esi), %mm0            V56
 *      psraw $1, %mm0                  t177=t188 ! new !!
 *      psraw $1, %mm5                  t178=t189 ! new !!
 *      movq %mm0, %mm1                 duplicate t177=t188
 *      paddsw %mm5, %mm0               tm1
 */
        movq 8*13(%esi), %mm2           /* V57 */
        psubsw %mm5, %mm1               /* tm15; free mm5 */
        movq %mm0, 8(%esi)              /* tm1; free mm0 */
        psraw $1, %mm7                  /* t182=t184 ! new !! */
/* save the store as used directly in the transpose
 *      movq %mm1, 120(%esi)            tm15; free mm1
 */
        movq %mm7, %mm5                 /* duplicate t182=t184 */
        psubsw %mm3, %mm7               /* tm7 */
        paddsw %mm3, %mm5               /* tm9; free mm3 */
        movq 8*9(%esi), %mm0            /* V58 */
        movq %mm2, %mm3                 /* duplicate V57 */
        movq %mm7, 8*7(%esi)            /* tm7; free mm7 */
        psubsw %mm6, %mm3               /* tm13 */
        paddsw %mm6, %mm2               /* tm3; free mm6 */
/* moved up from the transpose */
        movq %mm3, %mm7
/* moved up from the transpose */
        punpcklwd %mm1, %mm3            /* interleave low words of tm13 (mm3) and tm15 (mm1) */
        movq %mm0, %mm6                 /* duplicate V58 */
        movq %mm2, 8*3(%esi)            /* tm3; free mm2 */
        paddsw %mm4, %mm0               /* tm5 */
        psubsw %mm4, %mm6               /* tm11; free mm4 */
/* moved up from the transpose */
        punpckhwd %mm1, %mm7            /* interleave high words of tm13 (mm7) and tm15 (mm1) */
        movq %mm0, 8*5(%esi)            /* tm5; free mm0 */
/* moved up from the transpose */
        movq %mm5, %mm2

/* transpose - M4 part
 *  ---------       ---------
 * | M1 | M2 |     | M1'| M3'|
 *  ---------  -->  ---------
 * | M3 | M4 |     | M2'| M4'|
 *  ---------       ---------
 * Two alternatives: use full mmword approach so the following code can be
 * scheduled before the transpose is done without stores, or use the faster
 * half mmword stores (when possible)
 */
        /* movd stores only the low dword of the register; the +4 offsets
         * target the upper half of the quadword slot. */
        movd %mm3, 8*9+4(%esi)          /* MS part of tmt9 */
        punpcklwd %mm6, %mm5
        movd %mm7, 8*13+4(%esi)         /* MS part of tmt13 */
        punpckhwd %mm6, %mm2
        movd %mm5, 8*9(%esi)            /* LS part of tmt9 */
        punpckhdq %mm3, %mm5            /* free mm3 */
        movd %mm2, 8*13(%esi)           /* LS part of tmt13 */
        punpckhdq %mm7, %mm2            /* free mm7 */
/* moved up from the M3 transpose */
        movq 8*8(%esi), %mm0
/* moved up from the M3 transpose */
        movq 8*10(%esi), %mm1
/* moved up from the M3 transpose */
        movq %mm0, %mm3
/* shuffle the rest of the data, and write it with 2 mmword writes */
        movq %mm5, 8*11(%esi)           /* tmt11 */
/* moved up from the M3 transpose */
        punpcklwd %mm1, %mm0
        movq %mm2, 8*15(%esi)           /* tmt15 */
/* moved up from the M3 transpose */
        punpckhwd %mm1, %mm3

/* transpose - M3 part
 * moved up to previous code section
 *      movq 8*8(%esi), %mm0
 *      movq 8*10(%esi), %mm1
 *      movq %mm0, %mm3
 *      punpcklwd %mm1, %mm0
 *      punpckhwd %mm1, %mm3
 */
        movq 8*12(%esi), %mm6
        movq 8*14(%esi), %mm4
        movq %mm6, %mm2
/* shuffle the data and write the lower parts of the transposed in 4 dwords */
        punpcklwd %mm4, %mm6
        movq %mm0, %mm1
        punpckhdq %mm6, %mm1            /* per the later "duplicate tmt3" note, mm1 = tmt3 */
        movq %mm3, %mm7
        punpckhwd %mm4, %mm2            /* free mm4 */
        punpckldq %mm6, %mm0            /* free mm6; per the later note, mm0 = tmt1 */
/* moved from next block */
        movq 8*13(%esi), %mm4           /* tmt13 */
        punpckldq %mm2, %mm3            /* per the following note, mm3 = tmt5 */
        punpckhdq %mm2, %mm7            /* free mm2 */
/* moved from next block */
        movq %mm3, %mm5                 /* duplicate tmt5 */

/* column 1: even part (after transpose)
 * moved above
 *      movq %mm3, %mm5                 duplicate tmt5
 *      movq 8*13(%esi), %mm4           tmt13
 */
        psubsw %mm4, %mm3               /* V134 */
        pmulhw x5a825a825a825a82, %mm3  /* 23170 -> V136 */
        movq 8*9(%esi), %mm6            /* tmt9 */
        paddsw %mm4, %mm5               /* V135; mm4 free */
        movq %mm0, %mm4                 /* duplicate tmt1 */
        paddsw %mm6, %mm0               /* V137 */
        psubsw %mm6, %mm4               /* V138; mm6 free */
        psllw $2, %mm3                  /* t290 */
        psubsw %mm5, %mm3               /* V139 */
        movq %mm0, %mm6                 /* duplicate V137 */
        paddsw %mm5, %mm0               /* V140 */
        movq %mm4, %mm2                 /* duplicate V138 */
        paddsw %mm3, %mm2               /* V141 */
        psubsw %mm3, %mm4               /* V142; mm3 free */
        movq %mm0, 8*9(%esi)            /* V140 */
        psubsw %mm5, %mm6               /* V143; mm5 free */
/* moved from next block */
        movq 8*11(%esi), %mm0           /* tmt11 */
        movq %mm2, 8*13(%esi)           /* V141 */
/* moved from next block */
        movq %mm0, %mm2                 /* duplicate tmt11 */

/* column 1: odd part (after transpose) */
/* moved up to the prev block
 *      movq 8*11(%esi), %mm0           tmt11
 *      movq %mm0, %mm2                 duplicate tmt11
 */
        movq 8*15(%esi), %mm5           /* tmt15 */
        psubsw %mm7, %mm0               /* V144 */
        movq %mm0, %mm3                 /* duplicate V144 */
        paddsw %mm7, %mm2               /* V147; free mm7 */
        pmulhw x539f539f539f539f, %mm0  /* 21407 -> V151 */
        movq %mm1, %mm7                 /* duplicate tmt3 */
        paddsw %mm5, %mm7               /* V145 */
        psubsw %mm5, %mm1               /* V146; free mm5 */
        psubsw %mm1, %mm3               /* V150 */
        movq %mm7, %mm5                 /* duplicate V145 */
        pmulhw x4546454645464546, %mm1  /* 17734 -> V153 */
        psubsw %mm2, %mm5               /* V148 */
        pmulhw x61f861f861f861f8, %mm3  /* 25080 -> V154 */
        psllw $2, %mm0                  /* t311 */
        pmulhw x5a825a825a825a82, %mm5  /* 23170 -> V152 */
        paddsw %mm2, %mm7               /* V149; free mm2 */
        psllw $1, %mm1                  /* t313 */
        nop                             /* without the nop - freeze here for one clock */
        movq %mm3, %mm2                 /* duplicate V154 */
        psubsw %mm0, %mm3               /* V155; free mm0 */
        psubsw %mm2, %mm1               /* V156; free mm2 */
/* moved from the next block */
        movq %mm6, %mm2                 /* duplicate V143 */
/* moved from the next block */
        movq 8*13(%esi), %mm0           /* V141 */
        psllw $1, %mm1                  /* t315 */
        psubsw %mm7, %mm1               /* V157 (keep V149) */
        psllw $2, %mm5                  /* t317 */
        psubsw %mm1, %mm5               /* V158 */
        psllw $1, %mm3                  /* t319 */
        paddsw %mm5, %mm3               /* V159 */

/* column 1: output butterfly (after transform)
 * moved to the prev block
 *      movq %mm6, %mm2                 duplicate V143
 *      movq 8*13(%esi), %mm0           V141
 */
        psubsw %mm3, %mm2               /* V163 */
        paddsw %mm3, %mm6               /* V164; free mm3 */
        movq %mm4, %mm3                 /* duplicate V142 */
        psubsw %mm5, %mm4               /* V165; free mm5 */
        movq %mm2, scratch7             /* out7 */
        psraw $4, %mm6                  /* arithmetic >>4 of V164 words; presumably final output scaling -- confirm */
        psraw $4, %mm4                  /* arithmetic >>4 of V165 words */
        paddsw %mm5, %mm3               /* V162 (mm5 still holds V158 here despite the "free" note above) */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -