⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 idct_llm_mmx.asm

📁 由bmp生成mpeg2 的I_frame 数据
💻 ASM
📖 第 1 页 / 共 2 页
字号:
; mm4 - o1_o2
; mm6 - o3_o4
;
		movq       mm1, mm0
		movq       mm3, mm2
		paddd      mm0, mm6           ; w[1]_w[0]
		paddd      mm2, mm4           ; w[3]_w[2]
		psubd      mm1, mm6           ; w[6]_w[7]
		psubd      mm3, mm4           ; w[4]_w[5]
		movq       mm5, mm1           ; w[6]_w[7]
		movq       mm7, mm3           ; w[4]_w[5]
		psllq      mm7, 32            ; w[5]_____
		psllq      mm5, 32            ; w[7]_____
		punpckhdq  mm3, mm7           ; w[5]_w[4]
		punpckhdq  mm1, mm5           ; w[7]_w[6]
		paddd      mm0, half_10bit
		paddd      mm1, half_10bit
		paddd      mm2, half_10bit
		paddd      mm3, half_10bit
		psrad      mm0, 10
		psrad      mm1, 10
		psrad      mm2, 10
		psrad      mm3, 10
		movq       [edi   ], mm0
		movq       [edi+ 8], mm2
		movq       [edi+16], mm3
		movq       [edi+24], mm1
;add 6, sub 2, psxl 4, psxa 4,
;-------------------------------------------------------------------
; Loop termination check
idct_llm_mmx_raw_last:
		lea        esi, [esi+16]
		lea        edi, [edi+32]
		test       ecx, ecx
		jnz        idct_llm_mmx_raw_loop
;-------------------------------------------------------------------
; Adjust the input/output parameters

		mov        edi, esi
		sub        edi, 128
		lea        esi, [esp+36]
		mov        ecx, 4
;-------------------------------------------------------------------
; IDCT_COL
idct_llm_mmx_col_loop:
		dec        ecx
;-------------------------------------------------------------------
; Stage 1: compute the odd terms
;
;		w0 = w[7*8];
;		w1 = w[5*8];
;		w2 = w[3*8];
;		w3 = w[1*8];
;
;		z1 = w0 + w3;
;		z2 = w1 + w2;
;		z3 = w0 + w2;
;		z4 = w1 + w3;
;		z5 = (z3 + z4) * FIX_1_175875602;
;
;		w0 *= FIX_0_298631336;
;		w1 *= FIX_2_053119869;
;		w2 *= FIX_3_072711026;
;		w3 *= FIX_1_501321110;
;		z1 *= (- FIX_0_899976223);
;		z2 *= (- FIX_2_562915447);
;		z3 *= (- FIX_1_961570560);
;		z4 *= (- FIX_0_390180644);
;
;		z3 += z5;
;		z4 += z5;
;
;		w0 += z1 + z3;
;		w1 += z2 + z4;
;		w2 += z2 + z3;
;		w3 += z1 + z4;
;
		movq       mm0, [esi+7*32]
		movq       mm1, [esi+5*32]
		movq       mm2, [esi+3*32]
		movq       mm3, [esi+1*32]

		movq       mm4, mm0
		movq       mm5, mm1
		paddd      mm4, mm3           ; z1
		paddd      mm5, mm2           ; z2
		movq       mm6, mm0
		movq       mm7, mm1
		paddd      mm6, mm2           ; z3
		paddd      mm7, mm3           ; z4
		pmaddwd    mm0, p0_298631336
		pmaddwd    mm1, p2_053119869
		pmaddwd    mm4, m0_899976223
		pmaddwd    mm5, m2_562915447
		pmaddwd    mm2, p3_072711026
		pmaddwd    mm3, p1_501321110
		paddd      mm0, mm4
		paddd      mm1, mm5
		paddd      mm2, mm5
		paddd      mm3, mm4
		movq       mm5, mm6           ; z5
		paddd      mm5, mm7
		pmaddwd    mm5, p1_175875602
		pmaddwd    mm6, m1_961570560
		pmaddwd    mm7, m0_390180644
		paddd      mm6, mm5
		paddd      mm7, mm5
		paddd      mm0, mm6
		paddd      mm1, mm7
		paddd      mm2, mm6
		paddd      mm3, mm7
		movq       [esp+ 4], mm0
		movq       [esp+12], mm1
		movq       [esp+20], mm2
		movq       [esp+28], mm3
;-------------------------------------------------------------------
; Stage 2: compute the even terms
;
;		z2 = w[2*8];
;		z3 = w[6*8];
;
;		z1 = (z2+z3) * FIX_0_541196100;
;		w2 = z1 + (z3 * (- FIX_1_847759065));
;		w3 = z1 + (z2 * FIX_0_765366865);
;
;		w0 = (w[0*8] + w[4*8]) << 13;
;		w1 = (w[0*8] - w[4*8]) << 13;
;
;		w4 = w0 + w3;
;		w7 = w0 - w3;
;		w5 = w1 + w2;
;		w6 = w1 - w2;
;
		movq       mm0, [esi]         ;
		movq       mm2, [esi+32*2]    ;
		movq       mm4, [esi+32*4]    ;
		movq       mm6, [esi+32*6]    ;
		movq       mm3, mm2           ; z1
		movq       mm5, mm0           ; 
		paddd      mm0, mm4           ; 
		psubd      mm5, mm4
		paddd      mm3, mm6
		pslld      mm0, 13            ; w0
		pslld      mm5, 13            ; w1
		pmaddwd    mm2, p0_765366865
		pmaddwd    mm6, m1_847759065
		pmaddwd    mm3, p0_541196100
		paddd      mm2, mm3           ; w3
		paddd      mm6, mm3           ; w2
		movq       mm7, mm0           ; w0
		movq       mm3, mm5           ; w1
		paddd      mm0, mm2           ; w4
		paddd      mm3, mm6           ; w5
		psubd      mm7, mm2           ; w7
		psubd      mm5, mm6           ; w6
		movq       mm1, mm0
		movq       mm2, mm3
		movq       mm4, mm5
		movq       mm6, mm7
;-------------------------------------------------------------------
; Stage 3: compute the final output from the odd and even terms and store it back into the original block
;
;		(w4+w3) >> 19;
;		(w5+w2) >> 19;
;		(w6+w1) >> 19;
;		(w7+w0) >> 19;
;		(w7-w0) >> 19;
;		(w6-w1) >> 19;
;		(w5-w2) >> 19;
;		(w4-w3) >> 19;
;
		paddd      mm6, [esp+ 4]      ; d[3]
		paddd      mm4, [esp+12]      ; d[2]
		paddd      mm2, [esp+20]      ; d[1]
		paddd      mm0, [esp+28]      ; d[0]
		psubd      mm7, [esp+ 4]      ; d[4]
		psubd      mm5, [esp+12]      ; d[5]
		psubd      mm3, [esp+20]      ; d[6]
		psubd      mm1, [esp+28]      ; d[7]
		paddd      mm6, half_19bit
		paddd      mm4, half_19bit
		paddd      mm2, half_19bit
		paddd      mm0, half_19bit
		paddd      mm7, half_19bit
		paddd      mm5, half_19bit
		paddd      mm3, half_19bit
		paddd      mm1, half_19bit
		psrad      mm6, 19
		psrad      mm4, 19
		psrad      mm2, 19
		psrad      mm0, 19
		psrad      mm7, 19
		psrad      mm5, 19
		psrad      mm3, 19
		psrad      mm1, 19
		packssdw   mm0, mm7           ; d[4]_d[0]
		packssdw   mm2, mm5           ; d[5]_d[1]
		packssdw   mm4, mm3           ; d[6]_d[2]
		packssdw   mm6, mm1           ; d[7]_d[3]
		movd       [edi     ], mm0
		movd       [edi+16*1], mm2
		movd       [edi+16*2], mm4
		movd       [edi+16*3], mm6
		psrlq      mm0, 32
		psrlq      mm2, 32
		psrlq      mm4, 32
		psrlq      mm6, 32
		movd       [edi+16*4], mm0
		movd       [edi+16*5], mm2
		movd       [edi+16*6], mm4
		movd       [edi+16*7], mm6
;-------------------------------------------------------------------
; Loop termination check
		lea        esi, [esi+8]
		lea        edi, [edi+4]
		test       ecx, ecx
		jnz        idct_llm_mmx_col_loop
;-------------------------------------------------------------------
; Cleanup
		add        esp, 292
		add        esp, eax

		pop        ebx
		pop        eax
		pop        ecx
		pop        edi
		pop        esi

		ret        4
;-------------------------------------------------------------------
_idct_llm_mmx@4 ENDP
;-------------------------------------------------------------------
; End

END

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -