📄 idct_ap922_sse2.asm
字号:
psrad xmm0, 11
paddd xmm5, xmm6
packssdw xmm0, xmm2
paddd xmm4, xmm7
movdqa xmm6, xmm5
psubd xmm6, xmm4
psrad xmm6, 11
paddd xmm4, xmm5
psrad xmm4, 11
pshufd xmm6, xmm6, 01Bh
packssdw xmm4, xmm6
movdqa [esi+4*16], xmm0
movdqa [esi+6*16], xmm4
; row 3, row 1
movdqa xmm0, [esi+3*16]
lea eax, table_35
movdqa xmm4, [esi+1*16]
lea ebx, table_17
pshuflw xmm0, xmm0, 0D8h
pshufhw xmm0, xmm0, 0D8h
pshufd xmm3, xmm0, 055h
pshufd xmm1, xmm0, 0
pshufd xmm2, xmm0, 0AAh
pshufd xmm0, xmm0, 0FFh
pmaddwd xmm1, [eax]
pmaddwd xmm2, [eax+16]
pmaddwd xmm3, [eax+32]
pmaddwd xmm0, [eax+48]
paddd xmm0, xmm3
pshuflw xmm4, xmm4, 0D8h
pshufhw xmm4, xmm4, 0D8h
movdqa xmm7, half11
paddd xmm1, xmm7
pshufd xmm6, xmm4, 0AAh
pshufd xmm5, xmm4, 0
pmaddwd xmm5, [ebx]
paddd xmm5, xmm7
pmaddwd xmm6, [ebx+16]
pshufd xmm7, xmm4, 055h
pmaddwd xmm7, [ebx+32]
pshufd xmm4, xmm4, 0FFh
pmaddwd xmm4, [ebx+48]
paddd xmm1, xmm2
movdqa xmm2, xmm1
psubd xmm2, xmm0
psrad xmm2, 11
pshufd xmm2, xmm2, 01Bh
paddd xmm0, xmm1
psrad xmm0, 11
paddd xmm5, xmm6
packssdw xmm0, xmm2
paddd xmm4, xmm7
movdqa xmm6, xmm5
psubd xmm6, xmm4
psrad xmm6, 11
paddd xmm4, xmm5
psrad xmm4, 11
pshufd xmm6, xmm6, 01Bh
packssdw xmm4, xmm6
movdqa [esi+3*16], xmm0
movdqa [esi+1*16], xmm4
; row 5, row 7
movdqa xmm0, [esi+5*16]
movdqa xmm4, [esi+7*16]
pshuflw xmm0, xmm0, 0D8h
pshufhw xmm0, xmm0, 0D8h
pshufd xmm3, xmm0, 055h
pshufd xmm1, xmm0, 0
pshufd xmm2, xmm0, 0AAh
pshufd xmm0, xmm0, 0FFh
pmaddwd xmm1, [eax]
pmaddwd xmm2, [eax+16]
pmaddwd xmm3, [eax+32]
pmaddwd xmm0, [eax+48]
paddd xmm0, xmm3
pshuflw xmm4, xmm4, 0D8h
pshufhw xmm4, xmm4, 0D8h
movdqa xmm7, half11
paddd xmm1, xmm7
pshufd xmm6, xmm4, 0AAh
pshufd xmm5, xmm4, 0
pmaddwd xmm5, [ebx]
paddd xmm5, xmm7
pmaddwd xmm6, [ebx+16]
pshufd xmm7, xmm4, 055h
pmaddwd xmm7, [ebx+32]
pshufd xmm4, xmm4, 0FFh
pmaddwd xmm4, [ebx+48]
paddd xmm1, xmm2
movdqa xmm2, xmm1
psubd xmm2, xmm0
psrad xmm2, 11
pshufd xmm2, xmm2, 01Bh
paddd xmm0, xmm1
psrad xmm0, 11
paddd xmm5, xmm6
packssdw xmm0, xmm2
paddd xmm4, xmm7
movdqa xmm6, xmm5
psubd xmm6, xmm4
psrad xmm6, 11
paddd xmm4, xmm5
psrad xmm4, 11
pshufd xmm6, xmm6, 01Bh
packssdw xmm4, xmm6
; col 0-7
movdqa xmm6, xmm4
movdqa xmm2, xmm0
movdqa xmm3, [esi+3*16]
movdqa xmm1, tan_3
pmulhw xmm0, xmm1
movdqa xmm5, tan_1
pmulhw xmm1, xmm3
paddsw xmm1, xmm3
pmulhw xmm4, xmm5
movdqa xmm7, [esi+6*16]
pmulhw xmm5, [esi+1*16]
psubsw xmm5, xmm6
movdqa xmm6, xmm5
paddsw xmm4, [esi+1*16]
paddsw xmm0, xmm2
paddsw xmm0, xmm3
psubsw xmm2, xmm1
movdqa xmm1, xmm0
movdqa xmm3, tan_2
pmulhw xmm7, xmm3
pmulhw xmm3, [esi+2*16]
paddsw xmm0, xmm4
psubsw xmm4, xmm1
paddsw xmm0, one
movdqa [esi+7*16], xmm0
psubsw xmm5, xmm2
paddsw xmm5, one
paddsw xmm6, xmm2
movdqa [esi+3*16], xmm6
movdqa xmm1, xmm4
movdqa xmm0, cos_4
movdqa xmm2, xmm0
paddsw xmm4, xmm5
psubsw xmm1, xmm5
paddsw xmm7, [esi+2*16]
psubsw xmm3, [esi+6*16]
movdqa xmm6, [esi]
pmulhw xmm0, xmm1
movdqa xmm5, [esi+4*16]
paddsw xmm5, xmm6
psubsw xmm6, [esi+4*16]
pmulhw xmm2, xmm4
paddsw xmm4, xmm2
movdqa xmm2, xmm5
psubsw xmm2, xmm7
por xmm4, one
paddsw xmm0, xmm1
por xmm0, one
paddsw xmm5, xmm7
paddsw xmm5, half06
movdqa xmm1, xmm6
movdqa xmm7, [esi+7*16]
paddsw xmm7, xmm5
psraw xmm7, 6
movdqa [esi], xmm7
paddsw xmm6, xmm3
paddsw xmm6, half06
psubsw xmm1, xmm3
paddsw xmm1, half06a
movdqa xmm7, xmm1
movdqa xmm3, xmm6
paddsw xmm6, xmm4
paddsw xmm2, half06a
psraw xmm6, 6
movdqa [esi+1*16], xmm6
paddsw xmm1, xmm0
psraw xmm1, 6
movdqa [esi+2*16], xmm1
movdqa xmm1, [esi+3*16]
movdqa xmm6, xmm1
psubsw xmm7, xmm0
psraw xmm7, 6
movdqa [esi+5*16], xmm7
psubsw xmm5, [esi+7*16]
psraw xmm5, 6
movdqa [esi+7*16], xmm5
psubsw xmm3, xmm4
paddsw xmm6, xmm2
psubsw xmm2, xmm1
psraw xmm6, 6
movdqa [esi+3*16], xmm6
psraw xmm2, 6
movdqa [esi+4*16], xmm2
psraw xmm3, 6
movdqa [esi+6*16], xmm3
;-------------------------------------------------------------------
; Cleanup: restore saved registers and return
pop ebx
pop eax
pop esi
ret 4
;-------------------------------------------------------------------
_idct_ap922_sse2@4 ENDP
;-------------------------------------------------------------------
; End of file
END
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -