; File: vectdct.asm
; Listing output is switched off around the include and switched back on
; afterwards (presumably so the macro text does not clutter the listing).
.nolist
INCLUDE iammx.inc ; IAMMX Emulator Macros
.list
; Accept Pentium-class instructions, including privileged ones. This
; processor directive precedes .model because the FLAT model needs a
; 32-bit processor mode to be selected first.
.586p
; 32-bit flat memory model.
.model FLAT
; The data and constant segments are opened here, but nothing is defined
; in either of them before the code segment starts.
.data
.const
; Code segment: the procedure(s) follow.
.code
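COMMENT ^
void MatrixMulti(
short *src1,
short *src2,
short *Result);

For each 8-word row of src1 (16 bytes per row) and each 8-word vector k
of src2 (vector k starts at byte offset 16*k, k = 0..7), the code forms
the dot product of the two, shifts the 32-bit sum right arithmetically
by 15, saturates it to a signed 16-bit word and stores it to Result.
Four results are written per 8-byte store, so each src1 row produces
16 bytes of output.
NOTE(review): because src2 is read as contiguous 8-word vectors, it
presumably holds the right-hand matrix transposed (one column per
vector), and the shift by 15 suggests Q15 fixed-point data - confirm
both against the caller.
^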
MatrixMulti PROC NEAR C USES ESI,
src1:PTR SWORD,src2:PTR SWORD,
Result:PTR SWORD
mov esi, src1
push edx
push edi
mov edi, src2
movq mm0, [esi]
movq mm1, [esi+8]
movq mm6, mm0
pmaddwd mm0, [edi]
movq mm7, mm1
pmaddwd mm1, [edi+8]
movq mm3, mm6
pmaddwd mm3, [edi+16]
movq mm4, mm7
pmaddwd mm4, [edi+24]
mov edx, Result
paddd mm0, mm1
movq mm2, mm0
movq mm1, mm7
pmaddwd mm1, [edi+40]
psrlq mm2, 32
paddd mm2, mm0
movq mm0, mm6
pmaddwd mm0, [edi+32]
paddd mm3, mm4
movq mm4, mm7
movq mm5, mm3
pmaddwd mm4, [edi+56]
psrlq mm5, 32
paddd mm0, mm1
paddd mm3, mm5
punpckldq mm2,mm3
movq mm3, mm6
pmaddwd mm3, [edi+48]
psrad mm2, 15
;mm2--Result1 Result0
movq mm5, mm0
psrlq mm0, 32
paddd mm5, mm0
paddd mm3, mm4
movq mm1, mm3
movq mm0, mm6
pmaddwd mm0, [edi+64]
psrlq mm1, 32
paddd mm3, mm1
movq mm1, mm7
pmaddwd mm1, [edi+72]
punpckldq mm5, mm3
movq mm3, mm6
psrad mm5, 15
;mm5--Result3 Result2
packssdw mm2,mm5
movq mm4, mm7
pmaddwd mm3, [edi+80]
paddd mm0, mm1
movq [edx], mm2
movq mm1, mm7
pmaddwd mm4, [edi+88]
movq mm2, mm0
pmaddwd mm1, [edi+104]
psrlq mm2, 32
paddd mm2, mm0
movq mm0, mm6
pmaddwd mm0, [edi+96]
paddd mm3, mm4
movq mm4, mm7
movq mm5, mm3
pmaddwd mm4, [edi+120]
psrlq mm5, 32
paddd mm3, mm5
paddd mm0, mm1
punpckldq mm2,mm3
movq mm3, mm6
pmaddwd mm3, [edi+112]
psrad mm2, 15
;mm2--Result1 Result0
movq mm5, mm0
psrlq mm0, 32
paddd mm5, mm0
movq mm0, [esi+16]
paddd mm3, mm4
movq mm1, mm3
psrlq mm3, 32
movq mm6, mm0
paddd mm3, mm1
pmaddwd mm0, [edi]
punpckldq mm5, mm3
movq mm1, [esi+24]
psrad mm5, 15
movq mm7, mm1
packssdw mm2, mm5
;mm5--Result3 Result2
pmaddwd mm1, [edi+8]
movq mm3, mm6
movq [edx+8], mm2
movq mm4, mm7
pmaddwd mm3, [edi+16]
pmaddwd mm4, [edi+24]
paddd mm0, mm1
movq mm2, mm0
movq mm1, mm7
pmaddwd mm1, [edi+40]
psrlq mm2, 32
paddd mm2, mm0
movq mm0, mm6
pmaddwd mm0, [edi+32]
paddd mm3, mm4
movq mm4, mm7
movq mm5, mm3
pmaddwd mm4, [edi+56]
psrlq mm5, 32
paddd mm0, mm1
paddd mm3, mm5
punpckldq mm2, mm3
movq mm3, mm6
pmaddwd mm3, [edi+48]
psrad mm2, 15
;mm2--Result1 Result0
movq mm5, mm0
psrlq mm0, 32
paddd mm5, mm0
movq mm0, mm6
pmaddwd mm0, [edi+64]
paddd mm3, mm4
movq mm1, mm3
psrlq mm3, 32
paddd mm3, mm1
movq mm1, mm7
pmaddwd mm1, [edi+72]
punpckldq mm5, mm3
movq mm3, mm6
psrad mm5, 15
;mm5--Result3 Result2
packssdw mm2,mm5
movq mm4, mm7
pmaddwd mm3, [edi+80]
paddd mm0, mm1
movq [edx+16],mm2
movq mm1, mm7
pmaddwd mm4, [edi+88]
movq mm2, mm0
pmaddwd mm1, [edi+104]
psrlq mm2, 32
paddd mm2, mm0
movq mm0, mm6
pmaddwd mm0, [edi+96]
paddd mm3, mm4
movq mm5, mm3
movq mm4, mm7
pmaddwd mm4, [edi+120]
psrlq mm5, 32
paddd mm3, mm5
paddd mm0, mm1
punpckldq mm2,mm3
movq mm3, mm6
pmaddwd mm3, [edi+112]
psrad mm2, 15
;mm2--Result1 Result0
movq mm5, mm0
psrlq mm0, 32
paddd mm5, mm0
movq mm0, [esi+32]
paddd mm3, mm4
movq mm6, mm0
movq mm1, mm3
pmaddwd mm0, [edi]
psrlq mm1, 32
paddd mm3, mm1
movq mm1, [esi+40]
punpckldq mm5, mm3
movq mm7, mm1
psrad mm5, 15
;mm5--Result3 Result2
pmaddwd mm1, [edi+8]
packssdw mm2, mm5
movq mm3, mm6
movq mm4, mm7
movq [edx+24], mm2
pmaddwd mm3, [edi+16]
paddd mm0, mm1
pmaddwd mm4, [edi+24]
movq mm1, mm7
pmaddwd mm1, [edi+40]
movq mm2, mm0
psrlq mm2, 32
paddd mm2, mm0
movq mm0, mm6
pmaddwd mm0, [edi+32]
paddd mm3, mm4
movq mm4, mm7
movq mm5, mm3
pmaddwd mm4, [edi+56]
psrlq mm5, 32
paddd mm0, mm1
paddd mm3, mm5
punpckldq mm2, mm3
movq mm3, mm6
pmaddwd mm3, [edi+48]
psrad mm2, 15
;mm2--Result1 Result0
movq mm5, mm0
psrlq mm0, 32
paddd mm5, mm0
movq mm0, mm6
pmaddwd mm0, [edi+64]
paddd mm3, mm4
movq mm1, mm3
psrlq mm3, 32
paddd mm3, mm1
movq mm1, mm7
pmaddwd mm1, [edi+72]
punpckldq mm5,mm3
movq mm3, mm6
psrad mm5, 15
;mm5--Result3 Result2
packssdw mm2,mm5
movq mm4, mm7
pmaddwd mm3, [edi+80]
paddd mm0, mm1
movq [edx+32], mm2
movq mm1, mm7
pmaddwd mm4, [edi+88]
movq mm2, mm0
pmaddwd mm1, [edi+104]
psrlq mm2, 32
paddd mm2, mm0
movq mm0, mm6
pmaddwd mm0, [edi+96]
paddd mm3, mm4
movq mm5, mm3
movq mm4, mm7
pmaddwd mm4, [edi+120]
psrlq mm5, 32
paddd mm3, mm5
paddd mm0, mm1
punpckldq mm2, mm3
movq mm3, mm6
pmaddwd mm3, [edi+112]
psrad mm2, 15
;mm2--Result1 Result0
movq mm5, mm0
psrlq mm0, 32
paddd mm5, mm0
movq mm0, [esi+48]
paddd mm3, mm4
movq mm6, mm0
movq mm1, mm3
pmaddwd mm0, [edi]
psrlq mm1, 32
paddd mm3, mm1
movq mm1, [esi+56]
punpckldq mm5, mm3
movq mm7, mm1
psrad mm5, 15
;mm5--Result3 Result2
pmaddwd mm1, [edi+8]
packssdw mm2, mm5
movq mm3, mm6
movq mm4, mm7
movq [edx+40], mm2
pmaddwd mm3, [edi+16]
paddd mm0, mm1
pmaddwd mm4, [edi+24]
movq mm2, mm0
movq mm1, mm7
psrlq mm2, 32
pmaddwd mm1, [edi+40]
paddd mm2, mm0
movq mm0, mm6
paddd mm3, mm4
pmaddwd mm0, [edi+32]
movq mm4, mm7
pmaddwd mm4, [edi+56]
movq mm5, mm3
psrlq mm5, 32
paddd mm0, mm1
paddd mm5, mm3
movq mm3, mm6
pmaddwd mm3, [edi+48]
punpckldq mm2, mm5
movq mm5, mm0
psrlq mm0, 32
;mm2--Result1 Result0
psrad mm2, 15
paddd mm5, mm0
paddd mm3, mm4
movq mm0, mm6
movq mm1, mm3
psrlq mm3, 32
pmaddwd mm0, [edi+64]
paddd mm3, mm1
punpckldq mm5, mm3
movq mm1, mm7
pmaddwd mm1, [edi+72]
psrad mm5, 15
;mm5--Result3 Result2
packssdw mm2, mm5
movq mm3, mm6
pmaddwd mm3, [edi+80]
movq mm4, mm7
movq [edx+48], mm2
paddd mm0, mm1
pmaddwd mm4, [edi+88]
movq mm2, mm0
movq mm1, mm7
psrlq mm2, 32
pmaddwd mm1, [edi+104]
paddd mm2, mm0
movq mm0, mm6
paddd mm3, mm4
pmaddwd mm0, [edi+96]
movq mm5, mm3
movq mm4, mm7
psrlq mm5, 32
pmaddwd mm4, [edi+120]
paddd mm3, mm5
punpckldq mm2, mm3
movq mm3, mm6
pmaddwd mm3, [edi+112]
paddd mm0, mm1
movq mm5, mm0
psrlq mm0, 32
;mm2--Result1 Result0
psrad mm2, 15
paddd mm5, mm0
movq mm0, [esi+64]
paddd mm3, mm4
movq mm6, mm0
movq mm1, mm3
pmaddwd mm0, [edi]
psrlq mm1, 32
paddd mm3, mm1
movq mm1, [esi+72]
punpckldq mm5, mm3
movq mm7, mm1
psrad mm5, 15
;mm5--Result3 Result2
pmaddwd mm1, [edi+8]
packssdw mm2, mm5
movq mm3, mm6
movq mm4, mm7
movq [edx+56], mm2
pmaddwd mm3, [edi+16]
paddd mm0, mm1
pmaddwd mm4, [edi+24]
movq mm1, mm7
pmaddwd mm1, [edi+40]
movq mm2, mm0
psrlq mm2, 32
paddd mm2, mm0
movq mm0, mm6
pmaddwd mm0, [edi+32]
paddd mm3, mm4
movq mm4, mm7
movq mm5, mm3
pmaddwd mm4, [edi+56]
psrlq mm5, 32
paddd mm0, mm1
paddd mm3, mm5
punpckldq mm2, mm3
movq mm3, mm6
pmaddwd mm3, [edi+48]
psrad mm2, 15
;mm2--Result1 Result0
movq mm5, mm0
psrlq mm0, 32
paddd mm5, mm0
movq mm0, mm6
pmaddwd mm0, [edi+64]
paddd mm3, mm4
movq mm1, mm3
psrlq mm3, 32
paddd mm3, mm1
movq mm1, mm7
pmaddwd mm1, [edi+72]
punpckldq mm5, mm3
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -