; mmxintp.asm
.nolist
INCLUDE iammx.inc ; IAMMX Emulator Macros
.list
.586p
.model FLAT
.data
; Per-byte masks used by the packed-byte averaging code in this file.
; MMX has no per-byte shift, so a byte is divided by 2 or 4 by first clearing
; its low bit(s) and then shifting the whole quadword right; the cleared bits
; keep neighbouring bytes from leaking into each other.
ALIGN 8
MaskHiFF DQ 0FF00000000000000h ; top byte only (not referenced by the code visible in this file)
Mask01 DQ 0101010101010101h ; bit 0 of every byte: the carry term a AND b AND 1 of a two-way average
MaskFE DQ 0FEFEFEFEFEFEFEFEh ; clears bit 0 of every byte before a quadword shift right by 1
Mask03 DQ 0303030303030303h ; low two bits of every byte: the remainders of a four-way average
MaskFC DQ 0FCFCFCFCFCFCFCFCh ; clears the low two bits of every byte before a quadword shift right by 2
BYTE02 DQ 0202020202020202h ; +2 in every byte: rounding term of the four-way average
.const
.code
COMMENT ^
void MMXInterColorP (
BYTE *Src,
BYTE *Dst,
int Pixel,
int Lines,
int ImgSize
);
^
;-----------------------------------------------------------------------
; MMXInterColorP
;   void MMXInterColorP(BYTE *Src, BYTE *Dst, int Pixel, int Lines,
;                       int ImgSize);
;
; Builds three interpolated byte planes (called Y1, Y2, Y3 in the original
; comments) from one 8-bit source plane.  The image is walked BACKWARDS:
; from the last quadword of the last row towards lower addresses, 8 pixels
; per step.
;
; In:   Src     - address of the first quadword read, i.e. the last 8 pixels
;                 of the last row; ESI is decremented by 8 per step.
;       Dst     - address of the matching quadword in the Y3 plane; EDI is
;                 decremented by 8 per step.
;       Pixel   - bytes per row.  Pixel/8 quadwords are processed per row and
;                 the row below is addressed as [esi+Pixel].
;       Lines   - number of rows.
;       ImgSize - offset between output planes.  Per the original note the
;                 caller passes the NEGATED image size: Y2 is written at
;                 Dst+ImgSize and Y1 at Dst+2*ImgSize.
;
; Out (x = pixel, x+1 = its right neighbour, "below" = row at +Pixel):
;       Y1 = (P[x] + P[x+1]) / 2                              (truncated)
;       Y2 = (P[x] + below[x]) / 2                            (truncated)
;       Y3 = (P[x] + P[x+1] + below[x] + below[x+1] + 2) / 4
;       The last row has no row below: Y2 = P[x] and Y3 = Y1.
;
; Regs: ESI src, EDI dst, EAX Pixel, EBX ImgSize, ECX quadwords left in the
;       row, EDX rows left.  All are saved/restored through USES.  MM0-MM7
;       are clobbered; EMMS is executed before returning.
;
; Fix relative to the original: Lines <= 0 or Pixel < 8 now return without
; touching memory, and Lines == 1 stops after the last-row pass.  Previously
; these cases let the DEC/JNZ counters wrap around.
;
; NOTE(review): the operands keep the original "dword ptr" override on the
; 8-byte constants - presumably required by the iammx.inc macros; confirm
; before changing.
;-----------------------------------------------------------------------
MMXInterColorP PROC NEAR C USES ESI EDI EAX EBX ECX EDX,
        Src:PTR BYTE,Dst:PTR BYTE,
        Pixel:DWORD, Lines:DWORD, ImgSize:DWORD
        mov     esi, Src
        mov     edi, Dst
        mov     eax, Pixel
        mov     ebx, ImgSize            ; negated plane size (see header)
        mov     edx, Lines
        test    edx, edx
        jle     AllDone                 ; no rows: nothing to do

        ;---- pass 1: last row (no row below it) ------------------------
        mov     ecx, eax
        shr     ecx, 3                  ; ecx = quadwords per row
        jz      AllDone                 ; row shorter than one quadword
        ; Seed of the "right neighbour" carry for the first (rightmost)
        ; quadword.
        ; NOTE(review): the seed is the same quadword that is processed
        ; first, so the psllq below puts its byte 0 (P0) in the top lane,
        ; whereas the register comments describe P7 being replicated -
        ; confirm which is intended.
        movq    mm7, [esi]
        movq    mm5,dword ptr MaskFE    ; kept in a register for this loop
Column:
        movq    mm0, [esi]
        psllq   mm7, 56                 ; byte 0 of previous quadword -> top lane
        sub     esi, 8
        movq    mm1, mm0
        psrlq   mm0, 8                  ; lane i now holds pixel i+1
        movq    mm2, mm1
        por     mm0, mm7                ; top lane = pixel carried from the right
        movq    mm7, mm1                ; carry this quadword to the next step
        ; mm0--P7 P7 P6 P5 P4 P3 P2 P1   (right neighbours)
        ; mm1--P7 P6 P5 P4 P3 P2 P1 P0   (pixels)
        ; Compute (mm0+mm1)/2 per byte as (a>>1)+(b>>1)+(a AND b AND 1)
        ; and store it to both Y1 and Y3.
        movq    [edi+ebx],mm1           ; Y2 of the last row = source pixels
        pand    mm2, mm0
        pand    mm1, mm5
        pand    mm0, mm5
        pand    mm2,dword ptr Mask01    ; a AND b AND 1
        psrlq   mm0, 1
        psrlq   mm1, 1
        paddusb mm0, mm2
        paddusb mm0, mm1
        movq    [edi], mm0              ; store to Y3
        movq    [edi+2*ebx],mm0         ; store to Y1
        sub     edi, 8
        dec     ecx
        jnz     Column

        ;---- pass 2: remaining rows, bottom to top ---------------------
        dec     edx
        jz      AllDone                 ; single-row image: done
LineLoop:
        mov     ecx, eax
        shr     ecx, 3
        ; Seeds of the right-neighbour carries for this row and for the
        ; row below (same remark as for the pass-1 seed applies).
        movq    mm7, [esi]
        movq    mm6, [esi+eax]
        ; mm6--carry for the row below
        ; mm7--carry for the current row
Column2:
        movq    mm0, [esi]
        psllq   mm7, 56
        movq    mm1, mm0
        psllq   mm6, 56
        movq    mm3, mm1                ; mm3 = current-row pixels
        psrlq   mm0, 8
        por     mm0, mm7                ; mm0 = current-row right neighbours
        movq    mm7, mm1
        movq    mm4, mm0                ; saved for the Y3 computation
        pand    mm7, mm0
        ; Y1 = (pixels + right neighbours)/2
        pand    mm0,dword ptr MaskFE
        movq    mm2, mm3
        pand    mm1,dword ptr MaskFE
        psrlq   mm0, 1
        pand    mm7,dword ptr Mask01
        psrlq   mm1, 1
        movq    mm5, [esi+eax]          ; mm5 = pixels of the row below
        paddusb mm0, mm7
        paddusb mm0, mm1
        movq    mm7, mm5                ; mm1 receives the same value next
        ; Y1 is done.  Y2 = (pixels + pixels below)/2
        movq    mm1, mm5
        sub     esi, 8
        pand    mm1,dword ptr MaskFE
        pand    mm7, mm2
        pand    mm7,dword ptr Mask01
        psrlq   mm1, 1
        pand    mm2,dword ptr MaskFE
        paddusb mm1, mm7
        movq    [edi+2*ebx],mm0         ; store to Y1
        psrlq   mm2, 1
        paddusb mm1, mm2
        movq    mm0, mm4
        ; Y2 is done.  Y3 = (mm2+mm3+mm4+mm5+2)/4: the upper six bits of
        ; each byte are summed in mm2, the low two bits plus the rounding
        ; constant are summed in mm1, divided by 4 and added in at the end.
        pand    mm0,dword ptr Mask03
        movq    mm2, mm5
        pand    mm4,dword ptr MaskFC
        psrlq   mm2, 8
        movq    [edi+ebx],mm1           ; store to Y2
        por     mm2, mm6
        movq    mm1,dword ptr BYTE02    ; rounding term
        psrlq   mm4, 2
        ; mm2--P7 P7 P6 P5 P4 P3 P2 P1 of next line
        ; mm3--P7 P6 P5 P4 P3 P2 P1 P0 of current line
        ; mm4--P7 P7 P6 P5 P4 P3 P2 P1 of current line
        ; mm5--P7 P6 P5 P4 P3 P2 P1 P0 of next line
        movq    mm7, mm3                ; current-row carry for the next step
        paddusb mm1, mm0                ; low bits of mm4
        ; process mm2
        movq    mm0, mm2
        pand    mm0,dword ptr Mask03
        movq    mm6, mm5                ; row-below carry for the next step
        pand    mm2,dword ptr MaskFC
        paddusb mm1, mm0
        psrlq   mm2, 2
        movq    mm0, mm3
        ; process mm3
        pand    mm0,dword ptr Mask03
        paddusb mm2, mm4
        pand    mm3,dword ptr MaskFC
        paddusb mm1, mm0
        psrlq   mm3, 2
        movq    mm0, mm5
        ; process mm5
        pand    mm0,dword ptr Mask03
        paddusb mm2, mm3
        pand    mm5,dword ptr MaskFC
        paddusb mm1, mm0
        pand    mm1,dword ptr MaskFC
        psrlq   mm5, 2
        ; combine the four high parts with the low-bit sum
        psrlq   mm1, 2
        paddusb mm2, mm5
        paddusb mm2, mm1
        movq    [edi], mm2              ; store to Y3
        sub     edi, 8
        dec     ecx
        jnz     Column2
        dec     edx
        jnz     LineLoop
AllDone:
        emms                            ; leave MMX state before returning
        ret
MMXInterColorP ENDP
COMMENT ^
void MMXInterLumP (
BYTE *Src,
BYTE *Dst,
int Pixel,
int Lines,
int ImgSize
);
^
;-----------------------------------------------------------------------
; MMXInterLumP
;   void MMXInterLumP(BYTE *Src, BYTE *Dst, int Pixel, int Lines,
;                     int ImgSize);
;
; Same interpolation as MMXInterColorP, but every loop iteration handles
; two quadwords (16 pixels).  The image is walked BACKWARDS from the end of
; the last row towards lower addresses.
;
; In:   Src     - address of the SECOND-TO-LAST quadword of the last row;
;                 the first read is [Src+8], then [Src].
;       Dst     - address of the matching quadword in the Y3 plane.
;       Pixel   - bytes per row.  Pixel/16 iterations are run per row and
;                 the row below is addressed as [esi+Pixel].
;       Lines   - number of rows.
;       ImgSize - offset between output planes.  Per the original note the
;                 caller passes the NEGATED image size: Y2 is written at
;                 Dst+ImgSize and Y1 at Dst+2*ImgSize.
;
; Out (x = pixel, x+1 = its right neighbour, "below" = row at +Pixel):
;       Y1 = (P[x] + P[x+1]) / 2                              (truncated)
;       Y2 = (P[x] + below[x]) / 2                            (truncated)
;       Y3 = (P[x] + P[x+1] + below[x] + below[x+1] + 2) / 4
;       The last row has no row below: Y2 = P[x] and Y3 = Y1.
;
; Regs: ESI src, EDI dst, EAX Pixel, EBX ImgSize, ECX iterations left in the
;       row, EDX rows left.  All are saved/restored through USES.  MM0-MM7
;       are clobbered; EMMS is executed before returning.
;
; Fix relative to the original: Lines <= 0 or Pixel < 16 now return without
; touching memory, and Lines == 1 stops after the last-row pass.  Previously
; these cases let the DEC/JNZ counters wrap around.
;
; NOTE(review): the operands keep the original "dword ptr" override on the
; 8-byte constants - presumably required by the iammx.inc macros; confirm
; before changing.
;-----------------------------------------------------------------------
MMXInterLumP PROC NEAR C USES ESI EDI EAX EBX ECX EDX,
        Src:PTR BYTE,Dst:PTR BYTE,
        Pixel:DWORD, Lines:DWORD, ImgSize:DWORD
        mov     esi, Src
        mov     edi, Dst
        mov     eax, Pixel
        mov     ebx, ImgSize            ; negated plane size (see header)
        mov     edx, Lines
        test    edx, edx
        jle     AllDone                 ; no rows: nothing to do

        ;---- pass 1: last row (no row below it) ------------------------
        mov     ecx, eax
        shr     ecx, 4                  ; ecx = 16-pixel groups per row
        jz      AllDone                 ; row shorter than one group
        ; Seed of the "right neighbour" carry for the rightmost quadword.
        ; NOTE(review): the seed is the same quadword that is processed
        ; first, so the psllq below puts its byte 0 (P0) in the top lane,
        ; whereas the register comments describe P7 being replicated -
        ; confirm which is intended.
        movq    mm7, [esi+8]
        movq    mm5,dword ptr MaskFE    ; kept in a register for this loop
Column:
        ; -- upper quadword of the pair: source [esi+8], dest edi+8 --
        movq    mm0, [esi+8]
        psllq   mm7, 56                 ; byte 0 of previous quadword -> top lane
        add     edi, 8                  ; point EDI at the upper quadword
        movq    mm1, mm0
        psrlq   mm0, 8                  ; lane i now holds pixel i+1
        movq    mm2, mm1
        por     mm0, mm7
        movq    mm7, mm1                ; carry for the lower quadword
        ; mm0--P7 P7 P6 P5 P4 P3 P2 P1   (right neighbours)
        ; mm1--P7 P6 P5 P4 P3 P2 P1 P0   (pixels)
        ; Compute (mm0+mm1)/2 per byte as (a>>1)+(b>>1)+(a AND b AND 1)
        ; and store it to both Y1 and Y3.
        movq    [edi+ebx],mm1           ; Y2 of the last row = source pixels
        pand    mm2, mm0
        pand    mm1, mm5
        pand    mm0, mm5
        pand    mm2,dword ptr Mask01    ; a AND b AND 1
        psrlq   mm0, 1
        psrlq   mm1, 1
        paddusb mm0, mm2
        ; -- lower quadword of the pair, interleaved with the stores --
        movq    mm3, [esi]
        paddusb mm0, mm1
        psllq   mm7, 56
        movq    mm4, mm3
        sub     esi, 16                 ; both source quadwords consumed
        psrlq   mm3, 8
        movq    [edi], mm0              ; store to Y3 (upper quadword)
        movq    mm6, mm4
        movq    [edi+2*ebx],mm0         ; store to Y1 (upper quadword)
        por     mm3, mm7
        sub     edi, 8                  ; back to the lower quadword
        movq    mm7, mm4                ; carry for the next iteration
        ; mm3--right neighbours, mm4--pixels of the lower quadword
        movq    [edi+ebx],mm4           ; store to Y2
        pand    mm6, mm3
        pand    mm4, mm5
        pand    mm3, mm5
        pand    mm6,dword ptr Mask01
        psrlq   mm3, 1
        psrlq   mm4, 1
        paddusb mm3, mm6
        paddusb mm3, mm4
        movq    [edi], mm3              ; store to Y3
        movq    [edi+2*ebx],mm3         ; store to Y1
        sub     edi, 16                 ; net -16 per iteration (+8 at the top)
        dec     ecx
        jnz     Column

        ;---- pass 2: remaining rows, bottom to top ---------------------
        ; Pass 2 works on one quadword at a time (two per iteration), so
        ; move both pointers from the second-to-last to the last quadword
        ; of the row above.
        add     esi, 8
        add     edi, 8
        dec     edx
        jz      AllDone                 ; single-row image: done
LineLoop:
        mov     ecx, eax
        shr     ecx, 4
        ; Seeds of the right-neighbour carries for this row and for the
        ; row below (same remark as for the pass-1 seed applies).
        movq    mm7, [esi]
        movq    mm6, [esi+eax]
        ; mm6--carry for the row below
        ; mm7--carry for the current row
Column2:
        ;======== first quadword of the pair ============================
        movq    mm0, [esi]
        psllq   mm7, 56
        psllq   mm6, 56
        movq    mm1, mm0
        movq    mm3, mm1                ; mm3 = current-row pixels
        psrlq   mm0, 8
        por     mm0, mm7                ; mm0 = current-row right neighbours
        movq    mm7, mm1
        movq    mm4, mm0                ; saved for the Y3 computation
        pand    mm7, mm0
        ; Y1 = (pixels + right neighbours)/2
        pand    mm0,dword ptr MaskFE
        movq    mm2, mm3
        pand    mm1,dword ptr MaskFE
        psrlq   mm0, 1
        pand    mm7,dword ptr Mask01
        psrlq   mm1, 1
        movq    mm5, [esi+eax]          ; mm5 = pixels of the row below
        paddusb mm0, mm7
        paddusb mm0, mm1
        movq    mm7, mm5                ; mm1 receives the same value next
        ; Y1 is done.  Y2 = (pixels + pixels below)/2
        movq    mm1, mm5
        sub     esi, 8
        pand    mm1,dword ptr MaskFE
        pand    mm7, mm2
        pand    mm7,dword ptr Mask01
        psrlq   mm1, 1
        pand    mm2,dword ptr MaskFE
        paddusb mm1, mm7
        movq    [edi+2*ebx],mm0         ; store to Y1
        psrlq   mm2, 1
        paddusb mm1, mm2
        movq    mm0, mm4
        ; Y2 is done.  Y3 = (mm2+mm3+mm4+mm5+2)/4: the upper six bits of
        ; each byte are summed in mm2, the low two bits plus the rounding
        ; constant are summed in mm1, divided by 4 and added in at the end.
        pand    mm0,dword ptr Mask03
        movq    mm2, mm5
        pand    mm4,dword ptr MaskFC
        psrlq   mm2, 8
        movq    [edi+ebx],mm1           ; store to Y2
        por     mm2, mm6
        movq    mm1,dword ptr BYTE02    ; rounding term
        psrlq   mm4, 2
        ; mm2--P7 P7 P6 P5 P4 P3 P2 P1 of next line
        ; mm3--P7 P6 P5 P4 P3 P2 P1 P0 of current line
        ; mm4--P7 P7 P6 P5 P4 P3 P2 P1 of current line
        ; mm5--P7 P6 P5 P4 P3 P2 P1 P0 of next line
        movq    mm7, mm3                ; current-row carry for the next quadword
        paddusb mm1, mm0                ; low bits of mm4
        ; process mm2
        movq    mm0, mm2
        pand    mm0,dword ptr Mask03
        movq    mm6, mm5                ; row-below carry for the next quadword
        pand    mm2,dword ptr MaskFC
        paddusb mm1, mm0
        psrlq   mm2, 2
        movq    mm0, mm3
        ; process mm3
        pand    mm0,dword ptr Mask03
        paddusb mm2, mm4
        pand    mm3,dword ptr MaskFC
        paddusb mm1, mm0
        psrlq   mm3, 2
        movq    mm0, mm5
        ; process mm5
        pand    mm0,dword ptr Mask03
        paddusb mm2, mm3
        pand    mm5,dword ptr MaskFC
        paddusb mm1, mm0
        pand    mm1,dword ptr MaskFC
        psrlq   mm5, 2
        ; combine the four high parts with the low-bit sum
        psrlq   mm1, 2
        paddusb mm2, mm5
        ;======== second quadword: its loads are interleaved with the ===
        ;======== tail of the first quadword's Y3 computation         ===
        movq    mm0, [esi]              ; ESI was already moved back by 8 above
        paddusb mm2, mm1
        psllq   mm7, 56
        movq    mm1, mm0
        movq    mm3, mm1
        psrlq   mm0, 8
        movq    [edi], mm2              ; store to Y3 (first quadword)
        psllq   mm6, 56
        sub     edi, 8
        por     mm0, mm7
        movq    mm7, mm1
        movq    mm4, mm0                ; saved for the Y3 computation
        pand    mm7, mm0
        ; Y1 = (pixels + right neighbours)/2
        pand    mm0,dword ptr MaskFE
        movq    mm2, mm3
        pand    mm1,dword ptr MaskFE
        psrlq   mm0, 1
        pand    mm7,dword ptr Mask01
        psrlq   mm1, 1
        movq    mm5, [esi+eax]
        paddusb mm0, mm7
        paddusb mm0, mm1
        movq    mm7, mm5                ; mm1 receives the same value next
        ; Y1 is done.  Y2 = (pixels + pixels below)/2
        movq    mm1, mm5
        sub     esi, 8
        pand    mm1,dword ptr MaskFE
        pand    mm7, mm2
        pand    mm7,dword ptr Mask01
        psrlq   mm1, 1
        pand    mm2,dword ptr MaskFE
        paddusb mm1, mm7
        movq    [edi+2*ebx],mm0         ; store to Y1
        psrlq   mm2, 1
        paddusb mm1, mm2
        movq    mm0, mm4
        ; Y2 is done.  Y3 = (mm2+mm3+mm4+mm5+2)/4, as above.
        pand    mm0,dword ptr Mask03
        movq    mm2, mm5
        pand    mm4,dword ptr MaskFC
        psrlq   mm2, 8
        movq    [edi+ebx],mm1           ; store to Y2
        por     mm2, mm6
        movq    mm1,dword ptr BYTE02    ; rounding term
        psrlq   mm4, 2
        ; mm2--P7 P7 P6 P5 P4 P3 P2 P1 of next line
        ; mm3--P7 P6 P5 P4 P3 P2 P1 P0 of current line
        ; mm4--P7 P7 P6 P5 P4 P3 P2 P1 of current line
        ; mm5--P7 P6 P5 P4 P3 P2 P1 P0 of next line
        movq    mm7, mm3                ; current-row carry for the next iteration
        paddusb mm1, mm0
        ; process mm2
        movq    mm0, mm2
        pand    mm0,dword ptr Mask03
        movq    mm6, mm5                ; row-below carry for the next iteration
        pand    mm2,dword ptr MaskFC
        paddusb mm1, mm0
        psrlq   mm2, 2
        movq    mm0, mm3
        ; process mm3
        pand    mm0,dword ptr Mask03
        paddusb mm2, mm4
        pand    mm3,dword ptr MaskFC
        paddusb mm1, mm0
        psrlq   mm3, 2
        movq    mm0, mm5
        ; process mm5
        pand    mm0,dword ptr Mask03
        paddusb mm2, mm3
        pand    mm5,dword ptr MaskFC
        paddusb mm1, mm0
        pand    mm1,dword ptr MaskFC
        psrlq   mm5, 2
        ; combine the four high parts with the low-bit sum
        psrlq   mm1, 2
        paddusb mm2, mm5
        paddusb mm2, mm1
        movq    [edi], mm2              ; store to Y3 (second quadword)
        sub     edi, 8
        dec     ecx
        jnz     Column2
        dec     edx
        jnz     LineLoop
AllDone:
        emms                            ; leave MMX state before returning
        ret
MMXInterLumP ENDP
END
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -