; File: 2xsaimmx.inc
; (Code-viewer label "Font size:" - page residue, not part of the source.)
;Copyright (C) 1997-2001 ZSNES Team ( zsknight@zsnes.com / _demo_@zsnes.com )
;
;This program is free software; you can redistribute it and/or
;modify it under the terms of the GNU General Public License
;as published by the Free Software Foundation; either
;version 2 of the License, or (at your option) any later
;version.
;
;This program is distributed in the hope that it will be useful,
;but WITHOUT ANY WARRANTY; without even the implied warranty of
;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;GNU General Public License for more details.
;
;You should have received a copy of the GNU General Public License
;along with this program; if not, write to the Free Software
;Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
;/*---------------------------------------------------------------------*
; * The following (piece of) code, (part of) the 2xSaI engine, *
; * copyright (c) 1999 by Derek Liauw Kie Fa. *
; * Non-Commercial use of the engine is allowed and is encouraged, *
; * provided that appropriate credit be given and that this copyright *
; * notice will not be removed under any circumstance. *
; * You may freely modify this code, but I request *
; * that any improvements to the engine be submitted to me, so *
; * that I can implement these improvements in newer versions of *
; * the engine. *
; * If you need more information, have any comments or suggestions, *
; * you can e-mail me. My e-mail: derek-liauw@usa.net. *
; *---------------------------------------------------------------------*/
;----------------------
; 2xSaI version 0.59 WIP, soon to become version 0.60
;----------------------
;----------------------------------------------------------------------
; Assembler mode, exported symbols and code section
;----------------------------------------------------------------------
BITS 32

%ifdef __DJGPP__
; The three exports are commented out in this branch, so nothing is
; declared GLOBAL here when __DJGPP__ is defined:
;       GLOBAL __2xSaILine
;       GLOBAL __2xSaISuperEagleLine
;       GLOBAL _Init_2xSaIMMX
%else
        GLOBAL _2xSaILine
        GLOBAL _2xSaISuperEagleLine
        GLOBAL Init_2xSaIMMX
%endif

SECTION .text ALIGN = 32

;----------------------------------------------------------------------
; C-side prototype as recorded in this file
;----------------------------------------------------------------------
%ifdef __DJGPP__
;   EXTERN_C void __2xSaILine (uint8 *srcPtr, uint32 srcPitch, uint32 width,
;                              uint8 *dstPtr, uint32 dstPitch,
;                              uint16 dstSegment);
%else
;   EXTERN_C void _2xSaILine (uint8 *srcPtr, uint32 srcPitch, uint32 width,
;                             uint8 *dstPtr, uint32 dstPitch);
%endif
; NOTE(review): the prototype text above lists no delta pointer, yet the
; displacement table below places deltaPtr at [ebp+12] and the routine
; reads it - confirm the real argument list against the C caller.

;----------------------------------------------------------------------
; Argument displacements from ebp (used after "push ebp / mov ebp, esp")
;----------------------------------------------------------------------
srcPtr          equ     8
deltaPtr        equ     12
srcPitch        equ     16
width           equ     20
dstOffset       equ     24
dstPitch        equ     28
dstSegment      equ     32      ; only read inside the __DJGPP__ build

;----------------------------------------------------------------------
; Byte displacements of the source pixels within each row.
; Adjacent names are 2 bytes apart (the routine compares them word-wise).
;----------------------------------------------------------------------

; Row addressed as [eax + ...]
colorB0         equ     -2
colorB1         equ     0
colorB2         equ     2
colorB3         equ     4

; color7..color9 - same displacements, row not identifiable from the
; visible part of the routine (TODO confirm where they are used)
color7          equ     -2
color8          equ     0
color9          equ     2

; Row addressed as [eax + ebx + ...]
color4          equ     -2
color5          equ     0
color6          equ     2
colorS2         equ     4

; Row addressed as [eax + ebx + ebx + ...]
color1          equ     -2
color2          equ     0
color3          equ     2
colorS1         equ     4

; Row addressed three pitches below eax (eax is advanced by ebx first)
colorA0         equ     -2
colorA1         equ     0
colorA2         equ     2
colorA3         equ     4
NEWSYM _2xSaISuper2xSaILine
; Store some stuff
push ebp
mov ebp, esp
pushad
; Prepare the destination
%ifdef __DJGPP__
; Set the selector
mov eax, [ebp+dstSegment]
mov fs, ax
%endif
mov edx, [ebp+dstOffset] ; edx points to the screen
; Prepare the source
; eax points to colorA
mov eax, [ebp+srcPtr] ;eax points to colorA
mov ebx, [ebp+srcPitch] ;ebx contains the source pitch
mov ecx, [ebp+width] ;ecx contains the number of pixels to process
; eax now points to colorB1
sub eax, ebx ;eax points to B1 which is the base
; Main Loop
.Loop: push ecx
;-----Check Delta------------------
mov ecx, [ebp+deltaPtr]
;load source img
movq mm0, [eax+colorB0]
movq mm1, [eax+colorB3]
movq mm2, [eax+ebx+color4]
movq mm3, [eax+ebx+colorS2]
movq mm4, [eax+ebx+ebx+color1]
movq mm5, [eax+ebx+ebx+colorS1]
push eax
add eax, ebx
movq mm6, [eax+ebx+ebx+colorA0]
movq mm7, [eax+ebx+ebx+colorA3]
pop eax
;compare to delta
pcmpeqw mm0, [ecx+2+colorB0]
pcmpeqw mm1, [ecx+2+colorB3]
pcmpeqw mm2, [ecx+ebx+2+color4]
pcmpeqw mm3, [ecx+ebx+2+colorS2]
pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
add ecx, ebx
pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
sub ecx, ebx
;compose results
pand mm0, mm1
pand mm2, mm3
pand mm4, mm5
pand mm6, mm7
pand mm0, mm2
pand mm4, mm6
pxor mm7, mm7
pand mm0, mm4
movq mm6, [eax+colorB0]
pcmpeqw mm7, mm0 ;did any compare give us a zero ?
movq [ecx+2+colorB0], mm6
packsswb mm7, mm7
movd ecx, mm7
test ecx, ecx
jz near .SKIP_PROCESS ;no, so we can skip
;End Delta
;---------------------------------
movq mm0, [eax+ebx+color5]
movq mm1, [eax+ebx+color6]
movq mm2, mm0
movq mm3, mm1
movq mm4, mm0
movq mm5, mm1
pand mm0, [colorMask]
pand mm1, [colorMask]
psrlw mm0, 1
psrlw mm1, 1
pand mm3, [lowPixelMask]
paddw mm0, mm1
pand mm3, mm2
paddw mm0, mm3 ;mm0 contains the interpolated values
movq [I56Pixel], mm0
movq mm7, mm0
;-------------------
movq mm0, mm7
movq mm1, mm4 ;5,5,5,6
movq mm2, mm0
movq mm3, mm1
pand mm0, [colorMask]
pand mm1, [colorMask]
psrlw mm0, 1
psrlw mm1, 1
pand mm3, [lowPixelMask]
paddw mm0, mm1
pand mm3, mm2
paddw mm0, mm3 ;mm0 contains the interpolated values
movq [I5556Pixel], mm0
;--------------------
movq mm0, mm7
movq mm1, mm5 ;6,6,6,5
movq mm2, mm0
movq mm3, mm1
pand mm0, [colorMask]
pand mm1, [colorMask]
psrlw mm0, 1
psrlw mm1, 1
pand mm3, [lowPixelMask]
paddw mm0, mm1
pand mm3, mm2
paddw mm0, mm3
movq [I5666Pixel], mm0
;-------------------------
;-------------------------
movq mm0, [eax+ebx+ebx+color2]
movq mm1, [eax+ebx+ebx+color3]
movq mm2, mm0
movq mm3, mm1
movq mm4, mm0
movq mm5, mm1
pand mm0, [colorMask]
pand mm1, [colorMask]
psrlw mm0, 1
psrlw mm1, 1
pand mm3, [lowPixelMask]
paddw mm0, mm1
pand mm3, mm2
paddw mm0, mm3
movq [I23Pixel], mm0
movq mm7, mm0
;---------------------
movq mm0, mm7
movq mm1, mm4 ;2,2,2,3
movq mm2, mm0
movq mm3, mm1
pand mm0, [colorMask]
pand mm1, [colorMask]
psrlw mm0, 1
psrlw mm1, 1
pand mm3, [lowPixelMask]
paddw mm0, mm1
pand mm3, mm2
paddw mm0, mm3
movq [I2223Pixel], mm0
;----------------------
movq mm0, mm7
movq mm1, mm5 ;3,3,3,2
movq mm2, mm0
movq mm3, mm1
pand mm0, [colorMask]
pand mm1, [colorMask]
psrlw mm0, 1
psrlw mm1, 1
pand mm3, [lowPixelMask]
paddw mm0, mm1
pand mm3, mm2
paddw mm0, mm3
movq [I2333Pixel], mm0
;--------------------
;////////////////////////////////
; Decide which "branch" to take
;--------------------------------
movq mm0, [eax+ebx+color5]
movq mm1, [eax+ebx+color6]
movq mm6, mm0
movq mm7, mm1
pcmpeqw mm0, [eax+ebx+ebx+color3]
pcmpeqw mm1, [eax+ebx+ebx+color2]
pcmpeqw mm6, mm7
movq mm2, mm0
movq mm3, mm0
pand mm0, mm1 ;colorA == colorD && colorB == colorC
pxor mm7, mm7
pcmpeqw mm2, mm7
pand mm6, mm0
pand mm2, mm1 ;colorA != colorD && colorB == colorC
pcmpeqw mm1, mm7
pand mm1, mm3 ;colorA == colorD && colorB != colorC
pxor mm0, mm6
por mm1, mm6
movq mm7, mm0
movq [Mask26], mm2
packsswb mm7, mm7
movq [Mask35], mm1
movd ecx, mm7
test ecx, ecx
jz near .SKIP_GUESS
;---------------------------------------------
movq mm6, mm0
movq mm4, [eax+ebx+colorA]
movq mm5, [eax+ebx+colorB]
pxor mm7, mm7
pand mm6, [ONE]
movq mm0, [eax+colorE]
movq mm1, [eax+ebx+colorG]
movq mm2, mm0
movq mm3, mm1
pcmpeqw mm0, mm4
pcmpeqw mm1, mm4
pcmpeqw mm2, mm5
pcmpeqw mm3, mm5
pand mm0, mm6
pand mm1, mm6
pand mm2, mm6
pand mm3, mm6
paddw mm0, mm1
paddw mm2, mm3
pxor mm3, mm3
pcmpgtw mm0, mm6
pcmpgtw mm2, mm6
pcmpeqw mm0, mm3
pcmpeqw mm2, mm3
pand mm0, mm6
pand mm2, mm6
paddw mm7, mm0
psubw mm7, mm2
movq mm0, [eax+colorF]
movq mm1, [eax+ebx+colorK]
movq mm2, mm0
movq mm3, mm1
pcmpeqw mm0, mm4
pcmpeqw mm1, mm4
pcmpeqw mm2, mm5
pcmpeqw mm3, mm5
pand mm0, mm6
pand mm1, mm6
pand mm2, mm6
pand mm3, mm6
paddw mm0, mm1
paddw mm2, mm3
pxor mm3, mm3
pcmpgtw mm0, mm6
pcmpgtw mm2, mm6
pcmpeqw mm0, mm3
pcmpeqw mm2, mm3
pand mm0, mm6
pand mm2, mm6
paddw mm7, mm0
psubw mm7, mm2
push eax
add eax, ebx
movq mm0, [eax+ebx+colorH]
movq mm1, [eax+ebx+ebx+colorN]
movq mm2, mm0
movq mm3, mm1
pcmpeqw mm0, mm4
pcmpeqw mm1, mm4
pcmpeqw mm2, mm5
pcmpeqw mm3, mm5
pand mm0, mm6
pand mm1, mm6
pand mm2, mm6
pand mm3, mm6
paddw mm0, mm1
paddw mm2, mm3
pxor mm3, mm3
pcmpgtw mm0, mm6
pcmpgtw mm2, mm6
pcmpeqw mm0, mm3
pcmpeqw mm2, mm3
pand mm0, mm6
pand mm2, mm6
paddw mm7, mm0
psubw mm7, mm2
movq mm0, [eax+ebx+colorL]
movq mm1, [eax+ebx+ebx+colorO]
movq mm2, mm0
movq mm3, mm1
pcmpeqw mm0, mm4
pcmpeqw mm1, mm4
pcmpeqw mm2, mm5
pcmpeqw mm3, mm5
pand mm0, mm6
pand mm1, mm6
pand mm2, mm6
pand mm3, mm6
paddw mm0, mm1
paddw mm2, mm3
pxor mm3, mm3
pcmpgtw mm0, mm6
pcmpgtw mm2, mm6
pcmpeqw mm0, mm3
pcmpeqw mm2, mm3
pand mm0, mm6
pand mm2, mm6
paddw mm7, mm0
psubw mm7, mm2
pop eax
movq mm1, mm7
pxor mm0, mm0
pcmpgtw mm7, mm0
pcmpgtw mm0, mm1
por mm7, [Mask35]
por mm0, [Mask26]
movq [Mask35], mm7
movq [Mask26], mm0
.SKIP_GUESS:
;Start the ASSEMBLY !!! eh... compose all the results together to form the final image...
movq mm0, [eax+ebx+color5]
movq mm1, [eax+ebx+ebx+color2]
movq mm2, mm0
movq mm3, mm1
movq mm4, mm0
movq mm5, mm1
pand mm0, [colorMask]
pand mm1, [colorMask]
psrlw mm0, 1
psrlw mm1, 1
pand mm3, [lowPixelMask]
paddw mm0, mm1
pand mm3, mm2
paddw mm0, mm3 ;mm0 contains the interpolated values
;---------------------------
%ifdef dfhsdfhsdahdsfhdsfh
if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2)
product2a = INTERPOLATE (color2, color5);
else
if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0)
product2a = INTERPOLATE(color2, color5);
else
product2a = color2;
if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2)
product1a = INTERPOLATE (color2, color5);
else
if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0)
product1a = INTERPOLATE(color2, color5);
else
product1a = color5;
%endif
movq mm7, [Mask26]
movq mm6, [eax+colorB2]
movq mm5, [eax+ebx+ebx+color2]
movq mm4, [eax+ebx+ebx+color1]
pcmpeqw mm4, mm5
pcmpeqw mm6, mm5
pxor mm5, mm5
pand mm7, mm4
pcmpeqw mm6, mm5
pand mm7, mm6
movq mm6, [eax+ebx+ebx+color3]
movq mm5, [eax+ebx+ebx+color2]
movq mm4, [eax+ebx+ebx+color1]
movq mm2, [eax+ebx+color5]
movq mm1, [eax+ebx+color4]
movq mm3, [eax+colorB0]
pcmpeqw mm2, mm4
pcmpeqw mm6, mm5
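; --- Code-viewer page residue (keyboard shortcut help), not part of the source ---
; Copy code:            Ctrl + C
; Search code:          Ctrl + F
; Full-screen mode:     F11
; Toggle theme:         Ctrl + Shift + D
; Show shortcuts:       ?
; Increase font size:   Ctrl + =
; Decrease font size:   Ctrl + -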