hq2x32.asm

来自「linux下的任天堂模拟器代码。供大家参考。」· 汇编 代码 · 共 2,134 行 · 第 1/3 页

ASM
2,134
字号
;Copyright (C) 1997-2007 ZSNES Team ( zsKnight, _Demo_, pagefault, Nach )
;
;http://www.zsnes.com
;http://sourceforge.net/projects/zsnes
;https://zsnes.bountysource.com
;
;This program is free software; you can redistribute it and/or
;modify it under the terms of the GNU General Public License
;version 2 as published by the Free Software Foundation.
;
;This program is distributed in the hope that it will be useful,
;but WITHOUT ANY WARRANTY; without even the implied warranty of
;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;GNU General Public License for more details.
;
;You should have received a copy of the GNU General Public License
;along with this program; if not, write to the Free Software
;Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

;----------------------------------------------------------
;hq2x filter
;Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
;----------------------------------------------------------

; Syntax: NASM (Intel operand order).  Target: 32-bit x86 with MMX
; (pushad and 32-bit addressing are used throughout).
;
; NOTE(review): this listing had lost its line breaks; it has been restored
; to one statement per line.  Local and macro-local labels were given an
; explicit ':' so that they cannot be mistaken for part of the preceding
; operand.  No instruction, operand or symbol name was otherwise changed.

%include "macros.mac"

EXTSYM vidbuffer,curblank,MMXSupport,GUIOn,GUIOn2,vidbufferofsb,FilteredGUI
EXTSYM resolutn,lineleft,cfield,hirestiledat,newengen,SpecialLine,hqFilter
EXTSYM AddEndBytes,NumBytesPerLine,WinVidMemStart,BitConv32Ptr,RGBtoYUVPtr
EXTSYM prevline,nextline,deltaptr,xcounter,w1,w2,w3,w4,w5,w6,w7,w8,w9
EXTSYM reg_blank,const3,const5,const6,const14,cross,threshold

SECTION .bss
; c1..c9: one dword each.  In the visible code c2/c4/c6/c8 are filled from
; the table at BitConv32Ptr indexed by w2/w4/w6/w8; the PIXELxx macros read
; c1..c9 as interpolation inputs.
NEWSYM c1, resd 1
NEWSYM c2, resd 1
NEWSYM c3, resd 1
NEWSYM c4, resd 1
NEWSYM c5, resd 1
NEWSYM c6, resd 1
NEWSYM c7, resd 1
NEWSYM c8, resd 1
NEWSYM c9, resd 1

SECTION .text

;----------------------------------------------------------
; TestDiff a, b
;   a, b : labels of dword variables holding table indices.
;   Out  : ecx = 0 when [a] == [b], or when every byte of the absolute
;          difference of the two RGBtoYUVPtr table entries is <= the
;          corresponding byte of [threshold]; non-zero otherwise.
;   Clobb: ecx, edx, mm1, mm2, mm5, flags.
;----------------------------------------------------------
%macro TestDiff 2
    xor     ecx,ecx
    mov     edx,[%1]
    cmp     edx,[%2]
    je      %%fin                   ; identical indices -> ecx stays 0
    mov     ecx,[RGBtoYUVPtr]
    movd    mm1,[ecx+edx*4]
    movq    mm5,mm1
    mov     edx,[%2]
    movd    mm2,[ecx+edx*4]
    psubusb mm1,mm2                 ; saturating a-b
    psubusb mm2,mm5                 ; saturating b-a
    por     mm1,mm2                 ; per-byte |a-b|
    psubusb mm1,[threshold]         ; bytes within threshold become 0
    movd    ecx,mm1
%%fin:
%endmacro

;----------------------------------------------------------
; DiffOrNot a, b, <n statements if different>, <n statements if same>
;   Runs TestDiff a,b and expands the first half of the remaining
;   arguments when ecx != 0, the second half when ecx == 0.
;   Overloaded on argument count for n = 1, 2, 3 and 4.
;----------------------------------------------------------
%macro DiffOrNot 4
   TestDiff %1,%2
   test ecx,ecx
   jz   %%same
   %3
   jmp %%fin
%%same:
   %4
%%fin:
%endmacro

%macro DiffOrNot 6
   TestDiff %1,%2
   test ecx,ecx
   jz   %%same
   %3
   %4
   jmp %%fin
%%same:
   %5
   %6
%%fin:
%endmacro

%macro DiffOrNot 8
   TestDiff %1,%2
   test ecx,ecx
   jz   %%same
   %3
   %4
   %5
   jmp %%fin
%%same:
   %6
   %7
   %8
%%fin:
%endmacro

%macro DiffOrNot 10
   TestDiff %1,%2
   test ecx,ecx
   jz %%same
   %3
   %4
   %5
   %6
   jmp %%fin
%%same:
   %7
   %8
   %9
   %10
%%fin:
%endmacro

;----------------------------------------------------------
; Interp1 dst, a, b      dst = (a*3 + b) >> 2
; Interp2 dst, a, b, c   dst = (a*2 + b + c) >> 2
; Interp5 dst, a, b      dst = (a + b) >> 1
;   Computed with ordinary 32-bit integer instructions on the whole dword.
;   Clobb: edx, flags.
;   NOTE(review): no per-channel masking is done here, so bits can move
;   between adjacent bytes unless the low bits of every channel are zero.
;   Confirm against the contents of the BitConv32Ptr table.
;----------------------------------------------------------
%macro Interp1 3
    mov edx,%2
    shl edx,2
    add edx,%3
    sub edx,%2
    shr edx,2
    mov %1,edx
%endmacro

%macro Interp2 4
    mov edx,%2
    shl edx,1
    add edx,%3
    add edx,%4
    shr edx,2
    mov %1,edx
%endmacro

%macro Interp5 3
    mov edx,%2
    add edx,%3
    shr edx,1
    mov %1,edx
%endmacro

;----------------------------------------------------------
; Interp6/7/9/10 dst, a, b
;   The first colour is taken implicitly from eax.  Each byte is widened
;   to a word (punpcklbw with [reg_blank]), combined, shifted right and
;   packed back with unsigned saturation:
;     Interp6 : (eax*[const5]  + a*2 + b)      >> 3
;     Interp7 : (eax*[const6]  + a + b)        >> 3
;     Interp9 : (eax*2 + (a + b)*[const3])     >> 3
;     Interp10: (eax*[const14] + a + b)        >> 4
;   Clobb: mm1, mm2, mm3.
;   NOTE(review): reg_blank is presumably all zero and constN presumably
;   holds N in each word; both are defined outside this listing.
;----------------------------------------------------------
%macro Interp6 3
    movd       mm1, eax
    movd       mm2, %2
    movd       mm3, %3
    punpcklbw  mm1, [reg_blank]
    punpcklbw  mm2, [reg_blank]
    punpcklbw  mm3, [reg_blank]
    pmullw     mm1, [const5]
    psllw      mm2, 1
    paddw      mm1, mm3
    paddw      mm1, mm2
    psrlw      mm1, 3
    packuswb   mm1, [reg_blank]
    movd       %1, mm1
%endmacro

%macro Interp7 3
    movd       mm1, eax
    movd       mm2, %2
    movd       mm3, %3
    punpcklbw  mm1, [reg_blank]
    punpcklbw  mm2, [reg_blank]
    punpcklbw  mm3, [reg_blank]
    pmullw     mm1, [const6]
    paddw      mm2, mm3
    paddw      mm1, mm2
    psrlw      mm1, 3
    packuswb   mm1, [reg_blank]
    movd       %1, mm1
%endmacro

%macro Interp9 3
    movd       mm1, eax
    movd       mm2, %2
    movd       mm3, %3
    punpcklbw  mm1, [reg_blank]
    punpcklbw  mm2, [reg_blank]
    punpcklbw  mm3, [reg_blank]
    psllw      mm1, 1
    paddw      mm2, mm3
    pmullw     mm2, [const3]
    paddw      mm1, mm2
    psrlw      mm1, 3
    packuswb   mm1, [reg_blank]
    movd       %1, mm1
%endmacro

%macro Interp10 3
    movd       mm1, eax
    movd       mm2, %2
    movd       mm3, %3
    punpcklbw  mm1, [reg_blank]
    punpcklbw  mm2, [reg_blank]
    punpcklbw  mm3, [reg_blank]
    pmullw     mm1, [const14]
    paddw      mm2, mm3
    paddw      mm1, mm2
    psrlw      mm1, 4
    packuswb   mm1, [reg_blank]
    movd       %1, mm1
%endmacro

;----------------------------------------------------------
; PIXELrc_k : write one of the four output dwords of the 2x2 block.
;   PIXEL00 -> [edi]        PIXEL01 -> [edi+4]
;   PIXEL10 -> [edi+ebx]    PIXEL11 -> [edi+ebx+4]
;   The suffix selects the rule: _0 stores eax unchanged; _10/_11/_12 use
;   Interp1; _20/_21/_22 use Interp2; _60/_61 use Interp6; _70 uses
;   Interp7; _90 uses Interp9; _100 uses Interp10.
;----------------------------------------------------------

; --- output pixel (0,0): [edi]
%macro PIXEL00_0 0
    mov [edi],eax
%endmacro

%macro PIXEL00_10 0
    Interp1 [edi],eax,[c1]
%endmacro

%macro PIXEL00_11 0
    Interp1 [edi],eax,[c4]
%endmacro

%macro PIXEL00_12 0
    Interp1 [edi],eax,[c2]
%endmacro

%macro PIXEL00_20 0
    Interp2 [edi],eax,[c4],[c2]
%endmacro

%macro PIXEL00_21 0
    Interp2 [edi],eax,[c1],[c2]
%endmacro

%macro PIXEL00_22 0
    Interp2 [edi],eax,[c1],[c4]
%endmacro

%macro PIXEL00_60 0
    Interp6 [edi],[c2],[c4]
%endmacro

%macro PIXEL00_61 0
    Interp6 [edi],[c4],[c2]
%endmacro

%macro PIXEL00_70 0
    Interp7 [edi],[c4],[c2]
%endmacro

%macro PIXEL00_90 0
    Interp9 [edi],[c4],[c2]
%endmacro

%macro PIXEL00_100 0
    Interp10 [edi],[c4],[c2]
%endmacro

; --- output pixel (0,1): [edi+4]
%macro PIXEL01_0 0
    mov [edi+4],eax
%endmacro

%macro PIXEL01_10 0
    Interp1 [edi+4],eax,[c3]
%endmacro

%macro PIXEL01_11 0
    Interp1 [edi+4],eax,[c2]
%endmacro

%macro PIXEL01_12 0
    Interp1 [edi+4],eax,[c6]
%endmacro

%macro PIXEL01_20 0
    Interp2 [edi+4],eax,[c2],[c6]
%endmacro

%macro PIXEL01_21 0
    Interp2 [edi+4],eax,[c3],[c6]
%endmacro

%macro PIXEL01_22 0
    Interp2 [edi+4],eax,[c3],[c2]
%endmacro

%macro PIXEL01_60 0
    Interp6 [edi+4],[c6],[c2]
%endmacro

%macro PIXEL01_61 0
    Interp6 [edi+4],[c2],[c6]
%endmacro

%macro PIXEL01_70 0
    Interp7 [edi+4],[c2],[c6]
%endmacro

%macro PIXEL01_90 0
    Interp9 [edi+4],[c2],[c6]
%endmacro

%macro PIXEL01_100 0
    Interp10 [edi+4],[c2],[c6]
%endmacro

; --- output pixel (1,0): [edi+ebx]
%macro PIXEL10_0 0
    mov [edi+ebx],eax
%endmacro

%macro PIXEL10_10 0
    Interp1 [edi+ebx],eax,[c7]
%endmacro

%macro PIXEL10_11 0
    Interp1 [edi+ebx],eax,[c8]
%endmacro

%macro PIXEL10_12 0
    Interp1 [edi+ebx],eax,[c4]
%endmacro

%macro PIXEL10_20 0
    Interp2 [edi+ebx],eax,[c8],[c4]
%endmacro

%macro PIXEL10_21 0
    Interp2 [edi+ebx],eax,[c7],[c4]
%endmacro

%macro PIXEL10_22 0
    Interp2 [edi+ebx],eax,[c7],[c8]
%endmacro

%macro PIXEL10_60 0
    Interp6 [edi+ebx],[c4],[c8]
%endmacro

%macro PIXEL10_61 0
    Interp6 [edi+ebx],[c8],[c4]
%endmacro

%macro PIXEL10_70 0
    Interp7 [edi+ebx],[c8],[c4]
%endmacro

%macro PIXEL10_90 0
    Interp9 [edi+ebx],[c8],[c4]
%endmacro

%macro PIXEL10_100 0
    Interp10 [edi+ebx],[c8],[c4]
%endmacro

; --- output pixel (1,1): [edi+ebx+4]
%macro PIXEL11_0 0
    mov [edi+ebx+4],eax
%endmacro

%macro PIXEL11_10 0
    Interp1 [edi+ebx+4],eax,[c9]
%endmacro

%macro PIXEL11_11 0
    Interp1 [edi+ebx+4],eax,[c6]
%endmacro

%macro PIXEL11_12 0
    Interp1 [edi+ebx+4],eax,[c8]
%endmacro

%macro PIXEL11_20 0
    Interp2 [edi+ebx+4],eax,[c6],[c8]
%endmacro

%macro PIXEL11_21 0
    Interp2 [edi+ebx+4],eax,[c9],[c8]
%endmacro

%macro PIXEL11_22 0
    Interp2 [edi+ebx+4],eax,[c9],[c6]
%endmacro

%macro PIXEL11_60 0
    Interp6 [edi+ebx+4],[c8],[c6]
%endmacro

%macro PIXEL11_61 0
    Interp6 [edi+ebx+4],[c6],[c8]
%endmacro

%macro PIXEL11_70 0
    Interp7 [edi+ebx+4],[c6],[c8]
%endmacro

%macro PIXEL11_90 0
    Interp9 [edi+ebx+4],[c6],[c8]
%endmacro

%macro PIXEL11_100 0
    Interp10 [edi+ebx+4],[c6],[c8]
%endmacro

;----------------------------------------------------------
; hq2x_32b
;   Entry point.  Returns immediately when [curblank] == 40h.  Otherwise
;   saves all general registers (pushad), loads
;     esi = [vidbuffer] + 16*2+256*2+32*2   (source, 16-bit words)
;     edi = [WinVidMemStart]                (destination, dwords)
;     [deltaptr] = [vidbufferofsb]
;   and dispatches to 'nointerp' (plain 2x pixel doubling) or 'hq2x'.
;   'hq2x' is taken only when MMXSupport != 0 and hqFilter != 0, and
;   either FilteredGUI != 0 or GUIOn2 != 1.
;   Takes no register arguments; all inputs are the globals named above.
;----------------------------------------------------------
NEWSYM hq2x_32b
    cmp byte[curblank],40h
    jne .startcopy
    ret
.startcopy:
    pushad
    mov ax,ds
    mov es,ax
    mov esi,[vidbuffer]
    mov edi,[WinVidMemStart]
    add esi,16*2+256*2+32*2
    mov ecx,[vidbufferofsb]
    mov [deltaptr],ecx
    cmp byte[FilteredGUI],0
    jne .filtergui
    cmp byte[GUIOn2],1
    je  nointerp
.filtergui:
    cmp byte[MMXSupport],0
    je  nointerp
    cmp byte[hqFilter],0
    jne hq2x
    ; falls through into nointerp when hqFilter == 0

;----------------------------;
; nointerp: every source word is translated through the dword table at
; BitConv32Ptr and written to a 2x2 block of destination dwords.
;   ebx = [NumBytesPerLine]  (offset from the upper to the lower output row)
;   edx = [BitConv32Ptr]
;   Each row handles 256 source pixels, then skips 64 source bytes.
nointerp:
    mov dl,[resolutn]
    mov [lineleft],dl
    mov ebx,[NumBytesPerLine]
    mov edx,[BitConv32Ptr]
.loopy:
    mov ecx,256
.loopx:
    movzx eax,word[esi]
    mov eax,[edx+eax*4]
    mov [edi],eax
    mov [edi+4],eax
    mov [edi+ebx],eax
    mov [edi+ebx+4],eax
    add esi,2
    add edi,8
    dec ecx
    jnz .loopx
    add edi,[AddEndBytes]
    add edi,ebx                     ; step over the lower output row as well
    add esi,64
    dec byte[lineleft]
    jnz near .loopy
    popad
    ret

;----------------------------;
; hq2x: filtered path.
;   [prevline] = 0 and [nextline] = 576 are byte offsets added to esi to
;   reach the rows above and below (576 = 256*2 + 64, the source row
;   advance used by nointerp above).
;   ebx points at hirestiledat+1, or at SpecialLine+1 when GUIOn != 1 and
;   newengen != 0.
hq2x:
    mov dl,[resolutn]
    mov [lineleft],dl
    mov dword[prevline],0
    mov dword[nextline],576
    mov ebx,hirestiledat+1
    cmp byte[GUIOn],1
    je .loopy
    cmp byte[newengen],0
    je .loopy
    mov ebx,SpecialLine+1
.loopy:
    mov [InterPtr],ebx
    cmp byte[ebx],1
    jbe .nohires
    ; Flag byte > 1: hand the row to HighResProc (not visible in this
    ; listing), then overwrite this row's 128 dwords at [deltaptr] with
    ; 0xAAAAAAAA and advance deltaptr past them.
    call HighResProc
    mov edx,[deltaptr]
    mov ecx,128
    mov eax,0xAAAAAAAA
.a:
    mov [edx],eax
    add edx,4
    dec ecx
    jnz .a
    mov [deltaptr],edx
    jmp .nexty
.nohires:
    mov     dword[xcounter],254   ; x={Xres-2, Xres-1} are special cases.
    ; x=0 - special case
    ; Load four source words from each of the three rows and compare them
    ; with the words stored at the same offsets from [deltaptr].
    mov     edx,[deltaptr]
    mov     ecx,[prevline]
    mov     eax,[nextline]
    movq    mm2,[esi+ecx]
    movq    mm3,[esi]
    movq    mm4,[esi+eax]
    movq    mm5,mm2
    movq    mm6,mm3
    movq    mm7,mm4
    pcmpeqw mm2,[edx+ecx]
    pcmpeqw mm3,[edx]
    pcmpeqw mm4,[edx+eax]
    pand    mm2,mm3
    pand    mm2,mm4
    movd    eax,mm2
    inc     eax                     ; 0 only if the low two words matched in all rows
    jz      near .loopx_end
    ; Left edge: the missing left column reuses the centre column
    ; (w1=w2, w4=w5, w7=w8).
    movd    eax,mm5
    movzx   edx,ax
    mov     [w1],edx
    mov     [w2],edx
    shr     eax,16
    mov     [w3],eax
    movd    eax,mm6
    movzx   edx,ax
    mov     [w4],edx
    mov     [w5],edx
    shr     eax,16
    mov     [w6],eax
    movd    eax,mm7
    movzx   edx,ax
    mov     [w7],edx
    mov     [w8],edx
    shr     eax,16
    mov     [w9],eax
    jmp     .flags
.loopx:
    ; General case: the 3x3 window starts one pixel (2 bytes) left of esi.
    mov     edx,[deltaptr]
    mov     ecx,[prevline]
    mov     eax,[nextline]
    movq    mm2,[esi+ecx-2]
    movq    mm3,[esi-2]
    movq    mm4,[esi+eax-2]
    movq    mm5,mm2
    movq    mm6,mm3
    movq    mm7,mm4
    pcmpeqw mm2,[edx+ecx-2]
    pcmpeqw mm3,[edx-2]
    pcmpeqw mm4,[edx+eax-2]
    pand    mm2,mm3
    pand    mm2,mm4
    movd    ebx,mm2                 ; match masks of words 0 and 1
    psrlq   mm2,32
    movd    eax,mm2
    cwde                            ; spread the word-2 mask over eax
    and     eax,ebx
    inc     eax                     ; 0 only if words 0..2 matched in all rows
    jz      near .loopx_end
    movd    eax,mm5
    mov     [edx+ecx-2],ax          ; store the upper-left word into the delta buffer
    movzx   edx,ax
    mov     [w1],edx
    shr     eax,16
    mov     [w2],eax
    psrlq   mm5,32
    movd    eax,mm5
    movzx   edx,ax
    mov     [w3],edx
    movd    eax,mm6
    movzx   edx,ax
    mov     [w4],edx
    shr     eax,16
    mov     [w5],eax
    psrlq   mm6,32
    movd    eax,mm6
    movzx   edx,ax
    mov     [w6],edx
    movd    eax,mm7
    movzx   edx,ax
    mov     [w7],edx
    shr     eax,16
    mov     [w8],eax
    psrlq   mm7,32
    movd    eax,mm7
    movzx   edx,ax
    mov     [w9],edx
.flags:
    ; Build the difference pattern in ecx.  A bit is set when the
    ; neighbour's RGBtoYUVPtr entry differs from the centre's (w5) by more
    ; than [threshold] in any byte:
    ;   w1=1  w2=2  w3=4  w4=8  w6=16  w7=32  w8=64  w9=128
    ; [cross] gets bit 1/2/4/8 whenever w2/w4/w6/w8 is not identical to w5,
    ; before the threshold test.
    ; eax = w5 and mm5 = table entry of w5 throughout.
    mov     ebx,[RGBtoYUVPtr]
    mov     eax,[w5]
    xor     ecx,ecx
    movd    mm5,[ebx+eax*4]
    mov     dword[cross],0
    mov     edx,[w2]
    cmp     eax,edx
    je      .noflag2
    or      dword[cross],1
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag2
    or      ecx,2
.noflag2:
    mov     edx,[w4]
    cmp     eax,edx
    je      .noflag4
    or      dword[cross],2
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag4
    or      ecx,8
.noflag4:
    mov     edx,[w6]
    cmp     eax,edx
    je      .noflag6
    or      dword[cross],4
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag6
    or      ecx,16
.noflag6:
    mov     edx,[w8]
    cmp     eax,edx
    je      .noflag8
    or      dword[cross],8
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag8
    or      ecx,64
.noflag8:
    test    ecx,ecx
    jnz     .testflag1
    ; None of w2/w4/w6/w8 exceeded the threshold: convert the centre pixel
    ; and dispatch through FuncTable2 indexed by [cross].
    mov     ecx,[cross]
    mov     ebx,[BitConv32Ptr]
    mov     eax,[ebx+eax*4]
    jmp     [FuncTable2+ecx*4]
.testflag1:
    ; Corner neighbours w1, w3, w7, w9 (these do not touch [cross]).
    mov     edx,[w1]
    cmp     eax,edx
    je      .noflag1
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag1
    or      ecx,1
.noflag1:
    mov     edx,[w3]
    cmp     eax,edx
    je      .noflag3
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag3
    or      ecx,4
.noflag3:
    mov     edx,[w7]
    cmp     eax,edx
    je      .noflag7
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag7
    or      ecx,32
.noflag7:
    mov     edx,[w9]
    cmp     eax,edx
    je      .noflag9
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag9
    or      ecx,128
.noflag9:
    ; Convert the centre pixel (kept in eax) and the four edge neighbours
    ; through the BitConv32Ptr table; the neighbours go to c2/c4/c6/c8.
    mov     ebx,[BitConv32Ptr]
    mov     eax,[ebx+eax*4]
    mov     edx,[w2]
    mov     edx,[ebx+edx*4]
    mov     [c2],edx
    mov     edx,[w4]
    mov     edx,[ebx+edx*4]
    mov     [c4],edx
    mov     edx,[w6]
    mov     edx,[ebx+edx*4]
    mov     [c6],edx
    mov     edx,[w8]
    mov     edx,[ebx+edx*4]
    mov     [c8],edx
    test    ecx,0x005A              ; 0x5A = bits 2|8|16|64 (w2, w4, w6, w8)
    jz      .switch
    ; NOTE(review): the visible listing ends here.  .switch, .loopx_end,
    ; .nexty, FuncTable2, InterPtr and HighResProc are referenced above but
    ; defined in the part of the file that is not shown.

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?