hq3x16.asm

来自「linux下的任天堂模拟器代码。供大家参考。」· 汇编 代码 · 共 2,750 行 · 第 1/4 页

ASM
2,750
字号
;Copyright (C) 1997-2007 ZSNES Team ( zsKnight, _Demo_, pagefault, Nach )
;
;http://www.zsnes.com
;http://sourceforge.net/projects/zsnes
;https://zsnes.bountysource.com
;
;This program is free software; you can redistribute it and/or
;modify it under the terms of the GNU General Public License
;version 2 as published by the Free Software Foundation.
;
;This program is distributed in the hope that it will be useful,
;but WITHOUT ANY WARRANTY; without even the implied warranty of
;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;GNU General Public License for more details.
;
;You should have received a copy of the GNU General Public License
;along with this program; if not, write to the Free Software
;Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

;----------------------------------------------------------
; hq3x filter
; Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
;----------------------------------------------------------

; Syntax: NASM (Intel operand order).  Target: 32-bit x86 with MMX.
; EXTSYM / NEWSYM are not defined in this file; presumably they come from
; macros.mac and declare imported / exported symbols - confirm there.

%include "macros.mac"

EXTSYM vidbuffer,curblank,MMXSupport,GUIOn,GUIOn2,vidbufferofsb,FilteredGUI
EXTSYM resolutn,lineleft,cfield,hirestiledat,newengen,SpecialLine,HalfTrans
EXTSYM hqFilter,AddEndBytes,NumBytesPerLine,WinVidMemStart,BitConv32Ptr
EXTSYM RGBtoYUVPtr,prevline,nextline,deltaptr,xcounter,w1,w2,w3,w4,w5,w6,w7,w8
EXTSYM w9,reg_blank,const7,cross,threshold

SECTION .text

;----------------------------------------------------------
; TestDiff a, b
;   In:    %1, %2 = names of dword variables holding pixel values
;   Out:   ecx = 0 when [%1] == [%2], or when no byte of the absolute
;          difference between their [RGBtoYUVPtr] table entries exceeds
;          the matching byte of [threshold]; non-zero otherwise.
;   Clobb: ecx, edx, mm1, mm2, mm5, flags
;----------------------------------------------------------
%macro TestDiff 2
    xor     ecx,ecx
    mov     edx,[%1]
    cmp     edx,[%2]
    je      %%fin                   ; identical pixels: ecx stays 0
    mov     ecx,[RGBtoYUVPtr]
    movd    mm1,[ecx+edx*4]         ; table entry for pixel a
    movq    mm5,mm1
    mov     edx,[%2]
    movd    mm2,[ecx+edx*4]         ; table entry for pixel b
    psubusb mm1,mm2                 ; saturating a-b
    psubusb mm2,mm5                 ; saturating b-a
    por     mm1,mm2                 ; per-byte |a-b|
    psubusb mm1,[threshold]         ; bytes within threshold become 0
    movd    ecx,mm1
%%fin:
%endmacro

;----------------------------------------------------------
; DiffOrNot a, b, <statements if different>, <statements if same>
;   Runs TestDiff %1,%2, then expands the first half of the remaining
;   arguments when ecx != 0 and the second half when ecx == 0.
;   Overloaded on argument count: 1, 2, 3 or 4 statements per branch.
;----------------------------------------------------------
%macro DiffOrNot 4
    TestDiff %1,%2
    test ecx,ecx
    jz   %%same
    %3
    jmp %%fin
%%same:
    %4
%%fin
%endmacro

%macro DiffOrNot 6
    TestDiff %1,%2
    test ecx,ecx
    jz   %%same
    %3
    %4
    jmp %%fin
%%same:
    %5
    %6
%%fin
%endmacro

%macro DiffOrNot 8
    TestDiff %1,%2
    test ecx,ecx
    jz   %%same
    %3
    %4
    %5
    jmp %%fin
%%same:
    %6
    %7
    %8
%%fin
%endmacro

%macro DiffOrNot 10
    TestDiff %1,%2
    test ecx,ecx
    jz %%same
    %3
    %4
    %5
    %6
    jmp %%fin
%%same:
    %7
    %8
    %9
    %10
%%fin
%endmacro

;----------------------------------------------------------
; Interp1 dest, c1, c2
;   Stores a 16-bit blend weighted roughly 3:1 towards c1:
;   ((c1 + ((c1 + c2) >> 1)) >> 1), with both inputs masked by [HalfTrans]
;   before each halving.  When c1 == c2 the value is stored unchanged.
;   0x0821 has bits 0, 5 and 11 set - presumably a rounding bias on the low
;   bit of each RGB565 field; HalfTrans is external, confirm its value.
;   Clobb: ecx, edx, flags
;----------------------------------------------------------
%macro Interp1 3
    mov edx,%2
    mov ecx,%3
    cmp edx,ecx
    je  %%fin
    and edx,[HalfTrans]
    and ecx,[HalfTrans]
    add ecx,edx
    shr ecx,1                       ; ecx = average of c1 and c2
    add ecx,0x0821
    and ecx,[HalfTrans]
    add edx,ecx
    shr edx,1                       ; edx = average of c1 and that average
%%fin
    mov %1,dx
%endmacro

;----------------------------------------------------------
; Interp2 dest, c1, c2, c3
;   First averages c2 and c3 (skipped when they are equal), then averages
;   the result with c1 (skipped when equal) - roughly weights 2:1:1.
;   Clobb: ecx, edx, flags
;----------------------------------------------------------
%macro Interp2 4
    mov edx,%3
    mov ecx,%4
    cmp edx,ecx
    je  %%fin1
    and edx,[HalfTrans]
    and ecx,[HalfTrans]
    add ecx,edx
    shr ecx,1                       ; ecx = average of c2 and c3
    add ecx,0x0821
%%fin1
    mov edx,%2
    cmp edx,ecx
    je  %%fin2
    and ecx,[HalfTrans]
    and edx,[HalfTrans]
    add edx,ecx
    shr edx,1                       ; edx = average of c1 and ecx
%%fin2
    mov %1,dx
%endmacro

;----------------------------------------------------------
; Interp3 dest, c2
;   In:    eax = first colour (implicit), %2 = second colour
;   Looks both colours up in the [BitConv32Ptr] table, widens the bytes to
;   words, computes (entry(eax) * [const7] + entry(c2)) >> 5 per word, packs
;   back to bytes and squeezes the result into one 16-bit pixel with the
;   final four shifts.  const7 is external - presumably words of 7; confirm.
;   Clobb: ecx, edx, mm1, mm2, flags
;----------------------------------------------------------
%macro Interp3 2
    mov        ecx, [BitConv32Ptr]
    movd       mm1, [ecx+eax*4]
    mov        edx, %2
    movd       mm2, [ecx+edx*4]
    punpcklbw  mm1, [reg_blank]
    punpcklbw  mm2, [reg_blank]
    pmullw     mm1, [const7]
    paddw      mm1, mm2
    psrlw      mm1, 5
    packuswb   mm1, [reg_blank]
    movd       edx, mm1
    shl        dl,  2               ; repack the byte channels in edx ...
    shr        edx, 1
    shl        dx,  3
    shr        edx, 5               ; ... into a single 16-bit value in dx
    mov        %1,  dx
%endmacro

;----------------------------------------------------------
; Interp4 dest, c2, c3
;   In:    eax = first colour (implicit)
;   Same table lookup and repacking as Interp3, but computes
;   (entry(eax) * 2 + (entry(c2) + entry(c3)) * [const7]) >> 6 per word.
;   Clobb: ecx, edx, mm1, mm2, mm3, flags
;----------------------------------------------------------
%macro Interp4 3
    mov        ecx, [BitConv32Ptr]
    movd       mm1, [ecx+eax*4]
    mov        edx, %2
    movd       mm2, [ecx+edx*4]
    mov        edx, %3
    movd       mm3, [ecx+edx*4]
    punpcklbw  mm1, [reg_blank]
    punpcklbw  mm2, [reg_blank]
    punpcklbw  mm3, [reg_blank]
    psllw      mm1, 1
    paddw      mm2, mm3
    pmullw     mm2, [const7]
    paddw      mm1, mm2
    psrlw      mm1, 6
    packuswb   mm1, [reg_blank]
    movd       edx, mm1
    shl        dl,  2               ; repack the byte channels in edx ...
    shr        edx, 1
    shl        dx,  3
    shr        edx, 5               ; ... into a single 16-bit value in dx
    mov        %1,  dx
%endmacro

;----------------------------------------------------------
; Interp5 dest, c1, c2
;   Stores the [HalfTrans]-masked average of c1 and c2 (or c1 unchanged
;   when the two are equal).
;   Clobb: ecx, edx, flags
;----------------------------------------------------------
%macro Interp5 3
    mov edx,%2
    mov ecx,%3
    cmp edx,ecx
    je  %%fin
    and edx,[HalfTrans]
    and ecx,[HalfTrans]
    add edx,ecx
    shr edx,1
%%fin
    mov %1,dx
%endmacro

;----------------------------------------------------------
; PIXELrc_<variant>
;   Each macro writes one 16-bit pixel of the 3x3 output block at
;   [edi + r*ebx + c*2].  At dispatch time (see .flags in hq3x) eax holds
;   [w5] and ebx holds [NumBytesPerLine].  w1..w9 are the 3x3 source
;   neighbourhood filled in by hq3x, w5 being the centre.
;   The _C variants (and PIXEL11) copy ax as is; the others blend through
;   the InterpN macro they name.
;----------------------------------------------------------

; --- row 0, column 0 ---
%macro PIXEL00_1M 0
    Interp1 [edi],eax,[w1]
%endmacro

%macro PIXEL00_1U 0
    Interp1 [edi],eax,[w2]
%endmacro

%macro PIXEL00_1L 0
    Interp1 [edi],eax,[w4]
%endmacro

%macro PIXEL00_2 0
    Interp2 [edi],eax,[w4],[w2]
%endmacro

%macro PIXEL00_4 0
    Interp4 [edi],[w4],[w2]
%endmacro

%macro PIXEL00_5 0
    Interp5 [edi],[w4],[w2]
%endmacro

%macro PIXEL00_C 0
    mov [edi],ax
%endmacro

; --- row 0, column 1 ---
%macro PIXEL01_1 0
    Interp1 [edi+2],eax,[w2]
%endmacro

%macro PIXEL01_3 0
    Interp3 [edi+2],[w2]
%endmacro

%macro PIXEL01_6 0
    Interp1 [edi+2],[w2],eax
%endmacro

%macro PIXEL01_C 0
    mov [edi+2],ax
%endmacro

; --- row 0, column 2 ---
%macro PIXEL02_1M 0
    Interp1 [edi+4],eax,[w3]
%endmacro

%macro PIXEL02_1U 0
    Interp1 [edi+4],eax,[w2]
%endmacro

%macro PIXEL02_1R 0
    Interp1 [edi+4],eax,[w6]
%endmacro

%macro PIXEL02_2 0
    Interp2 [edi+4],eax,[w2],[w6]
%endmacro

%macro PIXEL02_4 0
    Interp4 [edi+4],[w2],[w6]
%endmacro

%macro PIXEL02_5 0
    Interp5 [edi+4],[w2],[w6]
%endmacro

%macro PIXEL02_C 0
    mov [edi+4],ax
%endmacro

; --- row 1, column 0 ---
%macro PIXEL10_1 0
    Interp1 [edi+ebx],eax,[w4]
%endmacro

%macro PIXEL10_3 0
    Interp3 [edi+ebx],[w4]
%endmacro

%macro PIXEL10_6 0
    Interp1 [edi+ebx],[w4],eax
%endmacro

%macro PIXEL10_C 0
    mov [edi+ebx],ax
%endmacro

; --- row 1, column 1 (centre: always a plain copy) ---
%macro PIXEL11 0
    mov [edi+ebx+2],ax
%endmacro

; --- row 1, column 2 ---
%macro PIXEL12_1 0
    Interp1 [edi+ebx+4],eax,[w6]
%endmacro

%macro PIXEL12_3 0
    Interp3 [edi+ebx+4],[w6]
%endmacro

%macro PIXEL12_6 0
    Interp1 [edi+ebx+4],[w6],eax
%endmacro

%macro PIXEL12_C 0
    mov [edi+ebx+4],ax
%endmacro

; --- row 2, column 0 ---
%macro PIXEL20_1M 0
    Interp1 [edi+ebx*2],eax,[w7]
%endmacro

%macro PIXEL20_1D 0
    Interp1 [edi+ebx*2],eax,[w8]
%endmacro

%macro PIXEL20_1L 0
    Interp1 [edi+ebx*2],eax,[w4]
%endmacro

%macro PIXEL20_2 0
    Interp2 [edi+ebx*2],eax,[w8],[w4]
%endmacro

%macro PIXEL20_4 0
    Interp4 [edi+ebx*2],[w8],[w4]
%endmacro

%macro PIXEL20_5 0
    Interp5 [edi+ebx*2],[w8],[w4]
%endmacro

%macro PIXEL20_C 0
    mov [edi+ebx*2],ax
%endmacro

; --- row 2, column 1 ---
%macro PIXEL21_1 0
    Interp1 [edi+ebx*2+2],eax,[w8]
%endmacro

%macro PIXEL21_3 0
    Interp3 [edi+ebx*2+2],[w8]
%endmacro

%macro PIXEL21_6 0
    Interp1 [edi+ebx*2+2],[w8],eax
%endmacro

%macro PIXEL21_C 0
    mov [edi+ebx*2+2],ax
%endmacro

; --- row 2, column 2 ---
%macro PIXEL22_1M 0
    Interp1 [edi+ebx*2+4],eax,[w9]
%endmacro

%macro PIXEL22_1D 0
    Interp1 [edi+ebx*2+4],eax,[w8]
%endmacro

%macro PIXEL22_1R 0
    Interp1 [edi+ebx*2+4],eax,[w6]
%endmacro

%macro PIXEL22_2 0
    Interp2 [edi+ebx*2+4],eax,[w6],[w8]
%endmacro

%macro PIXEL22_4 0
    Interp4 [edi+ebx*2+4],[w6],[w8]
%endmacro

%macro PIXEL22_5 0
    Interp5 [edi+ebx*2+4],[w6],[w8]
%endmacro

%macro PIXEL22_C 0
    mov [edi+ebx*2+4],ax
%endmacro

;----------------------------------------------------------
; hq3x_16b
;   Entry point: copies the 16-bit frame at [vidbuffer] to [WinVidMemStart]
;   at 3x size, either by plain pixel replication (nointerp) or through the
;   hq3x filter (hq3x).
;   In:    no register arguments; all state comes from the external
;          variables listed in the EXTSYM block.
;   Out:   nothing in registers.  The nointerp path restores all general
;          registers with popad before returning; the hq3x path's exit is
;          beyond the visible part of this file.
;   Path selection:
;     curblank == 40h                    -> return immediately
;     FilteredGUI == 0 and GUIOn2 == 1   -> nointerp
;     MMXSupport == 0                    -> nointerp
;     hqFilter != 0                      -> hq3x
;     otherwise                          -> falls through into nointerp
;----------------------------------------------------------
NEWSYM hq3x_16b
    cmp byte[curblank],40h
    jne .startcopy
    ret
.startcopy
    pushad
    mov ax,ds
    mov es,ax
    mov esi,[vidbuffer]
    mov edi,[WinVidMemStart]
    add esi,16*2+256*2+32*2         ; +608 bytes = one 576-byte line + 32 bytes
    mov ecx,[vidbufferofsb]
    mov [deltaptr],ecx              ; deltaptr starts at vidbufferofsb
    cmp byte[FilteredGUI],0
    jne .filtergui
    cmp byte[GUIOn2],1
    je  nointerp
.filtergui
    cmp byte[MMXSupport],0
    je  nointerp
    cmp byte[hqFilter],0
    jne hq3x

;----------------------------;
; nointerp: plain 3x replication.
;   esi = source pixels, edi = destination, ebx = destination pitch,
;   dl = rows left, ecx = pixels left in the row.
;   Each source pixel is written to a 3x3 destination block.
nointerp:
    mov dl,[resolutn]
    mov ebx,[NumBytesPerLine]
.loopy
    mov ecx,256
.loopx
    mov ax,[esi]
    shl eax,16
    mov ax,[esi]                    ; eax = the same pixel in both halves
    mov [edi],eax                   ; columns 0-1 of the three output rows
    mov [edi+ebx],eax
    mov [edi+ebx*2],eax
    mov [edi+4],ax                  ; column 2 of the three output rows
    mov [edi+ebx+4],ax
    mov [edi+ebx*2+4],ax
    add esi,2
    add edi,6
    dec ecx
    jnz .loopx
    add edi,[AddEndBytes]
    add edi,ebx                     ; skip the two extra output rows just written
    add edi,ebx
    add esi,64                      ; 512 + 64 = 576 bytes per source line
    dec dl
    jnz near .loopy
    popad
    ret

;----------------------------;
; hq3x: filtered path.  The visible portion sets up the row loop, handles
; rows routed to HighResProc, gathers the 3x3 neighbourhood w1..w9 for each
; pixel and dispatches on a pattern index.  InterPtr, HighResProc, FuncTable,
; FuncTable2, .nexty and .loopx_end are not in the visible portion of this
; file - presumably defined further down.
hq3x:
    mov dl,[resolutn]
    mov [lineleft],dl
    mov dword[prevline],0           ; first row: "previous" row = current row
    mov dword[nextline],576         ; byte offset to the next source line
    mov ebx,hirestiledat+1
    cmp byte[GUIOn],1
    je .loopy
    cmp byte[newengen],0
    je .loopy
    mov ebx,SpecialLine+1           ; GUI off and newengen set: use SpecialLine
.loopy
    mov [InterPtr],ebx
    cmp byte[ebx],1
    jbe .nohires
    ; per-line byte > 1: hand the row to HighResProc, then fill 128 dwords
    ; (512 bytes) of the delta buffer with 0xAAAAAAAA and move on.
    call HighResProc
    mov edx,[deltaptr]
    mov ecx,128
    mov eax,0xAAAAAAAA
.a
    mov [edx],eax
    add edx,4
    dec ecx
    jnz .a
    mov [deltaptr],edx
    jmp .nexty
.nohires
    mov     dword[xcounter],254   ; x={Xres-2, Xres-1} are special cases.
    ; x=0 - special case
    ; Loads four pixels from the previous, current and next rows and compares
    ; them with the delta buffer at the same offsets.  Only the low dword
    ; (pixels x=0 and x=1) is checked; if all three rows match, the pixel is
    ; skipped via .loopx_end.
    mov     edx,[deltaptr]
    mov     ecx,[prevline]
    mov     eax,[nextline]
    movq    mm2,[esi+ecx]
    movq    mm3,[esi]
    movq    mm4,[esi+eax]
    movq    mm5,mm2                 ; keep raw copies of the three rows
    movq    mm6,mm3
    movq    mm7,mm4
    pcmpeqw mm2,[edx+ecx]
    pcmpeqw mm3,[edx]
    pcmpeqw mm4,[edx+eax]
    pand    mm2,mm3
    pand    mm2,mm4
    movd    eax,mm2
    inc     eax                     ; 0xFFFFFFFF + 1 = 0: everything matched
    jz      near .loopx_end
    ; No pixel to the left at x=0: the left column reuses the centre column
    ; (w1=w2, w4=w5, w7=w8).
    movd    eax,mm5
    movzx   edx,ax
    mov     [w1],edx
    mov     [w2],edx
    shr     eax,16
    mov     [w3],eax
    movd    eax,mm6
    movzx   edx,ax
    mov     [w4],edx
    mov     [w5],edx
    shr     eax,16
    mov     [w6],eax
    movd    eax,mm7
    movzx   edx,ax
    mov     [w7],edx
    mov     [w8],edx
    shr     eax,16
    mov     [w9],eax
    jmp     .flags
.loopx
    ; General case: the window starts one pixel (2 bytes) to the left of esi.
    mov     edx,[deltaptr]
    mov     ecx,[prevline]
    mov     eax,[nextline]
    movq    mm2,[esi+ecx-2]
    movq    mm3,[esi-2]
    movq    mm4,[esi+eax-2]
    movq    mm5,mm2
    movq    mm6,mm3
    movq    mm7,mm4
    pcmpeqw mm2,[edx+ecx-2]
    pcmpeqw mm3,[edx-2]
    pcmpeqw mm4,[edx+eax-2]
    pand    mm2,mm3
    pand    mm2,mm4
    movd    ebx,mm2                 ; match masks for pixels 0 and 1
    psrlq   mm2,32
    movd    eax,mm2
    cwde                            ; mask of pixel 2 sign-extended; pixel 3 ignored
    and     eax,ebx
    inc     eax                     ; zero only when all three columns matched
    jz      near .loopx_end
    movd    eax,mm5
    mov     [edx+ecx-2],ax          ; record previous-row pixel x-1 in the delta buffer
    movzx   edx,ax
    mov     [w1],edx
    shr     eax,16
    mov     [w2],eax
    psrlq   mm5,32
    movd    eax,mm5
    movzx   edx,ax
    mov     [w3],edx
    movd    eax,mm6
    movzx   edx,ax
    mov     [w4],edx
    shr     eax,16
    mov     [w5],eax
    psrlq   mm6,32
    movd    eax,mm6
    movzx   edx,ax
    mov     [w6],edx
    movd    eax,mm7
    movzx   edx,ax
    mov     [w7],edx
    shr     eax,16
    mov     [w8],eax
    psrlq   mm7,32
    movd    eax,mm7
    movzx   edx,ax
    mov     [w9],edx
.flags
    ; Build the pattern index in ecx by comparing the centre pixel w5 with
    ; each neighbour (same thresholded table difference as TestDiff).
    ;   ecx bits:   w1=1 w2=2 w3=4 w4=8 w6=16 w7=32 w8=64 w9=128
    ;   cross bits: w2=1 w4=2 w6=4 w8=8, set on any exact inequality with
    ;               w5, whether or not the threshold is exceeded.
    ; eax keeps [w5] and mm5 its table entry for the whole sequence.
    mov     ebx,[RGBtoYUVPtr]
    mov     eax,[w5]
    xor     ecx,ecx
    movd    mm5,[ebx+eax*4]
    mov     dword[cross],0
    ; --- w2 (up) ---
    mov     edx,[w2]
    cmp     eax,edx
    je      .noflag2
    or      dword[cross],1
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag2
    or      ecx,2
.noflag2
    ; --- w4 (left) ---
    mov     edx,[w4]
    cmp     eax,edx
    je      .noflag4
    or      dword[cross],2
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag4
    or      ecx,8
.noflag4
    ; --- w6 (right) ---
    mov     edx,[w6]
    cmp     eax,edx
    je      .noflag6
    or      dword[cross],4
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag6
    or      ecx,16
.noflag6
    ; --- w8 (down) ---
    mov     edx,[w8]
    cmp     eax,edx
    je      .noflag8
    or      dword[cross],8
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag8
    or      ecx,64
.noflag8
    ; No edge neighbour exceeded the threshold: skip the corner tests and
    ; dispatch on the exact-inequality mask instead.
    test    ecx,ecx
    jnz     .testflag1
    mov     ecx,[cross]
    mov     ebx,[NumBytesPerLine]
    jmp     [FuncTable2+ecx*4]
.testflag1
    ; --- w1 (up-left) ---
    mov     edx,[w1]
    cmp     eax,edx
    je      .noflag1
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag1
    or      ecx,1
.noflag1
    ; --- w3 (up-right) ---
    mov     edx,[w3]
    cmp     eax,edx
    je      .noflag3
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag3
    or      ecx,4
.noflag3
    ; --- w7 (down-left) ---
    mov     edx,[w7]
    cmp     eax,edx
    je      .noflag7
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag7
    or      ecx,32
.noflag7
    ; --- w9 (down-right) ---
    mov     edx,[w9]
    cmp     eax,edx
    je      .noflag9
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag9
    or      ecx,128
.noflag9
    mov ebx,[NumBytesPerLine]       ; ebx = output pitch for the PIXELrc macros
    jmp [FuncTable+ecx*4]           ; dispatch on the 8-bit pattern index

; Pattern handlers.  The list of labels and their bodies continues beyond
; the visible portion of this file.
..@flag0
..@flag1
..@flag4

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?