hq2x32.asm
来自「linux下的任天堂模拟器代码。供大家参考。」· 汇编 代码 · 共 2,134 行 · 第 1/3 页
ASM
2,134 行
;Copyright (C) 1997-2007 ZSNES Team ( zsKnight, _Demo_, pagefault, Nach )
;
;http://www.zsnes.com
;http://sourceforge.net/projects/zsnes
;https://zsnes.bountysource.com
;
;This program is free software; you can redistribute it and/or
;modify it under the terms of the GNU General Public License
;version 2 as published by the Free Software Foundation.
;
;This program is distributed in the hope that it will be useful,
;but WITHOUT ANY WARRANTY; without even the implied warranty of
;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;GNU General Public License for more details.
;
;You should have received a copy of the GNU General Public License
;along with this program; if not, write to the Free Software
;Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

;----------------------------------------------------------
;hq2x filter
;Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
;----------------------------------------------------------

; Syntax: NASM (Intel operand order), 32-bit x86 with MMX.
; EXTSYM / NEWSYM are macros supplied by macros.mac.

%include "macros.mac"

EXTSYM vidbuffer,curblank,MMXSupport,GUIOn,GUIOn2,vidbufferofsb,FilteredGUI
EXTSYM resolutn,lineleft,cfield,hirestiledat,newengen,SpecialLine,hqFilter
EXTSYM AddEndBytes,NumBytesPerLine,WinVidMemStart,BitConv32Ptr,RGBtoYUVPtr
EXTSYM prevline,nextline,deltaptr,xcounter,w1,w2,w3,w4,w5,w6,w7,w8,w9
EXTSYM reg_blank,const3,const5,const6,const14,cross,threshold

SECTION .bss
; c1..c9: one dword each. In the visible code c2/c4/c6/c8 are filled with the
; BitConv32Ptr-converted values of w2/w4/w6/w8 and read by the PIXELxx_* macros.
NEWSYM c1, resd 1
NEWSYM c2, resd 1
NEWSYM c3, resd 1
NEWSYM c4, resd 1
NEWSYM c5, resd 1
NEWSYM c6, resd 1
NEWSYM c7, resd 1
NEWSYM c8, resd 1
NEWSYM c9, resd 1

SECTION .text

;-----------------------------------------------------------------------
; TestDiff a, b
;   a, b : names of dword variables holding table indices.
; Out:   ecx = 0 when [a] == [b]; otherwise the low dword of
;        |T[a] - T[b]| (per byte) minus [threshold] with unsigned
;        saturation, where T is the dword table at [RGBtoYUVPtr].
;        So ecx != 0 means at least one byte differs by more than the
;        corresponding threshold byte.
; Clobb: ecx, edx, mm1, mm2, mm5, flags
;-----------------------------------------------------------------------
%macro TestDiff 2
    xor ecx,ecx
    mov edx,[%1]
    cmp edx,[%2]
    je %%fin                    ; identical indices: report "same" (ecx = 0)
    mov ecx,[RGBtoYUVPtr]
    movd mm1,[ecx+edx*4]
    movq mm5,mm1
    mov edx,[%2]
    movd mm2,[ecx+edx*4]
    psubusb mm1,mm2             ; max(T[a]-T[b], 0) per byte
    psubusb mm2,mm5             ; max(T[b]-T[a], 0) per byte
    por mm1,mm2                 ; per-byte absolute difference
    psubusb mm1,[threshold]
    movd ecx,mm1
%%fin:
%endmacro

;-----------------------------------------------------------------------
; DiffOrNot a, b, <stmts if different...>, <stmts if same...>
;   Runs TestDiff a,b, then expands the first half of the remaining
;   arguments when ecx != 0 and the second half when ecx == 0.
;   Overloads exist for 1, 2, 3 and 4 statements per branch.
; Clobb: everything TestDiff clobbers, plus whatever the statements do.
;-----------------------------------------------------------------------
%macro DiffOrNot 4
    TestDiff %1,%2
    test ecx,ecx
    jz %%same
    %3
    jmp %%fin
%%same:
    %4
%%fin:
%endmacro

%macro DiffOrNot 6
    TestDiff %1,%2
    test ecx,ecx
    jz %%same
    %3
    %4
    jmp %%fin
%%same:
    %5
    %6
%%fin:
%endmacro

%macro DiffOrNot 8
    TestDiff %1,%2
    test ecx,ecx
    jz %%same
    %3
    %4
    %5
    jmp %%fin
%%same:
    %6
    %7
    %8
%%fin:
%endmacro

%macro DiffOrNot 10
    TestDiff %1,%2
    test ecx,ecx
    jz %%same
    %3
    %4
    %5
    %6
    jmp %%fin
%%same:
    %7
    %8
    %9
    %10
%%fin:
%endmacro

;-----------------------------------------------------------------------
; Interp1 dst, a, b      dst = (a*4 + b - a) >> 2   i.e. (3*a + b) / 4
; Interp2 dst, a, b, c   dst = (a*2 + b + c) >> 2
; Interp5 dst, a, b      dst = (a + b) >> 1
;   Computed through edx on the whole 32-bit value, not per byte.
;   NOTE(review): presumably relies on the pixel values having unused
;   low bits in every channel so sums do not bleed between channels -
;   confirm against the table behind BitConv32Ptr.
; Clobb: edx, flags
;-----------------------------------------------------------------------
%macro Interp1 3
    mov edx,%2
    shl edx,2
    add edx,%3
    sub edx,%2
    shr edx,2
    mov %1,edx
%endmacro

%macro Interp2 4
    mov edx,%2
    shl edx,1
    add edx,%3
    add edx,%4
    shr edx,2
    mov %1,edx
%endmacro

%macro Interp5 3
    mov edx,%2
    add edx,%3
    shr edx,1
    mov %1,edx
%endmacro

;-----------------------------------------------------------------------
; Interp6 / Interp7 / Interp9 / Interp10  dst, a, b
;   Per-byte weighted blends of eax (implicit first input), a and b,
;   done in 16-bit lanes with MMX:
;     Interp6 : (eax*[const5]  + a*2 + b)        >> 3
;     Interp7 : (eax*[const6]  + a + b)          >> 3
;     Interp9 : (eax*2         + (a+b)*[const3]) >> 3
;     Interp10: (eax*[const14] + a + b)          >> 4
;   Bytes are widened by interleaving with [reg_blank] and narrowed
;   again with packuswb.
;   NOTE(review): presumably [reg_blank] is all zero and each [constN]
;   holds N in every word lane; both are defined elsewhere - confirm.
; In:    eax = first pixel; a, b = dword operands accepted by movd
; Clobb: mm1, mm2, mm3
;-----------------------------------------------------------------------
%macro Interp6 3
    movd mm1, eax
    movd mm2, %2
    movd mm3, %3
    punpcklbw mm1, [reg_blank]
    punpcklbw mm2, [reg_blank]
    punpcklbw mm3, [reg_blank]
    pmullw mm1, [const5]
    psllw mm2, 1
    paddw mm1, mm3
    paddw mm1, mm2
    psrlw mm1, 3
    packuswb mm1, [reg_blank]
    movd %1, mm1
%endmacro

%macro Interp7 3
    movd mm1, eax
    movd mm2, %2
    movd mm3, %3
    punpcklbw mm1, [reg_blank]
    punpcklbw mm2, [reg_blank]
    punpcklbw mm3, [reg_blank]
    pmullw mm1, [const6]
    paddw mm2, mm3
    paddw mm1, mm2
    psrlw mm1, 3
    packuswb mm1, [reg_blank]
    movd %1, mm1
%endmacro

%macro Interp9 3
    movd mm1, eax
    movd mm2, %2
    movd mm3, %3
    punpcklbw mm1, [reg_blank]
    punpcklbw mm2, [reg_blank]
    punpcklbw mm3, [reg_blank]
    psllw mm1, 1
    paddw mm2, mm3
    pmullw mm2, [const3]
    paddw mm1, mm2
    psrlw mm1, 3
    packuswb mm1, [reg_blank]
    movd %1, mm1
%endmacro

%macro Interp10 3
    movd mm1, eax
    movd mm2, %2
    movd mm3, %3
    punpcklbw mm1, [reg_blank]
    punpcklbw mm2, [reg_blank]
    punpcklbw mm3, [reg_blank]
    pmullw mm1, [const14]
    paddw mm2, mm3
    paddw mm1, mm2
    psrlw mm1, 4
    packuswb mm1, [reg_blank]
    movd %1, mm1
%endmacro

;-----------------------------------------------------------------------
; PIXELrc_k : write one dword of the 2x2 output block.
;   rc = 00 -> [edi]        01 -> [edi+4]
;        10 -> [edi+ebx]    11 -> [edi+ebx+4]
;   k selects the blend:  0            plain copy of eax
;                         10/11/12     Interp1 with one of c1..c9
;                         20/21/22     Interp2 with two of c1..c9
;                         60/61        Interp6
;                         70/90/100    Interp7 / Interp9 / Interp10
; In:    eax = first pixel, edi = output address, ebx = offset of the
;        second output row, c1..c9 as referenced by each macro.
; Clobb: as for the Interp macro used (none for _0).
;-----------------------------------------------------------------------

; ---- output pixel (row 0, column 0) ----
%macro PIXEL00_0 0
    mov [edi],eax
%endmacro

%macro PIXEL00_10 0
    Interp1 [edi],eax,[c1]
%endmacro

%macro PIXEL00_11 0
    Interp1 [edi],eax,[c4]
%endmacro

%macro PIXEL00_12 0
    Interp1 [edi],eax,[c2]
%endmacro

%macro PIXEL00_20 0
    Interp2 [edi],eax,[c4],[c2]
%endmacro

%macro PIXEL00_21 0
    Interp2 [edi],eax,[c1],[c2]
%endmacro

%macro PIXEL00_22 0
    Interp2 [edi],eax,[c1],[c4]
%endmacro

%macro PIXEL00_60 0
    Interp6 [edi],[c2],[c4]
%endmacro

%macro PIXEL00_61 0
    Interp6 [edi],[c4],[c2]
%endmacro

%macro PIXEL00_70 0
    Interp7 [edi],[c4],[c2]
%endmacro

%macro PIXEL00_90 0
    Interp9 [edi],[c4],[c2]
%endmacro

%macro PIXEL00_100 0
    Interp10 [edi],[c4],[c2]
%endmacro

; ---- output pixel (row 0, column 1) ----
%macro PIXEL01_0 0
    mov [edi+4],eax
%endmacro

%macro PIXEL01_10 0
    Interp1 [edi+4],eax,[c3]
%endmacro

%macro PIXEL01_11 0
    Interp1 [edi+4],eax,[c2]
%endmacro

%macro PIXEL01_12 0
    Interp1 [edi+4],eax,[c6]
%endmacro

%macro PIXEL01_20 0
    Interp2 [edi+4],eax,[c2],[c6]
%endmacro

%macro PIXEL01_21 0
    Interp2 [edi+4],eax,[c3],[c6]
%endmacro

%macro PIXEL01_22 0
    Interp2 [edi+4],eax,[c3],[c2]
%endmacro

%macro PIXEL01_60 0
    Interp6 [edi+4],[c6],[c2]
%endmacro

%macro PIXEL01_61 0
    Interp6 [edi+4],[c2],[c6]
%endmacro

%macro PIXEL01_70 0
    Interp7 [edi+4],[c2],[c6]
%endmacro

%macro PIXEL01_90 0
    Interp9 [edi+4],[c2],[c6]
%endmacro

%macro PIXEL01_100 0
    Interp10 [edi+4],[c2],[c6]
%endmacro

; ---- output pixel (row 1, column 0) ----
%macro PIXEL10_0 0
    mov [edi+ebx],eax
%endmacro

%macro PIXEL10_10 0
    Interp1 [edi+ebx],eax,[c7]
%endmacro

%macro PIXEL10_11 0
    Interp1 [edi+ebx],eax,[c8]
%endmacro

%macro PIXEL10_12 0
    Interp1 [edi+ebx],eax,[c4]
%endmacro

%macro PIXEL10_20 0
    Interp2 [edi+ebx],eax,[c8],[c4]
%endmacro

%macro PIXEL10_21 0
    Interp2 [edi+ebx],eax,[c7],[c4]
%endmacro

%macro PIXEL10_22 0
    Interp2 [edi+ebx],eax,[c7],[c8]
%endmacro

%macro PIXEL10_60 0
    Interp6 [edi+ebx],[c4],[c8]
%endmacro

%macro PIXEL10_61 0
    Interp6 [edi+ebx],[c8],[c4]
%endmacro

%macro PIXEL10_70 0
    Interp7 [edi+ebx],[c8],[c4]
%endmacro

%macro PIXEL10_90 0
    Interp9 [edi+ebx],[c8],[c4]
%endmacro

%macro PIXEL10_100 0
    Interp10 [edi+ebx],[c8],[c4]
%endmacro

; ---- output pixel (row 1, column 1) ----
%macro PIXEL11_0 0
    mov [edi+ebx+4],eax
%endmacro

%macro PIXEL11_10 0
    Interp1 [edi+ebx+4],eax,[c9]
%endmacro

%macro PIXEL11_11 0
    Interp1 [edi+ebx+4],eax,[c6]
%endmacro

%macro PIXEL11_12 0
    Interp1 [edi+ebx+4],eax,[c8]
%endmacro

%macro PIXEL11_20 0
    Interp2 [edi+ebx+4],eax,[c6],[c8]
%endmacro

%macro PIXEL11_21 0
    Interp2 [edi+ebx+4],eax,[c9],[c8]
%endmacro

%macro PIXEL11_22 0
    Interp2 [edi+ebx+4],eax,[c9],[c6]
%endmacro

%macro PIXEL11_60 0
    Interp6 [edi+ebx+4],[c8],[c6]
%endmacro

%macro PIXEL11_61 0
    Interp6 [edi+ebx+4],[c6],[c8]
%endmacro

%macro PIXEL11_70 0
    Interp7 [edi+ebx+4],[c6],[c8]
%endmacro

%macro PIXEL11_90 0
    Interp9 [edi+ebx+4],[c6],[c8]
%endmacro

%macro PIXEL11_100 0
    Interp10 [edi+ebx+4],[c6],[c8]
%endmacro

;-----------------------------------------------------------------------
; hq2x_32b
;   Reads 16-bit pixels starting at [vidbuffer]+16*2+256*2+32*2 and
;   writes 32-bit pixels, doubled in both directions, starting at
;   [WinVidMemStart].
;   Returns immediately when byte [curblank] == 40h.
;   Takes the plain pixel-doubling path (nointerp) when
;     - FilteredGUI == 0 and GUIOn2 == 1, or
;     - MMXSupport == 0, or
;     - hqFilter == 0;
;   otherwise runs the hq2x path.
; In:    none in registers (all state comes from the EXTSYM variables)
; Out:   none
; Clobb: es is set to ds; general registers are saved/restored with
;        pushad/popad on the nointerp path; MMX registers are used by the
;        hq2x path.
; NOTE(review): only the first part of the hq2x path is present in this
;        part of the file; see the note at the end.
;-----------------------------------------------------------------------
NEWSYM hq2x_32b
    cmp byte[curblank],40h
    jne .startcopy
    ret
.startcopy:
    pushad
    mov ax,ds
    mov es,ax
    mov esi,[vidbuffer]
    mov edi,[WinVidMemStart]
    add esi,16*2+256*2+32*2
    mov ecx,[vidbufferofsb]
    mov [deltaptr],ecx
    cmp byte[FilteredGUI],0
    jne .filtergui
    cmp byte[GUIOn2],1
    je nointerp
.filtergui:
    cmp byte[MMXSupport],0
    je nointerp
    cmp byte[hqFilter],0
    jne hq2x
;----------------------------;
; Plain 2x path: every source pixel is converted through the dword table at
; [BitConv32Ptr] and stored four times (2x2). Also the fall-through target
; when hqFilter == 0.
;   esi = source, edi = destination, ebx = [NumBytesPerLine],
;   edx = [BitConv32Ptr], [lineleft] = rows remaining.
nointerp:
    mov dl,[resolutn]
    mov [lineleft],dl
    mov ebx,[NumBytesPerLine]
    mov edx,[BitConv32Ptr]
.loopy:
    mov ecx,256                 ; 256 source pixels per row
.loopx:
    movzx eax,word[esi]
    mov eax,[edx+eax*4]
    mov [edi],eax
    mov [edi+4],eax
    mov [edi+ebx],eax
    mov [edi+ebx+4],eax
    add esi,2
    add edi,8
    dec ecx
    jnz .loopx
    add edi,[AddEndBytes]
    add edi,ebx                 ; step over the second output row just written
    add esi,64                  ; 256*2 + 64 = 576 source bytes per row
    dec byte[lineleft]
    jnz near .loopy
    popad
    ret
;----------------------------;
; hq2x path.
;   [prevline]/[nextline] are byte offsets added to esi (and to the delta
;   pointer) to reach the row above/below; they start at 0 and 576.
;   ebx initially points at hirestiledat+1, or at SpecialLine+1 when
;   GUIOn != 1 and newengen != 0.
hq2x:
    mov dl,[resolutn]
    mov [lineleft],dl
    mov dword[prevline],0
    mov dword[nextline],576
    mov ebx,hirestiledat+1
    cmp byte[GUIOn],1
    je .loopy
    cmp byte[newengen],0
    je .loopy
    mov ebx,SpecialLine+1
.loopy:
    mov [InterPtr],ebx
    cmp byte[ebx],1
    jbe .nohires
    ; Flag byte > 1: hand the row to HighResProc and fill this row's 128
    ; dwords of the delta buffer with 0xAAAAAAAA.
    call HighResProc
    mov edx,[deltaptr]
    mov ecx,128
    mov eax,0xAAAAAAAA
.a:
    mov [edx],eax
    add edx,4
    dec ecx
    jnz .a
    mov [deltaptr],edx
    jmp .nexty
.nohires:
    mov dword[xcounter],254 ; x={Xres-2, Xres-1} are special cases.
    ; x=0 - special case
    ; Load 8 bytes from each of the three rows and compare them word-wise
    ; with the delta buffer; only the two lowest words are tested here.
    mov edx,[deltaptr]
    mov ecx,[prevline]
    mov eax,[nextline]
    movq mm2,[esi+ecx]
    movq mm3,[esi]
    movq mm4,[esi+eax]
    movq mm5,mm2
    movq mm6,mm3
    movq mm7,mm4
    pcmpeqw mm2,[edx+ecx]
    pcmpeqw mm3,[edx]
    pcmpeqw mm4,[edx+eax]
    pand mm2,mm3
    pand mm2,mm4
    movd eax,mm2
    inc eax                     ; 0xFFFFFFFF + 1 = 0: both words match in all rows
    jz near .loopx_end
    ; No left neighbour at x=0, so the left column repeats the centre one
    ; (w1=w2, w4=w5, w7=w8).
    movd eax,mm5
    movzx edx,ax
    mov [w1],edx
    mov [w2],edx
    shr eax,16
    mov [w3],eax
    movd eax,mm6
    movzx edx,ax
    mov [w4],edx
    mov [w5],edx
    shr eax,16
    mov [w6],eax
    movd eax,mm7
    movzx edx,ax
    mov [w7],edx
    mov [w8],edx
    shr eax,16
    mov [w9],eax
    jmp .flags
.loopx:
    ; General case: the 3x3 window starts one pixel (2 bytes) to the left.
    mov edx,[deltaptr]
    mov ecx,[prevline]
    mov eax,[nextline]
    movq mm2,[esi+ecx-2]
    movq mm3,[esi-2]
    movq mm4,[esi+eax-2]
    movq mm5,mm2
    movq mm6,mm3
    movq mm7,mm4
    pcmpeqw mm2,[edx+ecx-2]
    pcmpeqw mm3,[edx-2]
    pcmpeqw mm4,[edx+eax-2]
    pand mm2,mm3
    pand mm2,mm4
    movd ebx,mm2                ; match masks of words 0 and 1
    psrlq mm2,32
    movd eax,mm2
    cwde                        ; sign-extend the word-2 mask to 32 bits
    and eax,ebx
    inc eax                     ; zero only when all three words match in all rows
    jz near .loopx_end
    movd eax,mm5
    mov [edx+ecx-2],ax          ; store the top-left source pixel in the delta buffer
    movzx edx,ax
    mov [w1],edx
    shr eax,16
    mov [w2],eax
    psrlq mm5,32
    movd eax,mm5
    movzx edx,ax
    mov [w3],edx
    movd eax,mm6
    movzx edx,ax
    mov [w4],edx
    shr eax,16
    mov [w5],eax
    psrlq mm6,32
    movd eax,mm6
    movzx edx,ax
    mov [w6],edx
    movd eax,mm7
    movzx edx,ax
    mov [w7],edx
    shr eax,16
    mov [w8],eax
    psrlq mm7,32
    movd eax,mm7
    movzx edx,ax
    mov [w9],edx
.flags:
    ; Compare the centre pixel w5 with its neighbours.
    ;   [cross] : bits 1,2,4,8 set when w2,w4,w6,w8 are not identical to w5.
    ;   ecx     : bits 2,8,16,64 set when w2,w4,w6,w8 differ from w5 by more
    ;             than [threshold] in the table at [RGBtoYUVPtr];
    ;             bits 1,4,32,128 are added further down for w1,w3,w7,w9.
    ;   eax = w5, ebx = [RGBtoYUVPtr], mm5 = table entry for w5.
    mov ebx,[RGBtoYUVPtr]
    mov eax,[w5]
    xor ecx,ecx
    movd mm5,[ebx+eax*4]
    mov dword[cross],0
    mov edx,[w2]
    cmp eax,edx
    je .noflag2
    or dword[cross],1
    movq mm1,mm5
    movd mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por mm1,mm2
    psubusb mm1,[threshold]
    movd edx,mm1
    test edx,edx
    jz .noflag2
    or ecx,2
.noflag2:
    mov edx,[w4]
    cmp eax,edx
    je .noflag4
    or dword[cross],2
    movq mm1,mm5
    movd mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por mm1,mm2
    psubusb mm1,[threshold]
    movd edx,mm1
    test edx,edx
    jz .noflag4
    or ecx,8
.noflag4:
    mov edx,[w6]
    cmp eax,edx
    je .noflag6
    or dword[cross],4
    movq mm1,mm5
    movd mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por mm1,mm2
    psubusb mm1,[threshold]
    movd edx,mm1
    test edx,edx
    jz .noflag6
    or ecx,16
.noflag6:
    mov edx,[w8]
    cmp eax,edx
    je .noflag8
    or dword[cross],8
    movq mm1,mm5
    movd mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por mm1,mm2
    psubusb mm1,[threshold]
    movd edx,mm1
    test edx,edx
    jz .noflag8
    or ecx,64
.noflag8:
    test ecx,ecx
    jnz .testflag1
    ; No edge neighbour differs beyond the threshold: convert w5 through the
    ; table at [BitConv32Ptr] and dispatch on [cross] via FuncTable2.
    mov ecx,[cross]
    mov ebx,[BitConv32Ptr]
    mov eax,[ebx+eax*4]
    jmp [FuncTable2+ecx*4]
.testflag1:
    mov edx,[w1]
    cmp eax,edx
    je .noflag1
    movq mm1,mm5
    movd mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por mm1,mm2
    psubusb mm1,[threshold]
    movd edx,mm1
    test edx,edx
    jz .noflag1
    or ecx,1
.noflag1:
    mov edx,[w3]
    cmp eax,edx
    je .noflag3
    movq mm1,mm5
    movd mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por mm1,mm2
    psubusb mm1,[threshold]
    movd edx,mm1
    test edx,edx
    jz .noflag3
    or ecx,4
.noflag3:
    mov edx,[w7]
    cmp eax,edx
    je .noflag7
    movq mm1,mm5
    movd mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por mm1,mm2
    psubusb mm1,[threshold]
    movd edx,mm1
    test edx,edx
    jz .noflag7
    or ecx,32
.noflag7:
    mov edx,[w9]
    cmp eax,edx
    je .noflag9
    movq mm1,mm5
    movd mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por mm1,mm2
    psubusb mm1,[threshold]
    movd edx,mm1
    test edx,edx
    jz .noflag9
    or ecx,128
.noflag9:
    ; Convert the centre and the four edge neighbours through the table at
    ; [BitConv32Ptr]: eax = centre, c2/c4/c6/c8 = neighbours.
    mov ebx,[BitConv32Ptr]
    mov eax,[ebx+eax*4]
    mov edx,[w2]
    mov edx,[ebx+edx*4]
    mov [c2],edx
    mov edx,[w4]
    mov edx,[ebx+edx*4]
    mov [c4],edx
    mov edx,[w6]
    mov edx,[ebx+edx*4]
    mov [c6],edx
    mov edx,[w8]
    mov edx,[ebx+edx*4]
    mov [c8],edx
    test ecx,0x005A             ; 0x5A = 2|8|16|64, the w2/w4/w6/w8 flags
    jz .switch
    ; NOTE(review): the routine continues beyond this point. .switch,
    ; .loopx_end, .nexty, FuncTable2, HighResProc and InterPtr are referenced
    ; above but are not defined in this part of the file.
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?