📄 hq2x16.asm
字号:
;Copyright (C) 1997-2007 ZSNES Team ( zsKnight, _Demo_, pagefault, Nach )
;
;http://www.zsnes.com
;http://sourceforge.net/projects/zsnes
;https://zsnes.bountysource.com
;
;This program is free software; you can redistribute it and/or
;modify it under the terms of the GNU General Public License
;version 2 as published by the Free Software Foundation.
;
;This program is distributed in the hope that it will be useful,
;but WITHOUT ANY WARRANTY; without even the implied warranty of
;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;GNU General Public License for more details.
;
;You should have received a copy of the GNU General Public License
;along with this program; if not, write to the Free Software
;Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

;----------------------------------------------------------
; hq2x filter
; Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
;----------------------------------------------------------

; Syntax: NASM (Intel operand order), 32-bit x86 with MMX.
; NEWSYM / EXTSYM come from macros.mac (not visible in this file).

%include "macros.mac"

EXTSYM vidbuffer,curblank,MMXSupport,GUIOn,GUIOn2,vidbufferofsb,FilteredGUI
EXTSYM resolutn,lineleft,cfield,hirestiledat,newengen,SpecialLine,HalfTrans
EXTSYM hqFilter,AddEndBytes,NumBytesPerLine,WinVidMemStart,BitConv32Ptr
EXTSYM RGBtoYUVPtr

SECTION .bss
NEWSYM prevline, resd 1         ; byte offset from esi to the row above
NEWSYM nextline, resd 1         ; byte offset from esi to the row below
NEWSYM deltaptr, resd 1         ; cursor into the buffer taken from vidbufferofsb
NEWSYM xcounter, resd 1
; w1..w9: the 3x3 neighbourhood as zero-extended 16-bit pixels
;   w1 w2 w3   (row at prevline)
;   w4 w5 w6   (current row, w5 = centre)
;   w7 w8 w9   (row at nextline)
NEWSYM w1, resd 1
NEWSYM w2, resd 1
NEWSYM w3, resd 1
NEWSYM w4, resd 1
NEWSYM w5, resd 1
NEWSYM w6, resd 1
NEWSYM w7, resd 1
NEWSYM w8, resd 1
NEWSYM w9, resd 1
NEWSYM cross, resd 1            ; bit mask: which of w2/w4/w6/w8 differ from w5

SECTION .data
NEWSYM reg_blank, dd 0,0        ; all-zero operand for punpcklbw / packuswb
NEWSYM const3, dd 0x00030003,0x00000003
NEWSYM const5, dd 0x00050005,0x00000005
NEWSYM const6, dd 0x00060006,0x00000006
NEWSYM const7, dd 0x00070007,0x00000007
NEWSYM const14, dd 0x000E000E,0x0000000E
; Per-byte tolerance subtracted (saturating) from the absolute difference of
; two RGBtoYUVPtr entries. NOTE(review): presumably Y/U/V limits - confirm.
NEWSYM threshold, dd 0x00300706,0x00000000

SECTION .text

;-----------------------------------------------------------------------
; TestDiff a, b
; In:    %1, %2 = names of dword variables holding 16-bit pixel values
; Out:   ecx = 0 if the pixels are identical or every byte of their
;              RGBtoYUVPtr entries differs by no more than [threshold];
;              non-zero otherwise
; Clobb: ecx, edx, mm1, mm2, mm5, flags
;-----------------------------------------------------------------------
%macro TestDiff 2
    xor     ecx,ecx
    mov     edx,[%1]
    cmp     edx,[%2]
    je      %%fin                   ; identical pixels: ecx stays 0
    mov     ecx,[RGBtoYUVPtr]
    movd    mm1,[ecx+edx*4]
    movq    mm5,mm1
    mov     edx,[%2]
    movd    mm2,[ecx+edx*4]
    psubusb mm1,mm2                 ; max(a-b,0) per byte
    psubusb mm2,mm5                 ; max(b-a,0) per byte
    por     mm1,mm2                 ; |a-b| per byte
    psubusb mm1,[threshold]         ; bytes within tolerance become 0
    movd    ecx,mm1
%%fin:
%endmacro

;-----------------------------------------------------------------------
; DiffOrNot a, b, <diff statements...>, <same statements...>
; Runs TestDiff a,b, then expands the first half of the remaining
; arguments when the pixels differ (ecx != 0) and the second half when
; they do not. Overloaded on argument count: 4, 6, 8 and 10.
;-----------------------------------------------------------------------
%macro DiffOrNot 4
    TestDiff %1,%2
    test    ecx,ecx
    jz      %%same
    %3
    jmp     %%fin
%%same:
    %4
%%fin:
%endmacro

%macro DiffOrNot 6
    TestDiff %1,%2
    test    ecx,ecx
    jz      %%same
    %3
    %4
    jmp     %%fin
%%same:
    %5
    %6
%%fin:
%endmacro

%macro DiffOrNot 8
    TestDiff %1,%2
    test    ecx,ecx
    jz      %%same
    %3
    %4
    %5
    jmp     %%fin
%%same:
    %6
    %7
    %8
%%fin:
%endmacro

%macro DiffOrNot 10
    TestDiff %1,%2
    test    ecx,ecx
    jz      %%same
    %3
    %4
    %5
    %6
    jmp     %%fin
%%same:
    %7
    %8
    %9
    %10
%%fin:
%endmacro

;-----------------------------------------------------------------------
; Interp1 dst, c1, c2
; Stores roughly (3*c1 + c2)/4 as a word at dst, built from two masked
; halvings. When c1 == c2 the value is stored unchanged.
; 0x0821 has bits 0, 5 and 11 set; it is added between the two halvings
; (NOTE(review): presumably a per-field rounding bias - confirm).
; Clobb: ecx, edx, flags
;-----------------------------------------------------------------------
%macro Interp1 3
    mov     edx,%2
    mov     ecx,%3
    cmp     edx,ecx
    je      %%fin
    and     edx,[HalfTrans]
    and     ecx,[HalfTrans]
    add     ecx,edx
    shr     ecx,1                   ; ecx = (c1 + c2) / 2
    add     ecx,0x0821
    and     ecx,[HalfTrans]
    add     edx,ecx
    shr     edx,1                   ; edx = (c1 + ecx) / 2
%%fin:
    mov     %1,dx
%endmacro

;-----------------------------------------------------------------------
; Interp2 dst, c1, c2, c3
; Stores roughly (2*c1 + c2 + c3)/4 as a word at dst: first the average
; of c2 and c3 (skipped when they are equal), then the average of that
; result with c1 (skipped when they are equal).
; Clobb: ecx, edx, flags
;-----------------------------------------------------------------------
%macro Interp2 4
    mov     edx,%3
    mov     ecx,%4
    cmp     edx,ecx
    je      %%fin1
    and     edx,[HalfTrans]
    and     ecx,[HalfTrans]
    add     ecx,edx
    shr     ecx,1
    add     ecx,0x0821
%%fin1:
    mov     edx,%2
    cmp     edx,ecx
    je      %%fin2
    and     ecx,[HalfTrans]
    and     edx,[HalfTrans]
    add     edx,ecx
    shr     edx,1
%%fin2:
    mov     %1,dx
%endmacro

;-----------------------------------------------------------------------
; Interp5 dst, c1, c2
; Stores the masked average (c1 + c2)/2 as a word at dst.
; Clobb: ecx, edx, flags
;-----------------------------------------------------------------------
%macro Interp5 3
    mov     edx,%2
    mov     ecx,%3
    cmp     edx,ecx
    je      %%fin
    and     edx,[HalfTrans]
    and     ecx,[HalfTrans]
    add     edx,ecx
    shr     edx,1
%%fin:
    mov     %1,dx
%endmacro

;-----------------------------------------------------------------------
; Interp6 / Interp7 / Interp9 / Interp10  dst, a, b
; Weighted blend of the centre pixel (index taken from eax) with pixels
; a and b. Each pixel is looked up as a dword in the BitConv32Ptr table,
; widened to 16-bit lanes, combined, shifted right and packed back to
; bytes; the low three bytes are then squeezed into one 16-bit value
; written to dst.
;   Interp6 : (5*c + 2*a +   b) >> 5
;   Interp7 : (6*c +   a +   b) >> 5
;   Interp9 : (2*c + 3*a + 3*b) >> 5
;   Interp10: (14*c +  a +   b) >> 6
; In:    eax = centre pixel value (used as table index)
; Clobb: ecx, edx, mm1, mm2, mm3, flags
; NOTE(review): the trailing shl/shr sequence keeps 5, 6 and 5 bits of
; the three bytes - presumably an RGB565 repack; confirm against the
; BitConv32Ptr table layout.
;-----------------------------------------------------------------------
%macro Interp6 3
    mov     ecx, [BitConv32Ptr]
    movd    mm1, [ecx+eax*4]
    mov     edx, %2
    movd    mm2, [ecx+edx*4]
    mov     edx, %3
    movd    mm3, [ecx+edx*4]
    punpcklbw mm1, [reg_blank]
    punpcklbw mm2, [reg_blank]
    punpcklbw mm3, [reg_blank]
    pmullw  mm1, [const5]
    psllw   mm2, 1
    paddw   mm1, mm3
    paddw   mm1, mm2
    psrlw   mm1, 5
    packuswb mm1, [reg_blank]
    movd    edx, mm1
    shl     dl, 2
    shr     edx, 1
    shl     dx, 3
    shr     edx, 5
    mov     %1, dx
%endmacro

%macro Interp7 3
    mov     ecx, [BitConv32Ptr]
    movd    mm1, [ecx+eax*4]
    mov     edx, %2
    movd    mm2, [ecx+edx*4]
    mov     edx, %3
    movd    mm3, [ecx+edx*4]
    punpcklbw mm1, [reg_blank]
    punpcklbw mm2, [reg_blank]
    punpcklbw mm3, [reg_blank]
    pmullw  mm1, [const6]
    paddw   mm2, mm3
    paddw   mm1, mm2
    psrlw   mm1, 5
    packuswb mm1, [reg_blank]
    movd    edx, mm1
    shl     dl, 2
    shr     edx, 1
    shl     dx, 3
    shr     edx, 5
    mov     %1, dx
%endmacro

%macro Interp9 3
    mov     ecx, [BitConv32Ptr]
    movd    mm1, [ecx+eax*4]
    mov     edx, %2
    movd    mm2, [ecx+edx*4]
    mov     edx, %3
    movd    mm3, [ecx+edx*4]
    punpcklbw mm1, [reg_blank]
    punpcklbw mm2, [reg_blank]
    punpcklbw mm3, [reg_blank]
    psllw   mm1, 1
    paddw   mm2, mm3
    pmullw  mm2, [const3]
    paddw   mm1, mm2
    psrlw   mm1, 5
    packuswb mm1, [reg_blank]
    movd    edx, mm1
    shl     dl, 2
    shr     edx, 1
    shl     dx, 3
    shr     edx, 5
    mov     %1, dx
%endmacro

%macro Interp10 3
    mov     ecx, [BitConv32Ptr]
    movd    mm1, [ecx+eax*4]
    mov     edx, %2
    movd    mm2, [ecx+edx*4]
    mov     edx, %3
    movd    mm3, [ecx+edx*4]
    punpcklbw mm1, [reg_blank]
    punpcklbw mm2, [reg_blank]
    punpcklbw mm3, [reg_blank]
    pmullw  mm1, [const14]
    paddw   mm2, mm3
    paddw   mm1, mm2
    psrlw   mm1, 6
    packuswb mm1, [reg_blank]
    movd    edx, mm1
    shl     dl, 2
    shr     edx, 1
    shl     dx, 3
    shr     edx, 5
    mov     %1, dx
%endmacro

;-----------------------------------------------------------------------
; PIXELrc_n: write one of the four output pixels for the current source
; pixel. rc selects the destination:
;   00 -> [edi]        01 -> [edi+2]
;   10 -> [edi+ebx]    11 -> [edi+ebx+2]
; n selects the rule: 0 copies ax unchanged, the others expand to the
; InterpN macro with the neighbours listed.
; In: eax/ax = centre pixel, edi = destination, ebx = offset to the
;     second output row.
;-----------------------------------------------------------------------

; ---- output pixel 00: [edi] ----
%macro PIXEL00_0 0
    mov     [edi],ax
%endmacro

%macro PIXEL00_10 0
    Interp1 [edi],eax,[w1]
%endmacro

%macro PIXEL00_11 0
    Interp1 [edi],eax,[w4]
%endmacro

%macro PIXEL00_12 0
    Interp1 [edi],eax,[w2]
%endmacro

%macro PIXEL00_20 0
    Interp2 [edi],eax,[w4],[w2]
%endmacro

%macro PIXEL00_21 0
    Interp2 [edi],eax,[w1],[w2]
%endmacro

%macro PIXEL00_22 0
    Interp2 [edi],eax,[w1],[w4]
%endmacro

%macro PIXEL00_60 0
    Interp6 [edi],[w2],[w4]
%endmacro

%macro PIXEL00_61 0
    Interp6 [edi],[w4],[w2]
%endmacro

%macro PIXEL00_70 0
    Interp7 [edi],[w4],[w2]
%endmacro

%macro PIXEL00_90 0
    Interp9 [edi],[w4],[w2]
%endmacro

%macro PIXEL00_100 0
    Interp10 [edi],[w4],[w2]
%endmacro

; ---- output pixel 01: [edi+2] ----
%macro PIXEL01_0 0
    mov     [edi+2],ax
%endmacro

%macro PIXEL01_10 0
    Interp1 [edi+2],eax,[w3]
%endmacro

%macro PIXEL01_11 0
    Interp1 [edi+2],eax,[w2]
%endmacro

%macro PIXEL01_12 0
    Interp1 [edi+2],eax,[w6]
%endmacro

%macro PIXEL01_20 0
    Interp2 [edi+2],eax,[w2],[w6]
%endmacro

%macro PIXEL01_21 0
    Interp2 [edi+2],eax,[w3],[w6]
%endmacro

%macro PIXEL01_22 0
    Interp2 [edi+2],eax,[w3],[w2]
%endmacro

%macro PIXEL01_60 0
    Interp6 [edi+2],[w6],[w2]
%endmacro

%macro PIXEL01_61 0
    Interp6 [edi+2],[w2],[w6]
%endmacro

%macro PIXEL01_70 0
    Interp7 [edi+2],[w2],[w6]
%endmacro

%macro PIXEL01_90 0
    Interp9 [edi+2],[w2],[w6]
%endmacro

%macro PIXEL01_100 0
    Interp10 [edi+2],[w2],[w6]
%endmacro

; ---- output pixel 10: [edi+ebx] ----
%macro PIXEL10_0 0
    mov     [edi+ebx],ax
%endmacro

%macro PIXEL10_10 0
    Interp1 [edi+ebx],eax,[w7]
%endmacro

%macro PIXEL10_11 0
    Interp1 [edi+ebx],eax,[w8]
%endmacro

%macro PIXEL10_12 0
    Interp1 [edi+ebx],eax,[w4]
%endmacro

%macro PIXEL10_20 0
    Interp2 [edi+ebx],eax,[w8],[w4]
%endmacro

%macro PIXEL10_21 0
    Interp2 [edi+ebx],eax,[w7],[w4]
%endmacro

%macro PIXEL10_22 0
    Interp2 [edi+ebx],eax,[w7],[w8]
%endmacro

%macro PIXEL10_60 0
    Interp6 [edi+ebx],[w4],[w8]
%endmacro

%macro PIXEL10_61 0
    Interp6 [edi+ebx],[w8],[w4]
%endmacro

%macro PIXEL10_70 0
    Interp7 [edi+ebx],[w8],[w4]
%endmacro

%macro PIXEL10_90 0
    Interp9 [edi+ebx],[w8],[w4]
%endmacro

%macro PIXEL10_100 0
    Interp10 [edi+ebx],[w8],[w4]
%endmacro

; ---- output pixel 11: [edi+ebx+2] ----
%macro PIXEL11_0 0
    mov     [edi+ebx+2],ax
%endmacro

%macro PIXEL11_10 0
    Interp1 [edi+ebx+2],eax,[w9]
%endmacro

%macro PIXEL11_11 0
    Interp1 [edi+ebx+2],eax,[w6]
%endmacro

%macro PIXEL11_12 0
    Interp1 [edi+ebx+2],eax,[w8]
%endmacro

%macro PIXEL11_20 0
    Interp2 [edi+ebx+2],eax,[w6],[w8]
%endmacro

%macro PIXEL11_21 0
    Interp2 [edi+ebx+2],eax,[w9],[w8]
%endmacro

%macro PIXEL11_22 0
    Interp2 [edi+ebx+2],eax,[w9],[w6]
%endmacro

%macro PIXEL11_60 0
    Interp6 [edi+ebx+2],[w8],[w6]
%endmacro

%macro PIXEL11_61 0
    Interp6 [edi+ebx+2],[w6],[w8]
%endmacro

%macro PIXEL11_70 0
    Interp7 [edi+ebx+2],[w6],[w8]
%endmacro

%macro PIXEL11_90 0
    Interp9 [edi+ebx+2],[w6],[w8]
%endmacro

%macro PIXEL11_100 0
    Interp10 [edi+ebx+2],[w6],[w8]
%endmacro

;-----------------------------------------------------------------------
; hq2x_16b
; Copies the 16-bit frame at [vidbuffer] to [WinVidMemStart] at twice
; the width and height, either by plain pixel doubling (nointerp) or
; through the hq2x path.
; In:    none in registers; reads the EXTSYM globals listed above
; Out:   none; all general registers are preserved via pushad/popad
; Path selection:
;   - [curblank] == 40h                        -> return immediately
;   - [FilteredGUI] == 0 and [GUIOn2] == 1     -> nointerp
;   - [MMXSupport] == 0                        -> nointerp
;   - [hqFilter] != 0                          -> hq2x
;   - otherwise falls through into nointerp
;-----------------------------------------------------------------------
NEWSYM hq2x_16b
    cmp     byte[curblank],40h
    jne     .startcopy
    ret
.startcopy:
    pushad
    mov     ax,ds
    mov     es,ax
    mov     esi,[vidbuffer]
    mov     edi,[WinVidMemStart]
    ; Skip 608 bytes of the source buffer.
    ; NOTE(review): presumably one 576-byte row plus a 16-pixel border - confirm.
    add     esi,16*2+256*2+32*2
    mov     ecx,[vidbufferofsb]
    mov     [deltaptr],ecx
    cmp     byte[FilteredGUI],0
    jne     .filtergui
    cmp     byte[GUIOn2],1
    je      nointerp
.filtergui:
    cmp     byte[MMXSupport],0
    je      nointerp
    cmp     byte[hqFilter],0
    jne     hq2x
;----------------------------;
; Plain 2x: every source pixel is duplicated horizontally into one dword
; and written to two consecutive output rows (edi and edi+ebx).
nointerp:
    mov     dl,[resolutn]           ; dl = number of source rows
    mov     ebx,[NumBytesPerLine]   ; ebx = offset to the second output row
.loopy:
    mov     ecx,256                 ; 256 source pixels per row
.loopx:
    mov     ax,[esi]
    shl     eax,16
    mov     ax,[esi]                ; eax = pixel:pixel
    mov     [edi],eax
    mov     [edi+ebx],eax
    add     esi,2
    add     edi,4
    dec     ecx
    jnz     .loopx
    add     edi,[AddEndBytes]
    add     edi,ebx                 ; step over the second output row
    add     esi,64                  ; 256*2 + 64 = 576 bytes per source row
    dec     dl
    jnz     near .loopy
    popad
    ret
;----------------------------;
; hq2x path. For each row, the byte at [ebx] (hirestiledat+1 or
; SpecialLine+1) decides whether the row goes to HighResProc or to the
; per-pixel filter below.
hq2x:
    mov     dl,[resolutn]
    mov     [lineleft],dl
    mov     dword[prevline],0       ; no row above the first one
    mov     dword[nextline],576
    mov     ebx,hirestiledat+1
    cmp     byte[GUIOn],1
    je      .loopy
    cmp     byte[newengen],0
    je      .loopy
    mov     ebx,SpecialLine+1
.loopy:
    mov     [InterPtr],ebx
    cmp     byte[ebx],1
    jbe     .nohires
    call    HighResProc
    ; Fill 128 dwords (512 bytes) at deltaptr with 0xAAAAAAAA.
    ; NOTE(review): presumably invalidates the cached copy of this row - confirm.
    mov     edx,[deltaptr]
    mov     ecx,128
    mov     eax,0xAAAAAAAA
.a:
    mov     [edx],eax
    add     edx,4
    dec     ecx
    jnz     .a
    mov     [deltaptr],edx
    jmp     .nexty
.nohires:
    mov     dword[xcounter],254     ; x={Xres-2, Xres-1} are special cases.
    ; x=0 - special case
    ; There is no column to the left, so the left neighbours (w1/w4/w7)
    ; reuse the centre column values.
    mov     edx,[deltaptr]
    mov     ecx,[prevline]
    mov     eax,[nextline]
    movq    mm2,[esi+ecx]
    movq    mm3,[esi]
    movq    mm4,[esi+eax]
    movq    mm5,mm2
    movq    mm6,mm3
    movq    mm7,mm4
    pcmpeqw mm2,[edx+ecx]
    pcmpeqw mm3,[edx]
    pcmpeqw mm4,[edx+eax]
    pand    mm2,mm3
    pand    mm2,mm4
    movd    eax,mm2
    inc     eax                     ; 0xFFFFFFFF -> 0: first two words match on all rows
    jz      near .loopx_end
    movd    eax,mm5
    movzx   edx,ax
    mov     [w1],edx
    mov     [w2],edx
    shr     eax,16
    mov     [w3],eax
    movd    eax,mm6
    movzx   edx,ax
    mov     [w4],edx
    mov     [w5],edx
    shr     eax,16
    mov     [w6],eax
    movd    eax,mm7
    movzx   edx,ax
    mov     [w7],edx
    mov     [w8],edx
    shr     eax,16
    mov     [w9],eax
    jmp     .flags
.loopx:
    ; General column: load the three rows starting one pixel to the left
    ; and compare them with the buffer at deltaptr.
    mov     edx,[deltaptr]
    mov     ecx,[prevline]
    mov     eax,[nextline]
    movq    mm2,[esi+ecx-2]
    movq    mm3,[esi-2]
    movq    mm4,[esi+eax-2]
    movq    mm5,mm2
    movq    mm6,mm3
    movq    mm7,mm4
    pcmpeqw mm2,[edx+ecx-2]
    pcmpeqw mm3,[edx-2]
    pcmpeqw mm4,[edx+eax-2]
    pand    mm2,mm3
    pand    mm2,mm4
    movd    ebx,mm2                 ; match mask for words 0 and 1
    psrlq   mm2,32
    movd    eax,mm2
    cwde                            ; keep only word 2's mask, sign-extended
    and     eax,ebx
    inc     eax                     ; zero only if all three words matched
    jz      near .loopx_end
    movd    eax,mm5
    mov     [edx+ecx-2],ax          ; store w1 into the delta buffer
    movzx   edx,ax
    mov     [w1],edx
    shr     eax,16
    mov     [w2],eax
    psrlq   mm5,32
    movd    eax,mm5
    movzx   edx,ax
    mov     [w3],edx
    movd    eax,mm6
    movzx   edx,ax
    mov     [w4],edx
    shr     eax,16
    mov     [w5],eax
    psrlq   mm6,32
    movd    eax,mm6
    movzx   edx,ax
    mov     [w6],edx
    movd    eax,mm7
    movzx   edx,ax
    mov     [w7],edx
    shr     eax,16
    mov     [w8],eax
    psrlq   mm7,32
    movd    eax,mm7
    movzx   edx,ax
    mov     [w9],edx
.flags:
    ; Build the pattern in ecx: a bit is set when the neighbour differs
    ; from w5 by more than [threshold] (w1=1, w2=2, w4=8, w6=16, w8=64 in
    ; the part visible here). [cross] records plain inequality of
    ; w2/w4/w6/w8 with w5 as bits 1/2/4/8.
    mov     ebx,[RGBtoYUVPtr]
    mov     eax,[w5]
    xor     ecx,ecx
    movd    mm5,[ebx+eax*4]         ; mm5 = table entry of the centre pixel
    mov     dword[cross],0
    mov     edx,[w2]
    cmp     eax,edx
    je      .noflag2
    or      dword[cross],1
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag2
    or      ecx,2
.noflag2:
    mov     edx,[w4]
    cmp     eax,edx
    je      .noflag4
    or      dword[cross],2
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag4
    or      ecx,8
.noflag4:
    mov     edx,[w6]
    cmp     eax,edx
    je      .noflag6
    or      dword[cross],4
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag6
    or      ecx,16
.noflag6:
    mov     edx,[w8]
    cmp     eax,edx
    je      .noflag8
    or      dword[cross],8
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag8
    or      ecx,64
.noflag8:
    test    ecx,ecx
    jnz     .testflag1
    ; No edge neighbour exceeds the threshold: dispatch on [cross] alone.
    mov     ecx,[cross]
    mov     ebx,[NumBytesPerLine]
    jmp     [FuncTable2+ecx*4]
.testflag1:
    mov     edx,[w1]
    cmp     eax,edx
    je      .noflag1
    movq    mm1,mm5
    movd    mm2,[ebx+edx*4]
    psubusb mm1,mm2
    psubusb mm2,mm5
    por     mm1,mm2
    psubusb mm1,[threshold]
    movd    edx,mm1
    test    edx,edx
    jz      .noflag1
    or      ecx,1
; NOTE(review): the available listing stops here, in the middle of the
; routine. .noflag1, .nexty, .loopx_end, FuncTable2, InterPtr and
; HighResProc are referenced above but not defined in the visible part.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -