📄 gvmat32.asm
字号:
;;; ---- Tail of longest_match_7fff. The procedure starts before this excerpt;
;;; ---- the instructions below are reproduced unchanged, one per line.
        mov     match_start,eax         ; save new position as match_start
        cmp     esi,nice_match          ; if best_len >= nice_match, exit
        jae     exitloop
        mov     ecx,scan
        mov     edx,window              ; restore edx=window
        add     ecx,esi
        add     esi,edx

        dec     esi
        mov     windowlen,esi           ; windowlen = window + best_len-1
        mov     bx,[ecx-1]              ; bx = *(scan+best_len-1) = scan_end

; now we restore ecx and esi, for the big loop :
        mov     esi,prev
        mov     ecx,limit
        jmp     contloop3

exitloop:
; exit : s->match_start=match_start
        mov     ebx,match_start
        mov     ebp,str_s
        mov     ecx,best_len
        mov     dword ptr [ebp+dep_match_start],ebx
        mov     eax,dword ptr [ebp+dep_lookahead]
        cmp     ecx,eax
        ja      minexlo
        mov     eax,ecx
minexlo:
; return min(best_len,s->lookahead)

; restore stack and register ebx,esi,edi,ebp
        add     esp,NbStackAdd

        pop     ebx
        pop     esi
        pop     edi
        pop     ebp
        ret
InfoAuthor:
; please don't remove this string !
; Your are free use gvmat32 in any fre or commercial apps if you don't remove the string in the binary!
        db      0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah

IFDEF NOUNDERLINE
longest_match_7fff   endp
ELSE
_longest_match_7fff  endp
ENDIF


;;; -----------------------------------------------------------------------
;;; cpudetect32 -- classify the processor by probing EFLAGS bits.
;;;
;;; In:     nothing
;;; Out:    eax = 0300h  if the AC bit (40000h) of EFLAGS cannot be toggled
;;;         eax = 0400h  if AC toggles but the ID bit (200000h) cannot
;;;         eax = value left in eax by CPUID with eax=1 otherwise
;;; Saves:  ebx (pushed on entry, popped on exit, because CPUID overwrites it)
;;; Clobb:  ecx (used as scratch here); CPUID also overwrites ecx and edx
;;; -----------------------------------------------------------------------
IFDEF NOUNDERLINE
cpudetect32     proc near
ELSE
_cpudetect32    proc near
ENDIF

        push    ebx

        pushfd                          ; push original EFLAGS
        pop     eax                     ; get original EFLAGS
        mov     ecx, eax                ; save original EFLAGS
        xor     eax, 40000h             ; flip AC bit in EFLAGS
        push    eax                     ; save new EFLAGS value on stack
        popfd                           ; replace current EFLAGS value
        pushfd                          ; get new EFLAGS
        pop     eax                     ; store new EFLAGS in EAX
        xor     eax, ecx                ; can't toggle AC bit, processor=80386
        jz      end_cpu_is_386          ; jump if 80386 processor
        push    ecx
        popfd                           ; restore AC bit in EFLAGS first

        pushfd                          ; this copy stays on the stack until the
                                        ; "restore original EFLAGS" popfd below
        pushfd
        pop     ecx

        mov     eax, ecx                ; get original EFLAGS
        xor     eax, 200000h            ; flip ID bit in EFLAGS
        push    eax                     ; save new EFLAGS value on stack
        popfd                           ; replace current EFLAGS value
        pushfd                          ; get new EFLAGS
        pop     eax                     ; store new EFLAGS in EAX
        popfd                           ; restore original EFLAGS
        xor     eax, ecx                ; can't toggle ID bit,
        je      is_old_486              ; processor=old

        mov     eax,1
        db      0fh,0a2h                ;CPUID

exitcpudetect:
        pop     ebx
        ret

end_cpu_is_386:
        mov     eax,0300h
        jmp     exitcpudetect

is_old_486:
        mov     eax,0400h
        jmp     exitcpudetect

IFDEF NOUNDERLINE
cpudetect32     endp
ELSE
_cpudetect32    endp
ENDIF


MAX_MATCH       equ     258
MIN_MATCH       equ     3
MIN_LOOKAHEAD   equ     (MAX_MATCH + MIN_MATCH + 1)
;;; NOTE(review): this expression evaluates to 256 (265 AND 0FFF0h), whereas
;;; longest_match_686 hard-codes 0108h (264) with the comment "MAX_MATCH_8".
;;; The mask was possibly meant to be 0FFF8h -- confirm before using this name.
MAX_MATCH_8_    equ     ((MAX_MATCH + 7) AND 0FFF0h)


;;; stack frame offsets

chainlenwmask   equ  esp + 0    ; high word: current chain len
                                ; low word: s->wmask
window          equ  esp + 4    ; local copy of s->window
windowbestlen   equ  esp + 8    ; s->window + bestlen
scanstart       equ  esp + 16   ; first two bytes of string
scanend         equ  esp + 12   ; last two bytes of string
scanalign       equ  esp + 20   ; dword-misalignment of string
nicematch       equ  esp + 24   ; a good enough match size
bestlen         equ  esp + 28   ; size of best match so far
scan            equ  esp + 32   ; ptr to string wanting match

LocalVarsSize   equ 36
;;; Layout above the locals. longest_match_686 pushes ebp, edi, esi, ebx in
;;; that order before reserving LocalVarsSize bytes, so:
;       saved ebx       byte esp + 36
;       saved esi       byte esp + 40
;       saved edi       byte esp + 44
;       saved ebp       byte esp + 48
;       return address  byte esp + 52
deflatestate    equ  esp + 56   ; the function arguments
curmatch        equ  esp + 60

;;; Offsets for fields in the deflate_state structure. These numbers
;;; are calculated from the definition of deflate_state, with the
;;; assumption that the compiler will dword-align the fields. (Thus,
;;; changing the definition of deflate_state could easily cause this
;;; program to crash horribly, without so much as a warning at
;;; compile time.
;;; Sigh.)

dsWSize         equ 36
dsWMask         equ 44
dsWindow        equ 48
dsPrev          equ 56
dsMatchLen      equ 88
dsPrevMatch     equ 92
dsStrStart      equ 100
dsMatchStart    equ 104
dsLookahead     equ 108
dsPrevLen       equ 112
dsMaxChainLen   equ 116
dsGoodMatch     equ 132
dsNiceMatch     equ 136


;;; match.asm -- Pentium-Pro-optimized version of longest_match()
;;; Written for zlib 1.1.2
;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html
;;;
;;; This is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License.

;GLOBAL _longest_match, _match_init

;SECTION .text

;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
;;;
;;; In:     both arguments on the stack ([deflatestate], [curmatch])
;;; Out:    eax = the smaller of the best match length and s->lookahead;
;;;         s->match_start is written whenever a longer match is accepted
;;; Saves:  ebp, edi, esi, ebx (pushed on entry, popped before ret)
;;; Clobb:  eax, ecx, edx, flags
;;; Exit:   plain "ret" -- the arguments are left on the stack for the caller

;_longest_match:
IFDEF NOUNDERLINE
longest_match_686   proc near
ELSE
_longest_match_686  proc near
ENDIF

;;; Save registers that the compiler may be using, and adjust esp to
;;; make room for our stack frame.

        push    ebp
        push    edi
        push    esi
        push    ebx
        sub     esp, LocalVarsSize

;;; Retrieve the function arguments. ecx will hold cur_match
;;; throughout the entire function. edx will hold the pointer to the
;;; deflate_state structure during the function's setup (before
;;; entering the main loop).

        mov     edx, [deflatestate]
        mov     ecx, [curmatch]

;;; uInt wmask = s->w_mask;
;;; unsigned chain_length = s->max_chain_length;
;;; if (s->prev_length >= s->good_match) {
;;;     chain_length >>= 2;
;;; }

        mov     eax, [edx + dsPrevLen]
        mov     ebx, [edx + dsGoodMatch]
        cmp     eax, ebx                ; flags survive the two loads below
        mov     eax, [edx + dsWMask]
        mov     ebx, [edx + dsMaxChainLen]
        jl      LastMatchGood           ; prev_length < good_match: keep chain length
        shr     ebx, 2
LastMatchGood:

;;; chainlen is decremented once beforehand so that the function can
;;; use the sign flag instead of the zero flag for the exit test.
;;; It is then shifted into the high word, to make room for the wmask
;;; value, which it will always accompany.

        dec     ebx
        shl     ebx, 16
        or      ebx, eax
        mov     [chainlenwmask], ebx

;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
;;; NOTE(review): the C reference compares unsigned; the signed jl below
;;; gives the same result only while both values are below 2^31.

        mov     eax, [edx + dsNiceMatch]
        mov     ebx, [edx + dsLookahead]
        cmp     ebx, eax
        jl      LookaheadLess           ; lookahead < nice_match: use lookahead
        mov     ebx, eax
LookaheadLess:
        mov     [nicematch], ebx

;;; register Bytef *scan = s->window + s->strstart;

        mov     esi, [edx + dsWindow]
        mov     [window], esi
        mov     ebp, [edx + dsStrStart]
        lea     edi, [esi + ebp]
        mov     [scan], edi

;;; Determine how many bytes the scan ptr is off from being
;;; dword-aligned.

        mov     eax, edi
        neg     eax
        and     eax, 3                  ; eax = (-scan) mod 4
        mov     [scanalign], eax

;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
;;;     s->strstart - (IPos)MAX_DIST(s) : NIL;

        mov     eax, [edx + dsWSize]
        sub     eax, MIN_LOOKAHEAD
        sub     ebp, eax                ; ebp = strstart - (w_size - MIN_LOOKAHEAD)
        jg      LimitPositive
        xor     ebp, ebp                ; not positive: limit = 0
LimitPositive:

;;; int best_len = s->prev_length;

        mov     eax, [edx + dsPrevLen]
        mov     [bestlen], eax

;;; Store the sum of s->window + best_len in esi, and locally in
;;; windowbestlen.

        add     esi, eax
        mov     [windowbestlen], esi

;;; register ush scan_start = *(ushf*)scan;
;;; register ush scan_end   = *(ushf*)(scan+best_len-1);
;;; Posf *prev = s->prev;

        movzx   ebx, word ptr [edi]
        mov     [scanstart], ebx
        movzx   ebx, word ptr [edi + eax - 1]
        mov     [scanend], ebx
        mov     edi, [edx + dsPrev]

;;; Jump into the main loop.

        mov     edx, [chainlenwmask]
        jmp     short LoopEntry

align 4

;;; do {
;;;     match = s->window + cur_match;
;;;     if (*(ushf*)(match+best_len-1) != scan_end ||
;;;         *(ushf*)match != scan_start) continue;
;;;     [...]
;;; } while ((cur_match = prev[cur_match & wmask]) > limit
;;;          && --chain_length != 0);
;;;
;;; Here is the inner loop of the function. The function will spend the
;;; majority of its time in this loop, and majority of that time will
;;; be spent in the first ten instructions.
;;;
;;; Within this loop:
;;; ebx = scanend
;;; ecx = curmatch
;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
;;; esi = windowbestlen - i.e., (window + bestlen)
;;; edi = prev
;;; ebp = limit

LookupLoop:
        and     ecx, edx
        movzx   ecx, word ptr [edi + ecx*2]
        cmp     ecx, ebp
        jbe     LeaveNow
        sub     edx, 00010000h          ; --chainlen (kept in the high word)
        js      LeaveNow
LoopEntry:
        movzx   eax, word ptr [esi + ecx - 1]
        cmp     eax, ebx
        jnz     LookupLoop
        mov     eax, [window]
        movzx   eax, word ptr [eax + ecx]
        cmp     eax, [scanstart]
        jnz     LookupLoop

;;; Store the current value of chainlen.

        mov     [chainlenwmask], edx

;;; Point edi to the string under scrutiny, and esi to the string we
;;; are hoping to match it up with. In actuality, esi and edi are
;;; both pointed (scanalign + 0108h) bytes ahead, and edx is
;;; initialized to -0108h, so [esi + edx] / [edi + edx] start out
;;; scanalign bytes into each string. (0108h is the value the original
;;; comments call MAX_MATCH_8.)

        mov     esi, [window]
        mov     edi, [scan]
        add     esi, ecx
        mov     eax, [scanalign]
        mov     edx, 0fffffef8h         ; -(MAX_MATCH_8)
        lea     edi, [edi + eax + 0108h] ; MAX_MATCH_8
        lea     esi, [esi + eax + 0108h] ; MAX_MATCH_8

;;; Test the strings for equality, 8 bytes at a time. At the end,
;;; adjust edx so that it is offset to the exact byte that mismatched.
;;;
;;; We already know at this point that the first three bytes of the
;;; strings match each other, and they can be safely passed over before
;;; starting the compare loop. So what this code does is skip over 0-3
;;; bytes, as much as necessary in order to dword-align the edi
;;; pointer. (esi will still be misaligned three times out of four.)
;;;
;;; It should be confessed that this loop usually does not represent
;;; much of the total running time. Replacing it with a more
;;; straightforward "rep cmpsb" would not drastically degrade
;;; performance.

LoopCmps:
        mov     eax, [esi + edx]
        xor     eax, [edi + edx]
        jnz     LeaveLoopCmps
        mov     eax, [esi + edx + 4]
        xor     eax, [edi + edx + 4]
        jnz     LeaveLoopCmps4
        add     edx, 8
        jnz     LoopCmps
        jmp     short LenMaximum
LeaveLoopCmps4:
        add     edx, 4
LeaveLoopCmps:
        test    eax, 0000FFFFh          ; difference within the low two bytes?
        jnz     LenLower
        add     edx, 2
        shr     eax, 16
LenLower:
        sub     al, 1                   ; carry set only when al was 0 (byte equal)
        adc     edx, 0                  ; ...in which case skip one more byte

;;; Calculate the length of the match. If it is longer than MAX_MATCH,
;;; then automatically accept it as the best possible match and leave.

        lea     eax, [edi + edx]
        mov     edi, [scan]
        sub     eax, edi
        cmp     eax, MAX_MATCH
        jge     LenMaximum

;;; If the length of the match is not longer than the best match we
;;; have so far, then forget it and return to the lookup loop.

        mov     edx, [deflatestate]
        mov     ebx, [bestlen]
        cmp     eax, ebx
        jg      LongerMatch
        mov     esi, [windowbestlen]
        mov     edi, [edx + dsPrev]
        mov     ebx, [scanend]
        mov     edx, [chainlenwmask]
        jmp     LookupLoop

;;; s->match_start = cur_match;
;;; best_len = len;
;;; if (len >= nice_match) break;
;;; scan_end = *(ushf*)(scan+best_len-1);

LongerMatch:
        mov     ebx, [nicematch]
        mov     [bestlen], eax
        mov     [edx + dsMatchStart], ecx
        cmp     eax, ebx
        jge     LeaveNow
        mov     esi, [window]
        add     esi, eax
        mov     [windowbestlen], esi
        movzx   ebx, word ptr [edi + eax - 1]
        mov     edi, [edx + dsPrev]
        mov     [scanend], ebx
        mov     edx, [chainlenwmask]
        jmp     LookupLoop

;;; Accept the current string, with the maximum possible length.

LenMaximum:
        mov     edx, [deflatestate]
        mov     dword ptr [bestlen], MAX_MATCH
        mov     [edx + dsMatchStart], ecx

;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
;;; return s->lookahead;

LeaveNow:
        mov     edx, [deflatestate]
        mov     ebx, [bestlen]
        mov     eax, [edx + dsLookahead]
        cmp     ebx, eax
        jg      LookaheadRet            ; best_len > lookahead: return lookahead
        mov     eax, ebx
LookaheadRet:

;;; Restore the stack and return from whence we came.

        add     esp, LocalVarsSize
        pop     ebx
        pop     esi
        pop     edi
        pop     ebp

        ret
; please don't remove this string !
; Your can freely use gvmat32 in any free or commercial app if you don't remove the string in the binary!
        db      0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah

IFDEF NOUNDERLINE
longest_match_686   endp
ELSE
_longest_match_686  endp
ENDIF

_TEXT   ends
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -