⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gvmat32.asm

📁 gcc的组建
💻 ASM
📖 第 1 页 / 共 2 页
字号:
; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86
; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
; File written by Gilles Vollant, by modifying the longest_match
;  from Jean-loup Gailly in deflate.c
;
;         http://www.zlib.net
;         http://www.winimage.com/zLibDll
;         http://www.muppetlabs.com/~breadbox/software/assembly.html
;
; For Visual C++ 4.x and higher and ML 6.x and higher
;   ml.exe is in directory \MASM611C of Win95 DDK
;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
;
; This file contains two implementations of longest_match:
;
;  longest_match_7fff : written 1996 by Gilles Vollant, optimized for the
;            first Pentium. Assumes s->w_mask == 0x7fff
;  longest_match_686 : written by Brian Raiter (1998), optimized for Pentium Pro
;
;  To use an assembly version of longest_match, you need to define ASMV in the project.
;  There are two ways to use gvmat32.asm:
;
;  A) Suggested method
;    if you want include both longest_match_7fff and longest_match_686
;    compile the asm file running
;           ml /coff /Zi /Flgvmat32.lst /c gvmat32.asm
;    and include gvmat32c.c in your project
;    if you have an old cpu (386,486 or first Pentium) and s->w_mask==0x7fff,
;        longest_match_7fff will be used
;    if you have a more modern CPU (Pentium Pro, II and higher)
;        longest_match_686 will be used
;    on an old cpu with s->w_mask!=0x7fff, longest_match_686 will be used,
;        but this is not a situation you'll find often
;
;  B) Alternative
;    if you are not interested in old cpu performance and want the smallest
;       possible binaries
;
;    compile the asm file running
;           ml /coff /Zi /c /Flgvmat32.lst /DNOOLDPENTIUMCODE gvmat32.asm
;    and do not include gvmat32c.c in your project (or also define
;              NOOLDPENTIUMCODE)
;
; note : as far as I know, longest_match_686 is much faster than longest_match_7fff
;        on Pentium Pro/II/III, faster (but by less) on P4, but it seems
;        longest_match_7fff can be faster (by a very small margin) on AMD Athlon64/K8
;
; see below : zlib1222add must be adjusted if you use a zlib version < 1.2.2.2

;uInt longest_match_7fff(s, cur_match)
;    deflate_state *s;
;    IPos cur_match;                             /* current match */

    ; Stack frame of longest_match_7fff. Each name below expands to an
    ; esp-relative memory operand; the offsets are valid once the prologue has
    ; pushed ebp, edi, esi, ebx and subtracted NbStackAdd from esp.
    NbStack         equ     76

    ; arguments pushed by the caller
    cur_match       equ     dword ptr [esp+NbStack]
    str_s           equ     dword ptr [esp+NbStack-4]

    ; return address, then the four saved registers in push order
    ; (ebp, edi, esi, ebx)
    adrret          equ     dword ptr [esp+NbStack-8]
    pushebp         equ     dword ptr [esp+NbStack-12]
    pushedi         equ     dword ptr [esp+NbStack-16]
    pushesi         equ     dword ptr [esp+NbStack-20]
    pushebx         equ     dword ptr [esp+NbStack-24]

    ; local variables (scan_start is the only 16-bit slot)
    chain_length    equ     dword ptr [esp+NbStack-28]
    limit           equ     dword ptr [esp+NbStack-32]
    best_len        equ     dword ptr [esp+NbStack-36]
    window          equ     dword ptr [esp+NbStack-40]
    prev            equ     dword ptr [esp+NbStack-44]
    scan_start      equ      word ptr [esp+NbStack-48]
    wmask           equ     dword ptr [esp+NbStack-52]
    match_start_ptr equ     dword ptr [esp+NbStack-56]
    nice_match      equ     dword ptr [esp+NbStack-60]
    scan            equ     dword ptr [esp+NbStack-64]
    windowlen       equ     dword ptr [esp+NbStack-68]
    match_start     equ     dword ptr [esp+NbStack-72]
    strend          equ     dword ptr [esp+NbStack-76]

    ; byte size of the local-variable area (the 13 slots from strend up to
    ; chain_length); this is the amount the prologue subtracts from esp
    NbStackAdd      equ     (NbStack-24)

    .386p

    name    gvmatch
    .MODEL  FLAT



;  The dep_* equates are the byte offsets of the deflate_state fields read by
;  this code. Fields have been added to the deflate_state structure in zlib
;  since the asm code was first written, so every offset is shifted by
;  zlib1222add:
;    zlib 1.0.4 or older         : use "zlib1222add equ (-4)"
;    zlib 1.0.5 through 1.2.2.1  : use "zlib1222add equ 0"
;    zlib 1.2.2.2 or later       : use "zlib1222add equ 8"

    zlib1222add         equ     8

;  Note : these values are valid when the structure is packed on an 8-byte
;  boundary. They are listed in ascending offset order.
    dep_w_size          equ     24h+zlib1222add     ; s->w_size
    dep_w_mask          equ     2ch+zlib1222add     ; s->w_mask
    dep_window          equ     30h+zlib1222add     ; s->window
    dep_prev            equ     38h+zlib1222add     ; s->prev
    dep_strstart        equ     64h+zlib1222add     ; s->strstart
    dep_match_start     equ     68h+zlib1222add     ; s->match_start
    dep_lookahead       equ     6ch+zlib1222add     ; s->lookahead
    dep_prev_length     equ     70h+zlib1222add     ; s->prev_length
    dep_chain_length    equ     74h+zlib1222add     ; s->max_chain_length
    dep_good_match      equ     84h+zlib1222add     ; s->good_match
    dep_nice_match      equ     88h+zlib1222add     ; s->nice_match


_TEXT                   segment

; Exported symbols.
;   NOOLDPENTIUMCODE defined : export longest_match and match_init.
;   otherwise                : export longest_match_7fff, cpudetect32 and
;                              longest_match_686.
;   NOUNDERLINE defined      : export the names without the leading underscore.
IFDEF NOOLDPENTIUMCODE
   IFDEF NOUNDERLINE
            public  longest_match
            public  match_init
   ELSE
            public  _longest_match
            public  _match_init
   ENDIF
ELSE
   IFDEF NOUNDERLINE
            public  longest_match_7fff
            public  cpudetect32
            public  longest_match_686
   ELSE
            public  _longest_match_7fff
            public  _cpudetect32
            public  _longest_match_686
   ENDIF
ENDIF

    ; match-length limits
    MIN_MATCH           equ     3
    MAX_MATCH           equ     258
    ; subtracted from s->w_size to obtain MAX_DIST(s)
    MIN_LOOKAHEAD       equ     (MAX_MATCH+MIN_MATCH+1)



IFNDEF NOOLDPENTIUMCODE
IFDEF NOUNDERLINE
longest_match_7fff   proc near
ELSE
_longest_match_7fff  proc near
ENDIF

    mov     edx,[esp+4]



    push    ebp
    push    edi
    push    esi
    push    ebx

    sub     esp,NbStackAdd

; initialize or check the variables used in match.asm.
    mov     ebp,edx

; chain_length = s->max_chain_length
; if (prev_length>=good_match) chain_length >>= 2
    mov     edx,[ebp+dep_chain_length]
    mov     ebx,[ebp+dep_prev_length]
    cmp     [ebp+dep_good_match],ebx
    ja      noshr
    shr     edx,2
noshr:
; we increment chain_length because in the asm, the --chain_lenght is in the beginning of the loop
    inc     edx
    mov     edi,[ebp+dep_nice_match]
    mov     chain_length,edx
    mov     eax,[ebp+dep_lookahead]
    cmp     eax,edi
; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
    jae     nolookaheadnicematch
    mov     edi,eax
nolookaheadnicematch:
; best_len = s->prev_length
    mov     best_len,ebx

; window = s->window
    mov     esi,[ebp+dep_window]
    mov     ecx,[ebp+dep_strstart]
    mov     window,esi

    mov     nice_match,edi
; scan = window + strstart
    add     esi,ecx
    mov     scan,esi
; dx = *(ushf*)scan   (esi = window + strstart = scan here)
    mov     dx,word ptr [esi]
; bx = *(ushf*)(scan+best_len-1)
    mov     bx,word ptr [esi+ebx-1]
    add     esi,MAX_MATCH-1
; scan_start = *scan
    mov     scan_start,dx
; strend = scan + MAX_MATCH-1
    mov     strend,esi
; bx = scan_end = *(ushf*)(scan+best_len-1)

;    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
;        s->strstart - (IPos)MAX_DIST(s) : NIL;

    mov     esi,[ebp+dep_w_size]
    sub     esi,MIN_LOOKAHEAD
; here esi = MAX_DIST(s)
    sub     ecx,esi
    ja      nodist
    xor     ecx,ecx
nodist:
    mov     limit,ecx

; prev = s->prev
    mov     edx,[ebp+dep_prev]
    mov     prev,edx

;
    mov     edx,dword ptr [ebp+dep_match_start]
    mov     bp,scan_start
    mov     eax,cur_match
    mov     match_start,edx

    mov     edx,window
    mov     edi,edx
    add     edi,best_len
    mov     esi,prev
    dec     edi
; windowlen = window + best_len -1
    mov     windowlen,edi

    jmp     beginloop2
    align   4

; here, in the loop
;       eax = ax = cur_match
;       ecx = limit
;        bx = scan_end
;        bp = scan_start
;       edi = windowlen (window + best_len -1)
;       esi = prev


;// here; chain_length <=16
normalbeg0add16:
    add     chain_length,16
    jz      exitloop
normalbeg0:
    cmp     word ptr[edi+eax],bx
    je      normalbeg2noroll
rcontlabnoroll:
; cur_match = prev[cur_match & wmask]
    and     eax,7fffh
    mov     ax,word ptr[esi+eax*2]
; if cur_match <= limit, go to exitloop
    cmp     ecx,eax
    jnb     exitloop
; if --chain_length != 0, loop back to normalbeg0; otherwise go to exitloop
    dec     chain_length
    jnz     normalbeg0
    jmp     exitloop

normalbeg2noroll:
; if (scan_start==*(cur_match+window)) goto normalbeg2
    cmp     bp,word ptr[edx+eax]
    jne     rcontlabnoroll
    jmp     normalbeg2

contloop3:
    mov     edi,windowlen

; cur_match = prev[cur_match & wmask]
    and     eax,7fffh
    mov     ax,word ptr[esi+eax*2]
; if cur_match <= limit, go to exitloop
    cmp     ecx,eax
jnbexitloopshort1:
    jnb     exitloop
; otherwise fall through to beginloop2, where chain_length is adjusted


; begin the main loop
beginloop2:
    sub     chain_length,16+1
; if chain_length <=16, don't use the unrolled loop
    jna     normalbeg0add16

do16:
    cmp     word ptr[edi+eax],bx
    je      normalbeg2dc0

maccn   MACRO   lab
    and     eax,7fffh
    mov     ax,word ptr[esi+eax*2]
    cmp     ecx,eax
    jnb     exitloop
    cmp     word ptr[edi+eax],bx
    je      lab
    ENDM

rcontloop0:
    maccn   normalbeg2dc1

rcontloop1:
    maccn   normalbeg2dc2

rcontloop2:
    maccn   normalbeg2dc3

rcontloop3:
    maccn   normalbeg2dc4

rcontloop4:
    maccn   normalbeg2dc5

rcontloop5:
    maccn   normalbeg2dc6

rcontloop6:
    maccn   normalbeg2dc7

rcontloop7:
    maccn   normalbeg2dc8

rcontloop8:
    maccn   normalbeg2dc9

rcontloop9:
    maccn   normalbeg2dc10

rcontloop10:
    maccn   short normalbeg2dc11

rcontloop11:
    maccn   short normalbeg2dc12

rcontloop12:
    maccn   short normalbeg2dc13

rcontloop13:
    maccn   short normalbeg2dc14

rcontloop14:
    maccn   short normalbeg2dc15

rcontloop15:
    and     eax,7fffh
    mov     ax,word ptr[esi+eax*2]
    cmp     ecx,eax
    jnb     exitloop

    sub     chain_length,16
    ja      do16
    jmp     normalbeg0add16

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

normbeg MACRO   rcontlab,valsub
; if we are here, we know that *(match+best_len-1) == scan_end
    cmp     bp,word ptr[edx+eax]
; if (match != scan_start) goto rcontlab
    jne     rcontlab
; calculate the good chain_length, and we'll compare scan and match string
    add     chain_length,16-valsub
    jmp     iseq
    ENDM


normalbeg2dc11:
    normbeg rcontloop11,11

normalbeg2dc12:
    normbeg short rcontloop12,12

normalbeg2dc13:
    normbeg short rcontloop13,13

normalbeg2dc14:
    normbeg short rcontloop14,14

normalbeg2dc15:
    normbeg short rcontloop15,15

normalbeg2dc10:
    normbeg rcontloop10,10

normalbeg2dc9:
    normbeg rcontloop9,9

normalbeg2dc8:
    normbeg rcontloop8,8

normalbeg2dc7:
    normbeg rcontloop7,7

normalbeg2dc6:
    normbeg rcontloop6,6

normalbeg2dc5:
    normbeg rcontloop5,5

normalbeg2dc4:
    normbeg rcontloop4,4

normalbeg2dc3:
    normbeg rcontloop3,3

normalbeg2dc2:
    normbeg rcontloop2,2

normalbeg2dc1:
    normbeg rcontloop1,1

normalbeg2dc0:
    normbeg rcontloop0,0


; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end

normalbeg2:
    mov     edi,window

    cmp     bp,word ptr[edi+eax]
    jne     contloop3                   ; if *(ushf*)match != scan_start, continue

iseq:
; if we are here, we know that *(match+best_len-1) == scan_end
; and (match == scan_start)

    mov     edi,edx
    mov     esi,scan                    ; esi = scan
    add     edi,eax                     ; edi = window + cur_match = match

    mov     edx,[esi+3]                 ; compare manually dword at match+3
    xor     edx,[edi+3]                 ; and scan +3

    jz      begincompare                ; if equal, go to long compare

; we will determine the unmatch byte and calculate len (in esi)
    or      dl,dl
    je      eq1rr
    mov     esi,3
    jmp     trfinval
eq1rr:
    or      dx,dx
    je      eq1

    mov     esi,4
    jmp     trfinval
eq1:
    and     edx,0ffffffh
    jz      eq11
    mov     esi,5
    jmp     trfinval
eq11:
    mov     esi,6
    jmp     trfinval

begincompare:
    ; here we now scan and match begin same
    add     edi,6
    add     esi,6
    mov     ecx,(MAX_MATCH-(2+4))/4     ; scan for at most MAX_MATCH bytes
    repe    cmpsd                       ; loop until mismatch

    je      trfin                       ; go to trfin if not unmatch
; we determine the unmatch byte
    sub     esi,4
    mov     edx,[edi-4]
    xor     edx,[esi]

    or      dl,dl
    jnz     trfin
    inc     esi

    or      dx,dx
    jnz     trfin
    inc     esi

    and     edx,0ffffffh
    jnz     trfin
    inc     esi

trfin:
    sub     esi,scan          ; esi = len
trfinval:
; here we have finised compare, and esi contain len of equal string
    cmp     esi,best_len        ; if len > best_len, go newbestlen
    ja      short newbestlen

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -