⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cdctasm.asm

📁 vc++ mp3 源码下载 使用vc写的mp3 播放器
💻 ASM
字号:
;/*
; * DCT functions ASM version for the XingMPG Decoder
; *
; * Copyright (c) 1999, Jean-Michel HERVE
; *
; * Code : TuO / StG
; * Date : 08/04/99
; *
; * WARNING : only the _fdct32_* functions have been tested. The other ones
; *           are derived from them but have not been tested at all and
; *           should be checked.
; * NOTE : I don't think the forward_bf and back_bf macros can be optimized
; *        any further (except maybe by changing the algorithm).
; *
; * This program is free software; you can redistribute it and/or modify
; * it under the terms of the GNU General Public License as published by
; * the Free Software Foundation; either version 2 of the License, or
; * (at your option) any later version.
; * 
; * This program is distributed in the hope that it will be useful,
; * but WITHOUT ANY WARRANTY; without even the implied warranty of
; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; * GNU General Public License for more details.
; * 
; * You should have received a copy of the GNU General Public License
; * along with this program; if not, write to the Free Software
; * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; * 
; * $Id: cdctasm.asm,v 1.3 1999/04/22 08:24:01 mhw Exp $
; */

  BITS 32

SECTION .data USE32

;/* Generic butterfly routines taking (m, n, x[], f[] [, coef[]]) on the stack */
GLOBAL _forward_bf_asm
GLOBAL _back_bf_asm

;/* fdct function exportation */
GLOBAL _fdct32_asm
GLOBAL _fdct32_dual_asm

;/* Coefficient table defined in another module; read as dwords by fmul */
EXTERN _coef32

;/*
; * Static scratch variables shared by the routines below.  Because they
; * live in .data rather than on the stack, the routines that use them
; * share one copy of this state.
; */
ValC    dd 0                           ;/* c[] pointer saved by the fdct32 entry points */
Val4N   dd 0                           ;/* n*4 (back_bf: byte step between groups) */
ValN    dd 0                           ;/* n */
ValN2   dd 0                           ;/* n/2 */
ValN21  dd 0                           ;/* n/2 - 1 (back_bf) */
ValX    dd 0                           ;/* not referenced by the code visible in this file */
ValF    dd 0                           ;/* not referenced by the code visible in this file */
ValCoef dd 0                           ;/* coef[] pointer (forward_bf) */
ValP0   dd 0                           ;/* p0, first element index of the current group (forward_bf) */

;/* 32 floats tables */
;/* Used alternately as source and destination by the fdct32 stages */
tab_a   dd 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
tab_b   dd 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0

SECTION .text USE32

;/*
; * forward_bf ASM Version
; *
; * Stack arguments (all DWORD, first argument lowest): m, n, x[], f[], coef[]
; *
; * With n2 = n/2, for each of the m groups of n floats (group base index
; * p0 = 0, n, 2n, ...) and for j = 0 .. n2-1:
; *     f[p0+j]    = x[p0+j] + x[p0+n-1-j]
; *     f[p0+n2+j] = coef[j] * (x[p0+j] - x[p0+n-1-j])
; *
; * Returns without touching memory when m <= 0 or n < 2 (the loops are
; * dec/jnz counted and would otherwise wrap around).
; *
; * All integer registers are preserved (pushad/popad).  Up to four x87
; * stack registers are used and the x87 stack is left balanced.
; * Scratch values are kept in the static ValN/ValN2/ValCoef/ValP0
; * variables, so the routine is not reentrant.
; */
_forward_bf_asm: ;// PROC m,  n,    x[],  f[],   coef[] :  DWORD
                 ;//      ebp ebp+4 ebp+8 ebp+12 ebp+16
        push   ebp
        lea    ebp,[esp+8]             ;/* ebp -> first argument (m) */
        pushad

        ;/* Initialization part */
        mov    eax,[ebp+4]
        mov    [ValN],eax
        shr    eax,1
        mov    [ValN2],eax             ;/* n2 is eax */
        test   eax,eax
        jz     .Done                   ;/* n < 2 : no pair to process */
        mov    esi,[ebp+8]             ;/* x[0] is esi */
        mov    ebx,[ebp+12]            ;/* f[0] is ebx */
        lea    edx,[ebx+eax*4]         ;/* f[n2] is edx */
        mov    edi,[ebp+16]            ;/* coef[] is edi */
        mov    [ValCoef],edi

        mov    dword [ValP0],0

        mov    ecx,[ebp]               ;/* m-controlled loop */
        test   ecx,ecx
        jle    .Done                   ;/* m <= 0 : nothing to do */
      .BigLoop:
        push   ecx

        mov    edi,[ValCoef]           ;/* coef is edi (restart at coef[0]) */
        mov    ecx,[ValP0]             ;/* p is ecx */
        mov    ebp,ecx                 ;/* arguments are no longer needed */
        add    ebp,[ValN2]
        dec    ebp                     ;/* q-n2 is ebp */
        lea    ebp,[esi+ebp*4]         ;/* x[q-n2] is ebp */
                                       ;/* This way we can use the loop */
                                       ;/* counter to index the table */
                                       ;/* it saves 1 dec per loop */
        mov    eax,[ValN2]             ;/* n2-controlled loop */
      .SmallLoop:
        fld    dword [esi+ecx*4]       ;/* Load x[p] */
        fld    st0                     ;/* Duplicate x[p] */
        fld    dword [ebp+eax*4]       ;/* Load x[q] */
        fld    st0                     ;/* Duplicate x[q] */
        faddp  st2,st0                 ;/* x[p]+x[q] */
        fxch   st0,st1                 ;/* put the sum on top */
        fstp   dword [ebx+ecx*4]       ;/* f[p] = x[p]+x[q] */
        fsubp  st1,st0                 ;/* x[p]-x[q] */
        fmul   dword [edi]             ;/* coef[k] * (x[p]-x[q]) */
        fstp   dword [edx+ecx*4]       ;/* f[n2+p] */

        add    edi,4                   ;/* Update coef[k] */
        inc    ecx                     ;/* Update p */
        dec    eax                     ;/* Update loop counter (also moves q down) */
        jnz    .SmallLoop

        mov    eax,[ValN]
        add    [ValP0],eax             ;/* Update p0 */

        pop    ecx
        dec    ecx
        jnz    .BigLoop

      .Done:
        popad
        pop    ebp
        ret

;/*
; * back_bf ASM Version
; *
; * Stack arguments (all DWORD, first argument lowest): m, n, x[], f[]
; *
; * With n2 = n/2, for each of the m groups of n floats (group base p0):
; *     f[p0+2j]   = x[p0+j]                        j = 0 .. n2-1
; *     f[p0+2j+1] = x[p0+n2+j] + x[p0+n2+j+1]      j = 0 .. n2-2
; *     f[p0+2*n2-1] = x[p0+2*n2-1]
; *
; * Returns without touching memory when m <= 0 or n < 2.  For n == 2 the
; * summing loop has zero iterations and is skipped (its dec/jnz counter
; * would otherwise start at 0 and wrap around).
; *
; * All integer registers are preserved (pushad/popad); the x87 stack is
; * left balanced.  Scratch values are kept in the static ValN/Val4N/
; * ValN2/ValN21 variables, so the routine is not reentrant.
; */
_back_bf_asm: ;// PROC m,  n,    x[],  f[],   :  DWORD
              ;//      ebp ebp+4 ebp+8 ebp+12
        push   ebp
        lea    ebp,[esp+8]             ;/* ebp -> first argument (m) */
        pushad

        mov    edi,[ebp+8]             ;/* x[0] is edi */
        mov    esi,[ebp+12]            ;/* f[0] is esi */

        mov    eax,[ebp+4]
        mov    [ValN],eax
        shl    eax,2
        mov    [Val4N],eax             ;/* n*4 : byte size of one group */
        shr    eax,3
        mov    [ValN2],eax             ;/* n2 = n/2 */
        test   eax,eax
        jz     .Done                   ;/* n < 2 : nothing to do */

        lea    edx,[edi+eax*4]         ;/* x[n2] */

        dec    eax
        mov    [ValN21],eax            ;/* n21 = n2-1 */

        mov    ecx,[ebp]               ;/* m-controlled loop */
        mov    ebp,edx                 ;/* x[n2] is ebp (arguments no longer needed) */
        test   ecx,ecx
        jle    .Done                   ;/* m <= 0 : nothing to do */
      .BigLoop:

        xor    ebx,ebx
        mov    eax,[ValN2]             ;/* n2 >= 1 here */
      .SmallLoop1:
        mov    edx,[edi+ebx*4]
        mov    [esi+ebx*8],edx         ;/* f[p]=x[q], even p */
        inc    ebx
        dec    eax
        jnz    .SmallLoop1

        xor    ebx,ebx

        mov    eax,[ValN21]            ;/* n/2 -1 */
        test   eax,eax
        jz     .LastOdd                ;/* n == 2 : no sums, only the final copy */
      .SmallLoop2:

        fld    dword [ebp+ebx*4]       ;/* x[q] */
        fadd   dword [ebp+ebx*4+4]     ;/* x[q]+x[q+1] */
        fstp   dword [esi+ebx*8+4]     ;/* f[p]=x[q]+x[q+1], odd p */
        inc    ebx
        dec    eax
        jnz    .SmallLoop2

      .LastOdd:
        mov    edx,[ebp+ebx*4]
        mov    [esi+ebx*8+4],edx       ;/* last odd f[p]=x[q] */

        mov    eax,[Val4N]
        add    edi,eax                 ;/* Next group: x[p0]    (edi) */
        add    ebp,eax                 ;/* Next group: x[p0+n2] (ebp) */
        add    esi,eax                 ;/* Next group: f[p0]    (esi) */

        dec    ecx
        jnz    .BigLoop

      .Done:
        popad
        pop    ebp
        ret

;/*
; * forward_bf macro ASM Version
; *
; * Parameters:
; *   %1 = m     number of groups (run-time loop count loaded into ecx)
; *   %2 = n     floats per group; must be an assemble-time constant
; *              because it drives the %rep unrolling
; *   %3 = x     source operand loaded into esi
; *   %4 = f     destination operand loaded into ebx
; *   %5 = coef  address expression, used inside [ ] as the coefficient base
; *
; * For each group (base index p0) and i = 0 .. n/2-1:
; *     f[p0+i]     = x[p0+i] + x[p0+n-1-i]
; *     f[p0+n/2+i] = coef[i] * (x[p0+i] - x[p0+n-1-i])
; *
; * Clobbers ebx, ecx, edx, esi, edi and the flags.  Uses three x87 stack
; * registers and leaves the x87 stack balanced.
; */
%macro forward_bf_macro 5
        ;/* Pointer setup */
        mov    esi,%3                  ;/* esi = x[0] */
        mov    ebx,%4                  ;/* ebx = f[0] */
        lea    edx,[ebx+((%2)/2)*4]    ;/* edx = f[n/2] */

        mov    ecx,%1                  ;/* ecx = groups left */
        xor    edi,edi                 ;/* edi = p0, in elements */
      %%NextGroup:
;/* One fully unrolled group: i walks up, the mirrored index n-1-i walks down */
%assign i 0
%rep (%2)/2
        fld    dword [esi+edi*4+i*4]           ;/* st0 = x[p] */
        fld    dword [esi+edi*4+((%2)-1-i)*4]  ;/* st0 = x[q], st1 = x[p] */
        fld    st1                             ;/* st0 = x[p] (copy) */
        fadd   st0,st1                         ;/* st0 = x[p]+x[q] */
        fstp   dword [ebx+edi*4+i*4]           ;/* f[p] = sum; st0 = x[q], st1 = x[p] */
        fsubp  st1,st0                         ;/* st0 = x[p]-x[q] */
        fmul   dword [%5+i*4]                  ;/* scale by coef[i] */
        fstp   dword [edx+edi*4+i*4]           ;/* f[n/2+p] */
%assign i i+1
%endrep

        add    edi,%2                  ;/* p0 += n */

        dec    ecx
        jnz    near %%NextGroup
%endmacro

;/*
; * back_bf macro ASM Version
; *
; * Parameters:
; *   %1 = m  number of groups; must be an assemble-time constant (tested
; *           with %if so that no loop is emitted for a single group)
; *   %2 = n  floats per group; must be an assemble-time constant because
; *           it drives the %rep unrolling
; *   %3 = x  source address; must be an address constant, since
; *           x + (n/2)*4 is formed at assemble time
; *   %4 = f  destination operand loaded into esi (a symbol, or a memory
; *           operand such as [ValC] holding the pointer)
; *
; * For each group (byte offset eax from the start of x and f):
; *     f[2j]   = x[j]                       j = 0 .. n/2-1
; *     f[2j+1] = x[n/2+j] + x[n/2+j+1]      j = 0 .. n/2-2
; *     f[n-1]  = x[n-1]
; *
; * Clobbers eax, ecx (only when m > 1), edx, esi, edi, ebp and the flags.
; * NOTE: ebp is overwritten, so any ebp-relative argument must be read
; * before this macro is used.
; */
%macro back_bf_macro 4
        mov    edi,%3                  ;/* x[0] is edi */
        mov    esi,%4                  ;/* f[0] is esi */
        mov    ebp,%3+(((%2)/2)*4)     ;/* x[n2] is ebp */
        xor    eax,eax                 ;/* p0 (as a byte offset) is eax */

;/* Main loop counter initialization (if needed!) */
%if (%1)>1
        mov    ecx,%1                  ;/* Avoid 1 step loop */
      %%BigLoop:
%endif

;/* Unrolled loop: even outputs are straight copies */
%assign j 0
%rep (%2)/2
        mov    edx,[edi+eax+j*4]
        mov    [esi+eax+j*8],edx       ;/* f[p]=x[q] */
%assign j j+1
%endrep


;/* Another unrolled loop: odd outputs are sums of neighbours */
%assign j 0
%rep ((%2)/2)-1
        fld    dword [ebp+eax+j*4]     ;/* x[q] */
        fadd   dword [ebp+eax+j*4+4]   ;/* x[q]+x[q+1] */
        fstp   dword [esi+eax+j*8+4]   ;/* f[p]=x[q]+x[q+1] */
%assign j j+1
%endrep

        ;/* j is now n/2-1: the last odd output is a straight copy */
        mov    edx,[ebp+eax+j*4]
        mov    [esi+eax+j*8+4],edx     ;/* f[p]=x[q] */

        add    eax,(%2)*4              ;/* Update p0 */

;/* Loop */
%if (%1)>1                             ;/* Avoid 1 step loop */
        dec    ecx
        jnz    near %%BigLoop
%endif
%endmacro

;/*
; * Special case of fdct32 and fdct32_dual functions (first stage)
; *
; * Parameter:
; *   %1 = element stride of the input, in floats (the callers below use
; *        1 for fdct32 and 2 for fdct32_dual)
; *
; * Expects ebp to point at a DWORD holding the address of x[].
; * For p = 0 .. 15 and q = 31-p:
; *     tab_a[p]    = x[p*stride] + x[q*stride]
; *     tab_a[16+p] = _coef32[p] * (x[p*stride] - x[q*stride])
; *
; * Clobbers edi.  Uses three x87 stack registers and leaves the x87
; * stack balanced.
; */
%macro fdct32_specialcase 1
        mov    edi,[ebp]               ;/* x[0] is edi */
;/* Unrolled loop */
%assign p 0
%assign q 31
%rep 16
        fld    dword [edi+p*4*(%1)]    ;/* Load x[p] */
        fld    st0                     ;/* Duplicate x[p] */
        fld    dword [edi+q*4*(%1)]    ;/* Load x[q] */
        fadd   st1,st0                 ;/* x[p]+x[q] */
        fxch   st1                     ;/* put the sum on top */
        fstp   dword [tab_a+p*4]       ;/* tab_a[p] = x[p]+x[q] */
        fsubp  st1,st0                 ;/* x[p]-x[q] */
        fmul   dword [_coef32+p*4]     ;/* coef32[p] * (x[p]-x[q]) */
        fstp   dword [tab_a+p*4+16*4]  ;/* tab_a[16+p] */
%assign p p+1
%assign q q-1
%endrep
%endmacro

;/*
; * fdct32 ASM Version
; *
; * Stack arguments (DWORD pointers): x[] (input), c[] (output).
; * Reads x[0..31] with a stride of one float, runs the forward and
; * backward butterfly stages through the static tab_a/tab_b buffers and
; * writes 32 floats to c[].
; *
; * All integer registers are preserved (pushad/popad).  The static
; * buffers and ValC are shared state, so the routine is not reentrant.
; */
_fdct32_asm: ;// PROC x[], c[] :  DWORD
             ;//      ebp  ebp+4
        push   ebp
        lea    ebp,[esp+8]             ;/* ebp -> first argument (x[]) */
        pushad

        ;/* Save c[] now: back_bf_macro overwrites ebp later on */
        mov    eax,[ebp+4]
        mov    [ValC],eax

        ;/* Special first stage for single chan */
        ;/* x[] -> tab_a, input stride of 1 float */
        fdct32_specialcase 1

        ;/* Forward stages: arguments are m, n, source, destination, coef base. */
        ;/* The two tables swap roles at every stage. */
        forward_bf_macro 2 , 16, tab_a, tab_b, _coef32 + (16)*4
        forward_bf_macro 4 , 8 , tab_b, tab_a, _coef32 + (16 + 8)*4
        forward_bf_macro 8 , 4 , tab_a, tab_b, _coef32 + (16 + 8 + 4)*4
        forward_bf_macro 16, 2 , tab_b, tab_a, _coef32 + (16 + 8 + 4 + 2)*4
        ;/* Backward stages: arguments are m, n, source, destination. */
        ;/* The last stage stores straight into the caller's c[]. */
        back_bf_macro    8 , 4 , tab_a, tab_b
        back_bf_macro    4 , 8 , tab_b, tab_a
        back_bf_macro    2 , 16, tab_a, tab_b
        back_bf_macro    1 , 32, tab_b, [ValC]

        popad
        pop    ebp
        ret

;/*
; * fdct32_dual ASM Version
; *
; * Stack arguments (DWORD pointers): x[] (input), c[] (output).
; * Same pipeline as _fdct32_asm, except that the first stage reads every
; * second float of x[] (x[0], x[2], ... x[62]).  Writes 32 floats to c[].
; *
; * All integer registers are preserved (pushad/popad).  The static
; * buffers and ValC are shared state, so the routine is not reentrant.
; */
_fdct32_dual_asm: ;// PROC x[], c[] :  DWORD
                  ;//      ebp  ebp+4
        push   ebp
        lea    ebp,[esp+8]             ;/* ebp -> first argument (x[]) */
        pushad

        ;/* Save c[] now: back_bf_macro overwrites ebp later on */
        mov    eax,[ebp+4]
        mov    [ValC],eax

        ;/* Special first stage for dual chan (interleaved x) */
        ;/* x[] -> tab_a, input stride of 2 floats */
        fdct32_specialcase 2

        ;/* Forward stages: arguments are m, n, source, destination, coef base. */
        ;/* The two tables swap roles at every stage. */
        forward_bf_macro 2 , 16, tab_a, tab_b, _coef32 + (16)*4
        forward_bf_macro 4 , 8 , tab_b, tab_a, _coef32 + (16 + 8)*4
        forward_bf_macro 8 , 4 , tab_a, tab_b, _coef32 + (16 + 8 + 4)*4
        forward_bf_macro 16, 2 , tab_b, tab_a, _coef32 + (16 + 8 + 4 + 2)*4
        ;/* Backward stages: arguments are m, n, source, destination. */
        ;/* The last stage stores straight into the caller's c[]. */
        back_bf_macro    8 , 4 , tab_a, tab_b
        back_bf_macro    4 , 8 , tab_b, tab_a
        back_bf_macro    2 , 16, tab_a, tab_b
        back_bf_macro    1 , 32, tab_b, [ValC]

        popad
        pop    ebp
        ret

end

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -