⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mdctasm.asm

📁 希望能帮助大家学习C语言.
💻 ASM
📖 第 1 页 / 共 5 页
字号:
;/*
; * DCT functions, Assembler version
; *	MP3 decoding
; *
; * Original Copyright (c) 1999, Jean-Michel HERVE
; * Re-programmed by ULTiMaTuM
; * Copyright (c) 2000
; *
; * Code : TuO / StG
; * Date : 08/04/99
; *
; * WARNING : only _fdct32_* has been tested. The other functions are derived
; *           from it but haven't been tested at all, so they should be checked.
; * NOTE : I don't think the forward_bf and back_bf macros can be optimized
; *        any further (except maybe by changing the algorithm)
; */

  BITS 32                              ;/* assemble everything below as 32-bit code */

;/* Static data for this module. USE32 is a section qualifier asking for a */
;/* 32-bit segment (output-format specific - confirm against the build).   */
SECTION .data USE32

;/* Entry points exported to the linker. Only _imdct18_asm is defined in   */
;/* the part of the file visible here; the others are presumably defined   */
;/* further down - TODO confirm.                                            */
GLOBAL _imdct18_asm
GLOBAL _imdct6_3_asm
GLOBAL _hybrid_asm
GLOBAL _FreqInvert_asm

;/* Symbols defined outside this file.                                      */
;/*   _mdct18w  : read by _imdct18_asm as w[i], dword elements             */
;/*   _mdct18w2 : read by _imdct18_asm as the second multiplier table      */
;/*   _coef     : read by _imdct18_asm as coef[row][col], rows are 16      */
;/*               bytes apart (4 dwords)                                    */
;/* The remaining symbols are not referenced in the visible part of the    */
;/* file; their layout cannot be determined from here.                      */
EXTERN _mdct18w
EXTERN _mdct18w2
EXTERN _mdct6_3v
EXTERN _mdct6_3v2
EXTERN _coef
EXTERN _coef87
EXTERN _win
EXTERN _band_limit_nsb

;/* Not used in the visible part of the file. */
;/* NOTE(review): looks like a window-table stride/size - confirm at its use site. */
WINSIZE equ 144

;/* Scratch tables, zero-initialised dwords. _imdct18_asm uses tab_a as    */
;/* its a[] array and tab_b as its b[] array; the visible code touches     */
;/* elements 0..8 of each.                                                  */
;/* NOTE(review): these are static, so every call writes the same storage; */
;/* confirm callers never run this code concurrently.                      */
tab_a dd 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
tab_b dd 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0

;/* Single-precision constant 0.5, used as the fmul operand when           */
;/* _imdct18_asm computes f[0] and f[1].                                    */
My_FPU_Half dd 0.5

;/* One zero-initialised dword; not referenced in the visible part of the  */
;/* file. NOTE(review): the name suggests a saved return address - confirm. */
SaveReturn dd 0

;/* Code follows. */
SECTION .text USE32

;/*
; * imdct18 ASM Version
; */
_imdct18_asm: ;// PROC f[]
              ;//      ebp
        push   ebp
        lea    ebp,[esp+8]
        pushad

        mov    ebp,[ebp]               ;/* Save arg */
        mov    edi,_mdct18w
        mov    esi,_mdct18w2
        mov    eax,tab_a
        mov    ebx,tab_b
        mov    ecx,_coef
;/* Unroll the loop because it is a known-value count */
%assign p 0                            ;/* Use compiler var as counter */
%rep 4
        ;/* First part - compute ap and bp */
        fld    dword [edi+p*4]         ;/* w[p] */
        fmul   dword [ebp+p*4]         ;/* g1 */
        fld    dword [edi+(17-p)*4]    ;/* w[17-p] g1 */
        fmul   dword [ebp+(17-p)*4]    ;/* g2 g1 */
        fld    st0                     ;/* g2 g2 g1 */
        fadd   st2                     ;/* ap g2 g1 */
        fxch   st1                     ;/* g2 ap g1 */
        fsubp  st2,st0                 ;/* ap g1-g2 */
        fxch   st1                     ;/* g1-g2 ap */
        fmul   dword [esi+p*4]         ;/* bp ap */

        ;/* Second part - compute a8p and b8p */
        fld    dword [edi+(8-p)*4]     ;/* w[8-p] bp ap */
        fmul   dword [ebp+(8-p)*4]     ;/* g1 bp ap */
        fld    dword [edi+(9+p)*4]     ;/* w[9+p] g1 bp ap */
        fmul   dword [ebp+(9+p)*4]     ;/* g2 g1 bp ap */
        fld    st0                     ;/* g2 g2 g1 bp ap */
        fadd   st2                     ;/* a8p g2 g1 bp ap */
        fxch   st1                     ;/* g2 a8p g1 bp ap */
        fsubp  st2,st0                 ;/* a8p g1-g2 bp ap */
        fxch   st1                     ;/* g1-g2 a8p bp ap */
        fmul   dword [esi+(8-p)*4]     ;/* b8p a8p bp ap */

        ;/* Final part - finalize computing and store values */
        fld    st3                     ;/* ap b8p a8p bp ap */
        fadd   st2                     ;/* ap+a8p b8p a8p bp ap */
        fstp   dword [eax+p*4]         ;/* b8p a8p bp ap */
        fxch   st1                     ;/* a8p b8p bp ap */
        fsubp  st3,st0                 ;/* b8p bp ap-a8p */
        fxch   st2                     ;/* ap-a8p bp b8p */
        fstp   dword [eax+(p+5)*4]     ;/* bp b8p */
        fxch   st1                     ;/* b8p bp */
        fld    st0                     ;/* b8p b8p bp */
        fadd   st2                     ;/* b8p+bp b8p bp */
        fstp   dword [ebx+p*4]         ;/* b8p bp */
        fsubp  st1,st0                 ;/* bp-b8p */
        fstp   dword [ebx+(p+5)*4]     ;/* - */
%assign p p+1
%endrep

%assign p 4
        ;/* Last - finalize array */
        fld    dword [edi+p*4]         ;/* w[p] */
        fmul   dword [ebp+p*4]         ;/* g1 */
        fld    dword [edi+(17-p)*4]    ;/* w[17-p] g1 */
        fmul   dword [ebp+(17-p)*4]    ;/* g2 g1 */
        fld    st0                     ;/* g2 g2 g1 */
        fadd   st2                     ;/* ap g2 g1 */
        fstp   dword [eax+p*4]         ;/* g2 g1 */
        fsubp  st1,st0                 ;/* g1-g2 */
        fmul   dword [esi+p*4]         ;/* bp */
        fstp   dword [ebx+p*4]         ;/* - */

;/* Now the huge and boring part */
;//   f[0] = 0.5f * (a[0] + a[1] + a[2] + a[3] + a[4]);
;// TO DO : avoid reload of a[4] and b[4]
        fld    dword [eax+0*4]         ;/* a[0] */
        fadd   dword [eax+1*4]         ;/* a[0]+a[1] */
        fadd   dword [eax+2*4]         ;/* a[0]+a[1]+a[2] */
        fadd   dword [eax+3*4]         ;/* a[0]+a[1]+a[2]+a[3] */
        fadd   dword [eax+4*4]         ;/* a[0]+a[1]+a[2]+a[3]+a[4] */
        fmul   dword [My_FPU_Half]       ;/* 0.5*(a[0]+a[1]+a[2]+a[3]+a[4]) */
        fst    dword [ebp+0*4]         ;/* f[0] */
;//   f[1] = 0.5f * (b[0] + b[1] + b[2] + b[3] + b[4]);
        fld    dword [ebx+0*4]         ;/* b[0] f[0] */
        fadd   dword [ebx+1*4]         ;/* b[0]+b[1] f[0] */
        fadd   dword [ebx+2*4]         ;/* b[0]+b[1]+b[2] f[0] */
        fadd   dword [ebx+3*4]         ;/* b[0]+b[1]+b[2]+b[3] f[0] */
        fadd   dword [ebx+4*4]         ;/* b[0]+b[1]+b[2]+b[3]+b[4] f[0] */
        fmul   dword [My_FPU_Half]       ;/* f[1]' f[0] */
;//   f[2] = coef[1][0]*a[5]+coef[1][1]*a[6]+coef[1][2]*a[7]+coef[1][3]*a[8];
        fld    dword [ecx+1*16+0*4]    ;/* coef[1][0]  f[1]' f[0] */
        fld    dword [ecx+1*16+1*4]    ;/* coef[1][1]  coef[1][0] f[1]' f[0] */
        fld    dword [ecx+1*16+2*4]    ;/* coef[1][2]  coef[1][1] coef[1][0] f[1]' f[0] */
        fld    dword [ecx+1*16+3*4]    ;/* coef[1][3]  coef[1][2] coef[1][1] coef[1][0] f[1]' f[0] */
        fld    st3                     ;/* coef[1][0]  coef[1][3] coef[1][2] coef[1][1] coef[1][0] f[1]' f[0] */
        fmul   dword [eax+5*4]         ;/* v0          coef[1][3] coef[1][2] coef[1][1] coef[1][0] f[1]' f[0] */
        fld    st3                     ;/* coef[1][1]  v0         coef[1][3] coef[1][2] coef[1][1] coef[1][0] f[1]' f[0] */
        fmul   dword [eax+6*4]         ;/* v1          v0         coef[1][3] coef[1][2] coef[1][1] coef[1][0] f[1]' f[0] */
        faddp  st1,st0                 ;/* v1+v0       coef[1][3] coef[1][2] coef[1][1] coef[1][0] f[1]' f[0] */
        fld    st2                     ;/* coef[1][2]  v1+v0      coef[1][3] coef[1][2] coef[1][1] coef[1][0] f[1]' f[0] */
        fmul   dword [eax+7*4]         ;/* v2 v1+v0    coef[1][3] coef[1][2] coef[1][1] coef[1][0] f[1]' f[0] */
        faddp  st1,st0                 ;/* v2+v1+v0    coef[1][3] coef[1][2] coef[1][1] coef[1][0] f[1]' f[0] */
        fld    st1                     ;/* coef[1][3]  v2+v1+v0   coef[1][3] coef[1][2] coef[1][1] coef[1][0] f[1]' f[0] */
        fmul   dword [eax+8*4]         ;/* v3 v2+v1+v0 coef[1][3] coef[1][2] coef[1][1] coef[1][0] f[1]' f[0] */
        faddp  st1,st0                 ;/* f[2]'       coef[1][3] coef[1][2] coef[1][1] coef[1][0] f[1]' f[0] */
;//   f[3] = coef[1][0]*b[5]+coef[1][1]*b[6]+coef[1][2]*b[7]+coef[1][3]*b[8]-f[1];
        fxch   st4                     ;/* coef[1][0]  coef[1][3] coef[1][2] coef[1][1] f[2]'      f[1]' f[0] */
        fmul   dword [ebx+5*4]         ;/* v0          coef[1][3] coef[1][2] coef[1][1] f[2]'      f[1]' f[0] */
        fxch   st3                     ;/* coef[1][1]  coef[1][3] coef[1][2] v0         f[2]'      f[1]' f[0] */
        fmul   dword [ebx+6*4]         ;/* v1          coef[1][3] coef[1][2] v0         f[2]'      f[1]' f[0] */
        faddp  st3,st0                 ;/* coef[1][3]  coef[1][2] v0+v1      f[2]'      f[1]' f[0] */
        fxch   st1                     ;/* coef[1][2]  coef[1][3] v0+v1      f[2]'      f[1]' f[0] */
        fmul   dword [ebx+7*4]         ;/* v2          coef[1][3] v0+v1      f[2]'      f[1]' f[0] */
        faddp  st2,st0                 ;/* coef[1][3]  v0+v1+v2   f[2]'      f[1]' f[0] */
        fmul   dword [ebx+8*4]         ;/* v3          v0+v1+v2   f[2]'      f[1]' f[0] */
        faddp  st1,st0                 ;/* v0+v1+v2+v3 f[2]'      f[1]' f[0] */
        fsub   st0,st2                 ;/* f[3]'       f[2]'      f[1]' f[0] */
;//   f[1] = f[1] - f[0];
        fxch   st3                     ;/* f[0]        f[2]'      f[1]' f[3]' */
        fsubp  st2,st0                 ;/* f[2]'       f[1]       f[3]' */
        fxch   st1                     ;/* f[1]        f[2]'      f[3]' */
        fst    dword [ebp+1*4]         ;/* f[1]        f[2]'      f[3]' */
;//   f[2] = f[2] - f[1];
        fsubp  st1,st0                 ;/* f[2]        f[3]' */
        fst    dword [ebp+2*4]         ;/* f[2]        f[3]' */
        fxch   st1                     ;/* f[3]'       f[2] */
;//   f[4]=coef[2][0]*a[0]+coef[2][1]*a[1]+coef[2][2]*a[2]+coef[2][3]*a[3]-a[4];
        fld    dword [ecx+2*16+0*4]    ;/* c[2,0] f[3]' f[2] */
        fld    dword [ecx+2*16+1*4]    ;/* c[2,1] c[2,0] f[3]' f[2] */
        fld    dword [ecx+2*16+2*4]    ;/* c[2,2] c[2,1] c[2,0] f[3]' f[2] */
        fld    dword [ecx+2*16+3*4]    ;/* c[2,3] c[2,2] c[2,1] c[2,0] f[3]' f[2] */
        fld    st3                     ;/* c[2,0] c[2,3] c[2,2] c[2,1] c[2,0] f[3]' f[2] */
        fmul   dword [eax+0*4]         ;/* v0     c[2,3] c[2,2] c[2,1] c[2,0] f[3]' f[2] */
        fld    st3                     ;/* c[2,1] v0     c[2,3] c[2,2] c[2,1] c[2,0] f[3]' f[2] */
        fmul   dword [eax+1*4]         ;/* v1     v0     c[2,3] c[2,2] c[2,1] c[2,0] f[3]' f[2] */
        faddp  st1,st0                 ;/* v1+v0  c[2,3] c[2,2] c[2,1] c[2,0] f[3]' f[2] */
        fld    st2                     ;/* c[2,2] v1+v0  c[2,3] c[2,2] c[2,1] c[2,0] f[3]' f[2] */
        fmul   dword [eax+2*4]         ;/* v2     v1+v0  c[2,3] c[2,2] c[2,1] c[2,0] f[3]' f[2] */
        faddp  st1,st0                 ;/* v0v1v2 c[2,3] c[2,2] c[2,1] c[2,0] f[3]' f[2] */
        fld    st1                     ;/* c[2,3] v0v1v2 c[2,3] c[2,2] c[2,1] c[2,0] f[3]' f[2] */
        fmul   dword [eax+3*4]         ;/* v3     v0v1v2 c[2,3] c[2,2] c[2,1] c[2,0] f[3]' f[2] */
        faddp  st1,st0                 ;/* f[4]'  c[2,3] c[2,2] c[2,1] c[2,0] f[3]' f[2] */
        fsub   dword [eax+4*4]         ;/* f[4]'  c[2,3] c[2,2] c[2,1] c[2,0] f[3]' f[2] */
        fxch   st4                     ;/* c[2,0] c[2,3] c[2,2] c[2,1] f[4]'  f[3]' f[2] */
;//   f[5]=coef[2][0]*b[0]+coef[2][1]*b[1]+coef[2][2]*b[2]+coef[2][3]*b[3]-b[4]-f[3];
        fmul   dword [ebx+0*4]         ;/* v0     c[2,3] c[2,2] c[2,1] f[4]'  f[3]' f[2] */
        fxch   st3                     ;/* c[2,1] c[2,3] c[2,2] v0     f[4]'  f[3]' f[2] */
        fmul   dword [ebx+1*4]         ;/* v1     c[2,3] c[2,2] v0     f[4]'  f[3]' f[2] */
        faddp  st3,st0                 ;/* c[2,3] c[2,2] v0+v1  f[4]'  f[3]' f[2] */
        fxch   st1                     ;/* c[2,2] c[2,3] v0+v1  f[4]'  f[3]' f[2] */
        fmul   dword [ebx+2*4]         ;/* v2     c[2,3] v0+v1  f[4]'  f[3]' f[2] */
        faddp  st2,st0                 ;/* c[2,3] v0v1v2 f[4]'  f[3]' f[2] */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -