; File: mdctasm.asm
; (code-viewer residue: "Font size:" control label -- not part of the source)
faddp st1,st0 ;/* f[8]' c[8,3] c[8,2] c[8,1] c[8,0] f[15]' f[14] */
fadd dword [eax+4*4] ;/* f[8]' c[8,3] c[8,2] c[8,1] c[8,0] f[15]' f[14] */ ;// CHECK HERE
fxch st4 ;/* c[8,0] c[8,3] c[8,2] c[8,1] f[16]' f[15]' f[14] */
;// f[17] = coef[8][0]*b[0]+coef[8][1]*b[1]+coef[8][2]*b[2]+coef[8][3]*b[3]+b[4]-f[15];
fmul dword [ebx+0*4] ;/* v0 c[8,3] c[8,2] c[8,1] f[16]' f[15]' f[14] */
fxch st3 ;/* c[8,1] c[8,3] c[8,2] v0 f[16]' f[15]' f[14] */
fmul dword [ebx+1*4] ;/* v1 c[8,3] c[8,2] v0 f[16]' f[15]' f[14] */
faddp st3,st0 ;/* c[8,3] c[8,2] v0+v1 f[16]' f[15]' f[14] */
fxch st1 ;/* c[8,2] c[8,3] v0+v1 f[16]' f[15]' f[14] */
fmul dword [ebx+2*4] ;/* v2 c[8,3] v0+v1 f[16]' f[15]' f[14] */
faddp st2,st0 ;/* c[8,3] v0v1v2 f[16]' f[15]' f[14] */
fmul dword [ebx+3*4] ;/* v3 v0v1v2 f[16]' f[15]' f[14] */
faddp st1,st0 ;/* f[17]'' f[16]' f[15]' f[14] */
fadd dword [ebx+4*4] ;/* f[17]'' f[16]' f[15]' f[14] */ ;// CHECK HERE
fsub st0,st2 ;/* f[17]' f[16]' f[15]' f[14] */
;// f[15] = f[15] - f[14];
fxch st3 ;/* f[14] f[16]' f[15]' f[17]' */
fsubp st2,st0 ;/* f[16]' f[15] f[17]' */
fxch st1 ;/* f[15] f[16]' f[17]' */
fst dword [ebp+15*4] ;/* f[15] f[16]' f[17]' */
;// f[16] = f[16] - f[15];
fsubp st1,st0 ;/* f[16] f[17]' */
fst dword [ebp+16*4] ;/* f[16] f[17]' */
;// f[17] = f[17] - f[16];
fsubp st1,st0 ;/* f[17] */
fstp dword [ebp+17*4] ;/* - */
popad
pop ebp
ret
;/*
; * _imdct6_3_asm -- x87 version of imdct6_3 (three 6-point blocks).
; *
; * In   : [esp+4] on entry = f, pointer to 18 consecutive dword floats.
; *        The result overwrites f[0..17] in place.
; * Uses : _mdct6_3v, _mdct6_3v2 and _coef87 as read-only coefficient
; *        sources; tab_a[0..17] as scratch storage (written in pass 1,
; *        read back in pass 2).
; * Regs : all general-purpose registers are restored on exit
; *        (pushad/popad plus push/pop ebp).  The x87 stack is left
; *        balanced and is never more than six entries deep.
; */
_imdct6_3_asm: ;// PROC f[]
        push    ebp
        lea     ebp, [esp+8]            ;/* ebp -> first stack argument */
        pushad
        mov     ebp, [ebp]              ;/* ebp = f */
        mov     edi, _mdct6_3v          ;/* edi = v */
        mov     esi, _mdct6_3v2         ;/* esi = v2 */
        mov     eax, tab_a              ;/* eax = a */

;/*
; * Operand shorthands.  p is the current block number (0..2) and is
; * expanded at the point of use, not at the point of definition.
; *   IMDCT6_F(i)  = f[3*i]  with f advanced by p elements   (pass 1 input)
; *   IMDCT6_V(i)  = v[i]
; *   IMDCT6_V2(i) = v2[i]
; *   IMDCT6_A(i)  = a[i]    with a advanced by 6*p elements (scratch)
; *   IMDCT6_C(i)  = c[i]    with c advanced by 6*p elements (pass 2 output)
; */
%define IMDCT6_F(i)  dword [ebp+(p+3*(i))*4]
%define IMDCT6_V(i)  dword [edi+(i)*4]
%define IMDCT6_V2(i) dword [esi+(i)*4]
%define IMDCT6_A(i)  dword [eax+(p*6+(i))*4]
%define IMDCT6_C(i)  dword [ebp+(p*6+(i))*4]

;/* Pass 1: fully unrolled, 3 blocks x 3 coefficient pairs (k, 5-k). */
%assign p 0
%rep 3
  %assign imdct6_k 0
  %rep 3
        ;// g1 = v[k] * f[3 * k];
        fld     IMDCT6_F(imdct6_k)      ;/* f[3*k] */
        fmul    IMDCT6_V(imdct6_k)      ;/* g1 */
        ;// g2 = v[5 - k] * f[3 * (5 - k)];
        fld     IMDCT6_F(5-imdct6_k)    ;/* f[3*(5-k)] g1 */
        fmul    IMDCT6_V(5-imdct6_k)    ;/* g2 g1 */
        ;// a[k] = g1 + g2;
        fld     st0                     ;/* g2 g2 g1 */
        fadd    st0, st2                ;/* a[k] g2 g1 */
        fstp    IMDCT6_A(imdct6_k)      ;/* g2 g1 */
        ;// a[3 + k] = v2[k] * (g1 - g2);
        fsubp   st1, st0                ;/* g1-g2 */
        fmul    IMDCT6_V2(imdct6_k)     ;/* a[3+k] */
        fstp    IMDCT6_A(3+imdct6_k)    ;/* - */
    %assign imdct6_k imdct6_k+1
  %endrep
  %assign p p+1
%endrep

;/* Pass 2: fully unrolled, one 6-value output group per block. */
%assign p 0
%rep 3
        ;// a02 = (a[0] + a[2]);
        fld     IMDCT6_A(0)             ;/* a[0] */
        fadd    IMDCT6_A(2)             ;/* a02 */
        ;// b02 = (a[3 + 0] + a[3 + 2]);
        fld     IMDCT6_A(3+0)           ;/* a[3+0] a02 */
        fadd    IMDCT6_A(3+2)           ;/* b02 a02 */
        ;// c[0] = a02 + a[1];
        fld     st1                     ;/* a02 b02 a02 */
        fadd    IMDCT6_A(1)             ;/* c[0] b02 a02 */
        fst     IMDCT6_C(0)             ;/* c[0] b02 a02 */
        ;// c[1] = b02 + a[3 + 1];
        fld     st1                     ;/* b02 c[0] b02 a02 */
        fadd    IMDCT6_A(3+1)           ;/* c[1]' c[0] b02 a02 */
        ;// c[2] = coef87 * (a[0] - a[2]);
        fld     IMDCT6_A(0)             ;/* a[0] c[1]' c[0] b02 a02 */
        fsub    IMDCT6_A(2)             ;/* a[0]-a[2] c[1]' c[0] b02 a02 */
        fmul    dword [_coef87]         ;/* c[2]' c[1]' c[0] b02 a02 */
        ;// c[3] = coef87 * (a[3 + 0] - a[3 + 2]) - c[1];
        fld     IMDCT6_A(3)             ;/* a[3] c[2]' c[1]' c[0] b02 a02 */
        fsub    IMDCT6_A(3+2)           ;/* a[3]-a[5] c[2]' c[1]' c[0] b02 a02 */
        fmul    dword [_coef87]         ;/* c[3]'' c[2]' c[1]' c[0] b02 a02 */
        fsub    st0, st2                ;/* c[3]' c[2]' c[1]' c[0] b02 a02 */
        ;// c[1] = c[1] - c[0];
        fxch    st3                     ;/* c[0] c[2]' c[1]' c[3]' b02 a02 */
        fsubp   st2, st0                ;/* c[2]' c[1] c[3]' b02 a02 */
        fxch    st1                     ;/* c[1] c[2]' c[3]' b02 a02 */
        fst     IMDCT6_C(1)             ;/* c[1] c[2]' c[3]' b02 a02 */
        ;// c[2] = c[2] - c[1];
        fsubp   st1, st0                ;/* c[2] c[3]' b02 a02 */
        fst     IMDCT6_C(2)             ;/* c[2] c[3]' b02 a02 */
        ;// c[4] = a02 - a[1] - a[1];
        fxch    st3                     ;/* a02 c[3]' b02 c[2] */
        fsub    IMDCT6_A(1)             ;/* a02-a[1] c[3]' b02 c[2] */
        fsub    IMDCT6_A(1)             ;/* c[4]' c[3]' b02 c[2] */
        ;// c[5] = b02 - a[3 + 1] - a[3 + 1] - c[3];
        fxch    st2                     ;/* b02 c[3]' c[4]' c[2] */
        fsub    IMDCT6_A(3+1)           ;/* b02-a[3+1] c[3]' c[4]' c[2] */
        fsub    IMDCT6_A(3+1)           ;/* c[5]'' c[3]' c[4]' c[2] */
        fsub    st0, st1                ;/* c[5]' c[3]' c[4]' c[2] */
        ;// c[3] = c[3] - c[2];
        fxch    st3                     ;/* c[2] c[3]' c[4]' c[5]' */
        fsubp   st1, st0                ;/* c[3] c[4]' c[5]' */
        fst     IMDCT6_C(3)             ;/* c[3] c[4]' c[5]' */
        ;// c[4] = c[4] - c[3];
        fsubp   st1, st0                ;/* c[4] c[5]' */
        fst     IMDCT6_C(4)             ;/* c[4] c[5]' */
        ;// c[5] = c[5] - c[4];
        fsubp   st1, st0                ;/* c[5] */
        fstp    IMDCT6_C(5)             ;/* - */
  %assign p p+1
%endrep

        popad
        pop     ebp
        ret

;/* Drop the routine-private preprocessor names. */
%undef IMDCT6_F
%undef IMDCT6_V
%undef IMDCT6_V2
%undef IMDCT6_A
%undef IMDCT6_C
%undef imdct6_k
;/*
; * imdct18 ASM MACRO Version
; */
%macro imdct18 1
push ebp
mov ebp,%1 ;/* Save arg */
;/* Unroll the loop because it is a known-value count */
%assign p 0 ;/* Use compiler var as counter */
%rep 4
;/* First part - compute ap and bp */
fld dword [_mdct18w+p*4] ;/* w[p] */
fmul dword [ebp+p*4] ;/* g1 */
fld dword [_mdct18w+(17-p)*4] ;/* w[17-p] g1 */
fmul dword [ebp+(17-p)*4] ;/* g2 g1 */
fld st0 ;/* g2 g2 g1 */
fadd st2 ;/* ap g2 g1 */
fxch st1 ;/* g2 ap g1 */
fsubp st2,st0 ;/* ap g1-g2 */
fxch st1 ;/* g1-g2 ap */
fmul dword [_mdct18w2+p*4] ;/* bp ap */
;/* Second part - compute a8p and b8p */
fld dword [_mdct18w+(8-p)*4];/* w[8-p] bp ap */
fmul dword [ebp+(8-p)*4] ;/* g1 bp ap */
fld dword [_mdct18w+(9+p)*4];/* w[9+p] g1 bp ap */
fmul dword [ebp+(9+p)*4] ;/* g2 g1 bp ap */
fld st0 ;/* g2 g2 g1 bp ap */
fadd st2 ;/* a8p g2 g1 bp ap */
fxch st1 ;/* g2 a8p g1 bp ap */
fsubp st2,st0 ;/* a8p g1-g2 bp ap */
fxch st1 ;/* g1-g2 a8p bp ap */
fmul dword [_mdct18w2+(8-p)*4] ;/* b8p a8p bp ap */
;/* Final part - finalize computing and store values */
fld st3 ;/* ap b8p a8p bp ap */
fadd st2 ;/* ap+a8p b8p a8p bp ap */
fstp dword [tab_a+p*4] ;/* b8p a8p bp ap */
fxch st1 ;/* a8p b8p bp ap */
fsubp st3,st0 ;/* b8p bp ap-a8p */
fxch st2 ;/* ap-a8p bp b8p */
fstp dword [tab_a+(p+5)*4] ;/* bp b8p */
fxch st1 ;/* b8p bp */
fld st0 ;/* b8p b8p bp */
fadd st2 ;/* b8p+bp b8p bp */
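; --- Code-viewer residue below (not part of the assembly source) ---
; Keyboard shortcuts
;   Copy code            Ctrl + C
;   Search code          Ctrl + F
;   Full-screen mode     F11
;   Toggle theme         Ctrl + Shift + D
;   Show shortcuts       ?
;   Increase font size   Ctrl + =
;   Decrease font size   Ctrl + -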