⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fft3dn.nas

📁 this the best mp3 encoder
💻 NAS
字号:
; from a new GOGO-no-coda (1999/09);	Copyright (C) 1999 shigeo;	special thanks to Keiichi SAKAI, URURI; hacked and back-ported to LAME;	 by Takehiro TOMINAGA Nov 2000%include "nasm.h"	globaldef fht_3DN	segment_data	align	16costab	dd	0x80000000, 0	dd	1.414213562,1.414213562	dd	9.238795283293805e-01, 9.238795283293805e-01	dd	3.826834424611044e-01, 3.826834424611044e-01	dd	9.951847264044178e-01, 9.951847264044178e-01	dd	9.801714304836734e-02, 9.801714304836734e-02	dd	9.996988186794428e-01, 9.996988186794428e-01	dd	2.454122920569705e-02, 2.454122920569705e-02	dd	9.999811752815535e-01, 9.999811752815535e-01	dd	6.135884819898878e-03, 6.135884819898878e-03D_1_0_0_0	dd	0.0		, 1.0	segment_codePIC_OFFSETTABLE;void fht_3DN(float *fz, int nn);proc	fht_3DN	pushd	ebp, ebx, esi, edi	sub	esp, 20	call	get_pc.bp	add	ebp, PIC_BASE()	mov	r0, [esp+40]		;fi	mov	r1, [esp+44]		;r1 = nn	lea	r3, [PIC_EBP_REL(costab)]		;tri = costab	lea	r4, [r0+r1*8]		;r4 = fn = &fz[n]	mov	[esp+16], r4	mov	r4, 8			;kx = k1/2	pmov	mm7, [r3]	loopalign 16.do1	lea	r3, [r3+16]	;tri += 2;	pmov	mm6, [PIC_EBP_REL(costab+8)]	lea	r2, [r4+r4*2]		;k3*fsize/2	mov	r5, 4		;i = 1*fsize	loopalign 16.do2:	lea	r1, [r0+r4]		;gi = fi + kx	;f	pmov	mm0, [r0]	;fi0	pmov	mm1, [r0+r4*2]	;fi1	pmov	mm2, [r0+r2*2]	;fi3	pmov	mm3, [r0+r4*4]	;fi2	pupldq	mm0, mm0	;fi0 | fi0	pupldq	mm1, mm1	;fi1 | fi1	pupldq	mm2, mm2	;fi2 | fi2	pupldq	mm3, mm3	;fi3 | fi3	pxor	mm1, mm7	;fi1 | -fi1	pxor	mm3, mm7	;fi3 | -fi3	pfsub	mm0, mm1	;f1 | f0	pfsub	mm2, mm3	;f3 | f2	pmov	mm4, mm0	pfadd	mm0, mm2	;f1+f3|f0+f2 = fi1 | fi0	pfsub	mm4, mm2	;f1-f3|f0-f2 = fi3 | fi2	pmovd	[r0], mm0	;fi[0]	puphdq	mm0, mm0	pmovd	[r0+r4*4], mm4	;fi[k2]	puphdq	mm4, mm4	pmovd	[r0+r4*2], mm4	;fi[k1]	pmovd	[r0+r2*2], mm0	;fi[k3]	lea	r0, [r0+r4*8]	;g	pmov	mm0, [r1]	;gi0	pmov	mm1, [r1+r4*2]	;gi1	pmov	mm2, [r1+r4*4]	;gi2	pmov	mm3, [r1+r2*2]	;gi3	pupldq	mm1, mm1	pupldq	mm0, mm0	;gi0 | gi0	pupldq	mm2, mm3	;gi3 | gi2	pxor	mm1, mm7	;gi1 | -gi1	pfsub	mm0, mm1	;gi0-gi1|gi0+gi1 = g1 | g0	pfmul	mm2, mm6	;gi3*SQRT2|gi2*SQRT2 = g3 | g2	pmov	mm4, mm0	pfadd	mm0, mm2	;g1+g3|g0+g2 = gi1 | gi0	pfsub	mm4, mm2	;g1-g3|g0-g2 = gi3 | gi2	pmovd	[r1], mm0	;gi[0]	puphdq	mm0, mm0	pmovd	[r1+r4*4], mm4	;gi[k2]	puphdq	mm4, mm4	cmp	r0, [esp + 16]	pmovd	[r1+r4*2], mm0	;gi[k1]	pmovd	[r1+r2*2], mm4	;gi[k3]	jb near .do2	pmov	mm6, [r3+r5]	; this is not aligned address!!	loopalign 16.for:;; mm6 = c1 | s1; mm7 = 0x800000000 | 0;	pmov	mm1, mm6	mov	r0, [esp+40]	; fz	puphdq	mm1, mm1	; c1 | c1	lea	r1, [r0+r4*2]	pfadd	mm1, mm1	; c1+c1 | c1+c1	pfmul	mm1, mm6	; 2*c1*c1 | 2*c1*s1	pfsub	mm1, [PIC_EBP_REL(D_1_0_0_0)] ; 2*c1*c1-1.0 | 2*c1*s1 = -c2 | s2	pmov	mm0, mm1	pxor	mm7, mm6	; c1 | -s1	pupldq	mm2, mm0	pupldq	mm3, mm6	; ** | c1	puphdq	mm0, mm2	; s2 | c2	puphdq	mm6, mm3	;-s1 | c1	pxor	mm0, [PIC_EBP_REL(costab)]	; c2 | -s2; mm0 =  s2| c2; mm1 = -c2| s2; mm6 =  c1| s1; mm7 =  s1|-c1 (we use the opposite sign. from GOGO here)	pmov	[esp], mm0	pmov	[esp+8], mm1	sub	r1, r5		;r1 = gi	add	r0, r5		;r0 = fi	loopalign 16.do3:	pmov	mm2, [r0+r4*2] ; fi[k1]	pmov	mm4, [r1+r4*2] ; gi[k1]	pmov	mm3, [r0+r2*2] ; fi[k3]	pmov	mm5, [r1+r2*2] ; gi[k3]	pupldq	mm2, mm2	; fi1 | fi1	pupldq	mm4, mm4	; gi1 | gi1	pupldq	mm3, mm3	; fi3 | fi3	pupldq	mm5, mm5	; gi3 | gi3	pfmul	mm2, mm0	; s2 * fi1 | c2 * fi1	pfmul	mm4, mm1	;-c2 * gi1 | s2 * gi1	pfmul	mm3, mm0	; s2 * fi3 | c2 * fi3	pfmul	mm5, mm1	;-c2 * gi3 | s2 * gi3	pfadd	mm2, mm4		;b | a	pfadd	mm3, mm5		;d | c	pmov	mm0, [r0]	pmov	mm4, [r1]	pmov	mm1, [r0+r4*4]	pmov	mm5, [r1+r4*4]	pupldq	mm0, mm4		;gi0 | fi0	pupldq	mm1, mm5		;gi2 | fi2	pmov	mm4, mm2	pmov	mm5, mm3	pfadd	mm2, mm0		;g0 | f0	pfadd	mm3, mm1		;g2 | f2	pfsub	mm0, mm4		;g1 | f1	pfsub	mm1, mm5		;g3 | f3	pmov	mm4, mm3	pmov	mm5, mm1	pupldq	mm4, mm4		;f2 | f2	puphdq	mm5, mm5		;g3 | g3	puphdq	mm3, mm3		;g2 | g2	pupldq	mm1, mm1		;f3 | f3	pfmul	mm4, mm6		;f2 * c1 | f2 * s1	pfmul	mm5, mm7		;g3 * s1 | g3 *-c1	pfmul	mm3, mm6		;g2 * c1 | g2 * s1	pfmul	mm1, mm7		;f3 * s1 | f3 *-c1	pfadd	mm4, mm5		;a | b	pfsub	mm3, mm1		;d | c	pmov	mm5, mm2	pmov	mm1, mm0	pupldq	mm2, mm2		;f0 | f0	pupldq	mm0, mm0		;f1 | f1	puphdq	mm1, mm2		;f0 | g1	puphdq	mm5, mm0		;f1 | g0	pmov	mm2, mm4	pmov	mm0, mm3	pfadd	mm4, mm1		;fi0 | gi1	pfadd	mm3, mm5		;fi1 | gi0	pfsub	mm1, mm2		;fi2 | gi3	pfsub	mm5, mm0		;fi3 | gi2	pmovd	[r1+r4*2], mm4	;gi[k1]	puphdq	mm4, mm4	pmovd	[r1], mm3		;gi[0]	puphdq	mm3, mm3	pmovd	[r1+r2*2], mm1	;gi[k3]	puphdq	mm1, mm1	pmovd	[r1+r4*4], mm5	;gi[k2]	puphdq	mm5, mm5	pmovd	[r0], mm4	;fi[0]	pmovd	[r0+r4*2], mm3	;fi[k1]	pmovd	[r0+r4*4], mm1	;fi[k2]	pmovd	[r0+r2*2], mm5	;fi[k3]	lea	r0, [r0+r4*8]	lea	r1, [r1+r4*8]	cmp	r0, [esp + 16]	pmov	mm0, [esp]	pmov	mm1, [esp+8]	jb near	.do3	add	r5, 4; mm6 =  c1| s1; mm7 =  s1|-c1 (we use the opposite sign. from GOGO here)	pfmul	mm6, [r3]	; c1*a | s1*a	pfmul	mm7, [r3+8]	; s1*b |-c1*b	cmp	r5, r4	pfsub	mm6, mm7	; c1*a-s1*b | s1*a+c1*b	pupldq	mm7,mm6	puphdq	mm6,mm7	pmov	mm7, [PIC_EBP_REL(costab)]	jb near	.for	mov	r0, [esp+40]	;fi	cmp	r4, [esp+40+4]	lea	r4, [r4*4]	;kx *= 4	jb near	.do1.exitttt	femms	add	esp,20	popd	ebp, ebx, esi, ediendproc;void fht_E3DN(float *fz, int nn);proc	fht_E3DN	pushd	ebp, ebx, esi, edi	sub	esp, 20	call	get_pc.bp	add	ebp, PIC_BASE()	mov	r0, [esp+40]		;fi	mov	r1, [esp+44]		;r1 = nn	lea	r3, [PIC_EBP_REL(costab)]		;tri = costab	lea	r4, [r0+r1*8]		;r4 = fn = &fz[n]	mov	[esp+16], r4	mov	r4, 8			;kx = k1/2	pmov	mm7, [r3]	loopalign 16.do1	lea	r3, [r3+16]	;tri += 2;	pmov	mm6, [PIC_EBP_REL(costab+8)]	lea	r2, [r4+r4*2]		;k3*fsize/2	mov	r5, 4		;i = 1*fsize	loopalign 16.do2:	lea	r1, [r0+r4]		;gi = fi + kx;f	pmov	mm0, [r0]	; X  | fi0	pmov	mm1, [r0+r4*4]	; X  | fi2	pupldq	mm0, [r0+r4*2]	;fi1 | fi0	pupldq	mm1, [r0+r2*2]	;fi3 | fi2	pfpnacc	mm0, mm0	;fi0+fi1 | fi0-fi1 = f0|f1	pfpnacc	mm1, mm1	;fi2+fi3 | fi2-fi3 = f2|f3	pmov	mm2, mm0	pfadd	mm0, mm1	;f0+f2|f1+f3 = fi0 | fi1	pfsub	mm2, mm1	;f0-f2|f1-f3 = fi2 | fi3	pmovd	[r0+r4*2], mm0	;fi[k1]	pmovd	[r0+r2*2], mm2	;fi[k3]	puphdq	mm0, mm0	puphdq	mm2, mm2	pmovd	[r0], mm0	;fi[0]	pmovd	[r0+r4*4], mm2	;fi[k2]	lea	r0, [r0+r4*8];g	pmov	mm3, [r1]	;    gi0	pmov	mm4, [r1+r2*2]	;    gi3	pupldq	mm3, [r1+r4*2]	;gi1|gi0	pupldq	mm4, [r1+r4*4]	;gi2|gi3	pfpnacc	mm3, mm3	;gi0+gi1  |gi0-gi1   = f0|f1	pfmul	mm4, mm6	;gi2*SQRT2|gi3*SQRT2 = f2|f3	pmov	mm5, mm3	pfadd	mm3, mm4	;f0+f2|f1+f3	pfsub	mm5, mm4	;f0-f2|f1-f3	cmp	r0, [esp + 16]	pmovd	[r1+r4*2], mm3	;gi[k1]	pmovd	[r1+r2*2], mm5	;gi[k3]	puphdq	mm3, mm3	puphdq	mm5, mm5	pmovd	[r1], mm3	;gi[0]	pmovd	[r1+r4*4], mm5	;gi[k2]	jb near .do2	pmov	mm6, [r3+r5]	; this is not aligned address!!	loopalign 16.for:;; mm6 = c1 | s1; mm7 = 0x800000000 | 0;	pmov	mm5, mm6	mov	r0, [esp+40]	; fz	puphdq	mm5, mm5	; c1 | c1	lea	r1, [r0+r4*2]	pfadd	mm5, mm5	; c1+c1 | c1+c1	pfmul	mm5, mm6	; 2*c1*c1 | 2*c1*s1	pfsub	mm5, [PIC_EBP_REL(D_1_0_0_0)] ; 2*c1*c1-1.0 | 2*c1*s1 = -c2 | s2	pswapd	mm4, mm5	; s2 |-c2	pxor	mm4, mm7	; s2 | c2	pxor	mm7, mm6	; c1 |-s1	pswapd	mm6, mm6	; s1 | c1; mm4 =  s2| c2; mm5 = -c2| s2; mm6 =  c1| s1; mm7 =  s1|-c1 (we use the opposite sign. from GOGO here)	pmov	[esp], mm4	pmov	[esp+8], mm5	sub	r1, r5		;r1 = gi	add	r0, r5		;r0 = fi	loopalign 16.do3:	pmov	mm0, [r0+r2*2] ; fi[k1]	pmov	mm2, [r1+r2*2] ; gi[k1]	pmov	mm1, [r0+r4*2] ; fi[k3]	pmov	mm3, [r1+r4*2] ; gi[k3]	pupldq	mm0, mm0	pupldq	mm2, mm2	pupldq	mm1, mm1	pupldq	mm3, mm3	pfmul	mm0, mm4	pfmul	mm2, mm5	pfmul	mm1, mm4	pfmul	mm3, mm5	pfadd	mm0, mm2		;d | c	pfadd	mm1, mm3		;b | a	pmov	mm2, [r0+r4*4]		;fi2	pupldq	mm3, [r1+r4*4]		;gi2 | -	pmov	mm4, [r0]		;fi0	pupldq	mm5, [r1]		;gi0 | -	pupldq	mm2, mm0		;c | fi2	puphdq	mm3, mm0		;d | gi2	pupldq	mm4, mm1		;a | fi0	puphdq	mm5, mm1		;b | gi0	pfpnacc	mm2, mm2		;f2 | f3	pfpnacc	mm3, mm3		;g2 | g3	pfpnacc	mm4, mm4		;f0 | f1	pfpnacc	mm5, mm5		;g0 | g1	pmov	mm0, mm2	pmov	mm1, mm3	pupldq	mm2, mm2		;f3 | f3	pupldq	mm3, mm3		;g3 | g3	puphdq	mm0, mm0		;f2 | f2	puphdq	mm1, mm1		;g2 | g2	pswapd	mm4, mm4		;f1 | f0	pswapd	mm5, mm5		;g1 | g0	pfmul	mm0, mm7		;f2 * s1 | f2 *-c1	pfmul	mm3, mm6		;g3 * c1 | g3 * s1	pfmul	mm1, mm6		;g2 * c1 | g2 * s1	pfmul	mm2, mm7		;f3 * s1 | f3 *-c1	pfsub	mm0, mm3		; b |-a	pfsub	mm1, mm2		; d | c	pmov	mm2, mm5	pmov	mm3, mm4	pupldq	mm4, mm0		;-a | f0	pupldq	mm5, mm1		; c | g0	puphdq	mm2, mm0		; b | g1	puphdq	mm3, mm1		; d | f1	pfpnacc	mm4, mm4		;fi2 | fi0	pfpnacc	mm5, mm5		;gi0 | gi2	pfpnacc	mm2, mm2		;gi1 | gi3	pfpnacc	mm3, mm3		;fi1 | fi3	pmovd	[r0], mm4		;fi[0]	pmovd	[r1+r4*4], mm5		;gi[k2]	pmovd	[r1+r2*2], mm2		;gi[k3]	pmovd	[r0+r2*2], mm3		;fi[k3]	puphdq	mm4, mm4	puphdq	mm5, mm5	puphdq	mm2, mm2	puphdq	mm3, mm3	pmovd	[r0+r4*4], mm4		;fi[k2]	pmovd	[r1], mm5		;gi[0]	pmovd	[r1+r4*2], mm2		;gi[k1]	pmovd	[r0+r4*2], mm3		;fi[k1]	lea	r0, [r0+r4*8]	lea	r1, [r1+r4*8]	cmp	r0, [esp + 16]	pmov	mm4, [esp]	pmov	mm5, [esp+8]	jb near	.do3	add	r5, 4; mm6 =  c1| s1; mm7 =  s1|-c1 (we use the opposite sign. from GOGO here)	pfmul	mm6, [r3]	; c1*a | s1*a	pfmul	mm7, [r3+8]	; s1*b |-c1*b	cmp	r5, r4	pfsub	mm6, mm7	; c1*a-s1*b | s1*a+c1*b	pswapd	mm6, mm6 ; ???	; s1*a+c1*b | c1*a-s1*b	pmov	mm7, [PIC_EBP_REL(costab)]	jb near	.for	mov	r0, [esp+40]	;fi	cmp	r4, [esp+40+4]	lea	r4, [r4*4]	;kx *= 4	jb near	.do1.exitttt	femms	add	esp,20	popd	ebp, ebx, esi, ediendproc

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -