⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hhcfft4.asm

📁 在ADSP-2126x上编写的优化过的FFT程序(用c和汇编编写)。
💻 ASM
📖 第 1 页 / 共 2 页
字号:
lcntr=r0, do mstage until lce;    /* do STAGES-2 stages */

                        i7=dm(_Rad4FFT_cosine);      /* first real twiddle */
                        i15=dm(_Rad4FFT_sine);       /* first imag twiddle */

                        r8=dm(_Rad4FFT_redata);
                        r9=dm(_Rad4FFT_imdata);

                        i0=r8;          /* upper real      path */
        r10=r8+r2;      i8=r9;          /* upper imaginary path */
                        i1=r10;         /* second real input path */
        r10=r10+r2,     i4=r10;         /* second real output path */
                        i2=r10;         /* third real input path */
        r10=r10+r2,     i5=r10;         /* third real output path */
                        i3=r10;         /* fourth real input path */
        r10=r9+r2,      i6=r10;         /* fourth real output path */
                        i9=r10;         /* second imag input path */
        r10=r10+r2,     i12=r10;        /* second imag output path */
                        i10=r10;        /* third imag input path */
        r10=r10+r2,     i13=r10;        /* third imag output path */
                        i11=r10;        /* fourth imag input path */
                        i14=r10;        /* fourth imag output path */ 
                        m4=r3;
                        m12=r3;
        r4=r3+1,        m6=r2;
                        m3=r4;
        r2=r2-1,        m11=r4;
                        m7=r2;
        
lcntr=m5,       do mgroup until lce;    /* do m5 groups */
                                                f0=dm(i7,m0),   f5=pm(i9,m8);
f8=f0*f5,                                       f4=dm(i1,m0),   f1=pm(i15,m8);
f9=f0*f4;
f12=f1*f5,                                      f0=dm(i7,m0),   f5=pm(i11,m8);
f13=f1*f4,      f12=f9+f12,                     f4=dm(i3,m0),   f1=pm(i15,m8);
f8=f0*f4,                       f2=f8-f13;
f13=f1*f5;
f9=f0*f5,       f8=f8+f13,                      f0=dm(i7,m1),   f5=pm(i10,m8);
f13=f1*f4,      f12=f8+f12,     f14=f8-f12,     f4=dm(i2,m0),   f1=pm(i15,m9);
f11=f0*f4;
f13=f1*f5,                      f6=f9-f13;
f9=f0*f5,       f13=f11+f13,                    f11=dm(i0,0);
f13=f1*f4,      f8=f11+f13,     f10=f11-f13;
           /*___________Do m7 radix-4 butterflies___________*/
lcntr=m7,       do mr4bfly until lce;
                                f13=f9-f13,     f4=dm(i1,m0),   f5=pm(i9,m8);
                f2=f2+f6,       f15=f2-f6,      f0=dm(i7,m0),   f1=pm(i15,m8);
f8=f0*f4,       f3=f8+f12,      f7=f8-f12,                      f9=pm(i8,0);
f12=f1*f5,      f9=f9+f13,      f11=f9-f13,     f13=f2;
f8=f0*f5,       f12=f8+f12,                     f0=dm(i7,m0),   f5=pm(i11,m8);
f13=f1*f4,      f9=f9+f13,      f6=f9-f13,      f4=dm(i3,m0),   f1=pm(i15,m8);
f8=f0*f4,                       f2=f8-f13,      dm(i0,m0)=f3,   pm(i8,m8)=f9;
f13=f1*f5,      f11=f11+f14,    f7=f11-f14,     dm(i4,m0)=f7,   pm(i12,m8)=f6;
f9=f0*f5,       f8=f8+f13,                      f0=dm(i7,m1),   f5=pm(i10,m8);
f13=f1*f4,      f12=f8+f12,     f14=f8-f12,     f4=dm(i2,m0),   f1=pm(i15,m9);
f11=f0*f4,      f3=f10+f15,     f8=f10-f15,                     pm(i13,m8)=f11;
f13=f1*f5,                      f6=f9-f13,      dm(i6,m0)=f8,   pm(i14,m8)=f7;
f9=f0*f5,       f13=f11+f13,                    f11=dm(i0,0);
mr4bfly:
f13=f1*f4,      f8=f11+f13,     f10=f11-f13,    dm(i5,m0)=f3;
           /*___________End radix-4 butterfly_____________*/
/*                       dummy for address update      *               * */
                                f13=f9-f13,     f0=dm(i7,m2),   f1=pm(i15,m10);
                f2=f2+f6,       f15=f2-f6,      f0=dm(i1,m4),   f1=pm(i9,m12);
                f3=f8+f12,      f7=f8-f12,                      f9=pm(i8,0);
                f9=f9+f13,      f11=f9-f13,     f0=dm(i2,m4);
                f9=f9+f2,       f6=f9-f2,       f0=dm(i3,m4),   f1=pm(i10,m12);
                                                dm(i0,m3)=f3,   pm(i8,m11)=f9;
                f11=f11+f14,    f7=f11-f14,     dm(i4,m3)=f7,   pm(i12,m11)=f6;
                f3=f10+f15,     f8=f10-f15,                     pm(i13,m11)=f11;
                                                dm(i6,m3)=f8,   pm(i14,m11)=f7;
mgroup:                                         dm(i5,m3)=f3,   f1=pm(i11,m12);

                                r3=m4;
                                r1=m5;
                                r2=m6;
        r3=ashift r3 by -2;             /* groupstep/4 */
        r1=ashift r1 by 2;              /* groups*4 */
                                m5=r1;
mstage: r2=ashift r2 by -2;             /* butterflies/4 */
        
/*____________________Last radix-4 stage__________________________________*/
/* Includes bitreversal of the real data in dm */

        bit set mode1 BR0 | BR8;              /* bitreversal in i0 */
/* with: m0=m8=1 preset */
        r0=dm(_Rad4FFT_redata);/* input */
        r0=r0+1,i4=r0;
        r0=r0+1,i1=r0;
        r0=r0+1,i2=r0;
        i3=r0;
        i0=dm(_Rad4FFT_ORE);  /* real output array base must be an integer multiple of N */
        m2=dm(_Rad4FFT_OST);
        i7=dm(_Rad4FFT_cosine);
        i8=dm(_Rad4FFT_OIM);                      /* input */
        r0=dm(_Rad4FFT_imdata);
        r0=r0+1,i12=r0;
        r0=r0+1,i9=r0;
        r0=r0+1,i10=r0;
        i11=r0;
        i15=dm(_Rad4FFT_sine);       /* first imag twiddle */
        m1=4;
        m9=m1;
        m12=dm(_Rad4FFT_OST);
                                                f0=dm(i7,m0),   f5=pm(i9,m9);
f8=f0*f5,                                       f4=dm(i1,m1),   f1=pm(i15,m8);
f9=f0*f4;
f12=f1*f5,                                      f0=dm(i7,m0),   f5=pm(i11,m9);
f13=f1*f4,      f12=f9+f12,                     f4=dm(i3,m1),   f1=pm(i15,m8);
f8=f0*f4,                       f2=f8-f13;
f13=f1*f5;
f9=f0*f5,       f8=f8+f13,                      f0=dm(i7,m0),   f5=pm(i10,m9);
f13=f1*f4,      f12=f8+f12,     f14=f8-f12,     f4=dm(i2,m1),   f1=pm(i15,m8);
f11=f0*f4;
f13=f1*f5,                      f6=f9-f13;
f9=f0*f5,       f13=f11+f13,                    f11=dm(i4,m1);
f13=f1*f4,      f8=f11+f13,     f10=f11-f13;
            /*________Do N/4-1 radix-4 butterflies_______*/
r5=dm(_Rad4FFT_N4M1);
lcntr=r5, do lstage until lce;
                                f13=f9-f13,     f4=dm(i1,m1),   f5=pm(i9,m9);
                f2=f2+f6,       f15=f2-f6,      f0=dm(i7,m0),   f1=pm(i15,m8);
f8=f0*f4,       f3=f8+f12,      f7=f8-f12,                      f9=pm(i12,m9);
f12=f1*f5,      f9=f9+f13,      f11=f9-f13,     f13=f2;
f8=f0*f5,       f12=f8+f12,                     f0=dm(i7,m0),   f5=pm(i11,m9);
f13=f1*f4,      f9=f9+f13,      f6=f9-f13,      f4=dm(i3,m1),   f1=pm(i15,m8);
f8=f0*f4,                       f2=f8-f13,      dm(i0,m2)=f3,   pm(i8,m12)=f9;
f13=f1*f5,      f11=f11+f14,    f7=f11-f14,     dm(i0,m2)=f7,   pm(i8,m12)=f6;
f9=f0*f5,       f8=f8+f13,                      f0=dm(i7,m0),   f5=pm(i10,m9);
f13=f1*f4,      f12=f8+f12,     f14=f8-f12,     f4=dm(i2,m1),   f1=pm(i15,m8);
f11=f0*f4,      f3=f10+f15,     f8=f10-f15,                     pm(i8,m12)=f11;
f13=f1*f5,                      f6=f9-f13,      dm(i0,m2)=f3,   pm(i8,m12)=f7;
f9=f0*f5,       f13=f11+f13,                    f11=dm(i4,m1);
lstage:
f13=f1*f4,      f8=f11+f13,     f10=f11-f13,    dm(i0,m2)=f8;

                                f13=f9-f13;
                f2=f2+f6,       f15=f2-f6;
                f3=f8+f12,      f7=f8-f12,                      f9=pm(i12,m9);
                f9=f9+f13,      f11=f9-f13,     dm(i0,m2)=f3;
                f9=f9+f2,       f6=f9-f2,       dm(i0,m2)=f7;
                                                                pm(i8,m12)=f9;
                f11=f11+f14,    f7=f11-f14,                     pm(i8,m12)=f6;
                f3=f10+f15,     f8=f10-f15,                     pm(i8,m12)=f11;
                                                dm(i0,m2)=f3,   pm(i8,m12)=f7;
                bit clr mode1 BR0 | BR8 | RND32 | CBUFEN | PEYEN;	/* no bitreversal in i0, disable 32bit floating point, circular buffering and SIMD */                     
                                                dm(i0,m2)=f8;
mode1=dm(SaveStack);
nop;
exit;


_Rad4FFT.end:      

/*******************************************************************************
*
* Function:  _Rad4FFT_Init - Initialization for the fast complex floating point FFT (_Rad4FFT)
* Prototype:
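*		void _Rad4FFT_Init(
*				TCFFT4_simd_size N,	// size of fft (power of 4)
*				float *reinput,		// real input array
*				float *iminput,		// imaginary input array
*				float *reoutput,		// real output array
*				float *imoutput);		// imaginary output array
*		Note: the code additionally reads two further stack arguments,
*		reads(3) and reads(4), as pointers to the real (cosine) and imaginary
*		(sine) twiddle-factor tables; they are missing from this prototype.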
*
* Synopsis:  #include "Rad4FFT.h"
*            
* Description:
*		This routine initializes the data elements for _Rad4FFT.
*		It must be called before Rad4FFT() is called.
*
* Author:	
*				Darrel Judd
*				Judd Labs, Inc.
*				801-756-2057
*				drjudd@ieee.org
*
* Revisions:
*					Created June, 2003 Darrel Judd
*******************************************************************************/

// _Rad4FFT_Init: precomputes and stores the parameters that _Rad4FFT later
// loads from memory (_Rad4FFT_ORE, _OIM, _OST, _N, _N4M1, _N8, _N16, _N16T3,
// _StagesM2, _redata, _imdata, _cosine, _sine and the table at
// _Rad4FFT_redataPN4).
//
// Inputs as used by this code:
//   r4       - N, the FFT size
//   r8       - pointer to the real input data
//   r12      - pointer to the imaginary input data
//   reads(1) - real output array address
//   reads(2) - stored to _Rad4FFT_OIM (presumably the imaginary output address)
//   reads(3) - pointer to the real (cosine) twiddle factors
//   reads(4) - pointer to the imaginary (sine) twiddle factors
// NOTE(review): r4/r8/r12 look like the first three arguments of the C
// runtime calling convention - confirm against the C prototype.
//
// Registers written directly here: r0, r1, r2 (also as f2), r3, i0, i4, i14.
// The reads()/exit macros are defined elsewhere and may touch others.
_Rad4FFT_Init:


			r0=reads(1);	// get real output address from stack
	// init parameters for rad4 fft
			i0 = r0;
			bitrev(i0,0);	// bit-reverse the address held in i0 (modify value 0)
			dm(_Rad4FFT_ORE)=i0;	// store bit-reversed real output base; _Rad4FFT's last stage loads it into i0 with BR0 set
			r0=reads(2);	// second stack argument (presumably imaginary output address)
			i14 = r0;
			bitrev(i14,0);	// bit-reverse the address held in i14 (modify value 0)
			dm(_Rad4FFT_OIM)=i14;	// store bit-reversed value; _Rad4FFT's last stage loads it into i8 with BR8 set
			
			r0 = lshift r4 by -1;	// r0 = N/2
			i0 = r0;	// route through a DAG index register so bitrev can be applied
			bitrev(i0,0);
			r0=i0;
			dm(_Rad4FFT_OST)=r0;  // bitrev(N/2); loaded into m2/m12 as the output modifier by _Rad4FFT's last stage
			
			dm(_Rad4FFT_N)=r4;	// store N
			
			r2= lshift r4 by -2;  // r2=N/4
			r2= r2-1;	// r2 = N/4-1
			dm(_Rad4FFT_N4M1)=r2; // store N/4-1 (loop count of _Rad4FFT's last-stage butterfly loop)
			
			r2=lshift r4 by -3;	//r2=N/8
			dm(_Rad4FFT_N8)=r2;	// store N/8
			
			r2=lshift r4 by -4;	// r2=N/16
			dm(_Rad4FFT_N16)=r2;	// store N/16
			
			r0=3;
			r0=r2*r0 (UUI);	//r0 = N*3/16 (r2 still holds N/16; unsigned integer multiply)
			dm(_Rad4FFT_N16T3)=r0;	// store N*3/16
			
			f2= float r4;	// calculate the number of stages
			r0=logb f2;		// get binary exponent, i.e. log2(N) when N is a power of two
			r0= lshift r0 by -1; // convert to log4
			r2=2;
			r0=r0-r2;	// stages-2
			dm(_Rad4FFT_StagesM2)=r0;	// store log4(N)-2
			
			
			dm(_Rad4FFT_redata)=r8;	// store pointer to real input data
			
			dm(_Rad4FFT_imdata)=r12; // store pointer to imag input data
			
			r3=reads(3);
			dm(_Rad4FFT_cosine)=r3;	// store pointer to real twiddle factors
			
			r3=reads(4);
			dm(_Rad4FFT_sine)=r3;	// store pointer to imaginary twiddle factors
			// calculate various pointers
			// Six consecutive words are written starting at _Rad4FFT_redataPN4:
			// redata+N/4, redata+2N/4, redata+3N/4, then imdata+N/4, imdata+2N/4,
			// imdata+3N/4.
			// NOTE(review): assumes the data declarations reserve all six words
			// contiguously after _Rad4FFT_redataPN4 - confirm where they are defined.
			i4=_Rad4FFT_redataPN4;
			r0=dm(_Rad4FFT_N);
			r0=lshift r0 by -2;	// r0 = N/4, the quarter-array step
			r1=dm(_Rad4FFT_redata);
			r1=r0+r1;	// r1 = redata + N/4
			// In the compute-plus-store lines below, the store writes the value r1
			// held before the add in the same instruction updates it.
			r1=r0+r1,dm(i4,1)=r1;	// store redata+N/4;   r1 becomes redata+2N/4
			r1=r0+r1,dm(i4,1)=r1;	// store redata+2N/4;  r1 becomes redata+3N/4
			dm(i4,1)=r1;	// store redata+3N/4
			r1=dm(_Rad4FFT_imdata);
			r1=r0+r1;	// r1 = imdata + N/4
			r1=r0+r1,dm(i4,1)=r1;	// store imdata+N/4;   r1 becomes imdata+2N/4
			r1=r0+r1,dm(i4,1)=r1;	// store imdata+2N/4;  r1 becomes imdata+3N/4
			dm(i4,1)=r1;	// store imdata+3N/4

exit;	// return to the caller via the runtime exit macro



_Rad4FFT_Init.end:			

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -