⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cfft2_21160.asm

📁 AD公司ADSP2116X的基2fft
💻 ASM
字号:
/*___________________________________________________________________________
CFFT2_21160.ASM       ADSP-21160 Radix-2 DIT Complex FFT

Does a radix-2 FFT of length 64 or greater on input data x(n).

        N		1st half of normal-ordered complex input stored in DM
        N		2nd half of normal-ordered complex input stored in PM
		N		real part of fft stored in DM 
        N		imag part of fft stored in PM
        N		Interleaved Sin and Cos table stored in DM
		
Based on FFTRAD2 by	Kapriel Karagozian	Analog Devices DSP Div. 1-800-ANALOGD
Author:   25-APR-91 Ronnin Yee			Analog Devices DSP Div. 1-800-ANALOGD

Modified:	11/98	Richard Grafton		Analog Devices DSP Div.	1-800-ANALOGD
										(converted for ADSP-21160) 
Modified:	 4/99	Richard Grafton		Analog Devices DSP Div. 1-800-ANALOGD
										(optimized for SIMD of ADSP-21160)
Modified:	6/00	Philip Giordano		Resolved illegal mixed-word dual-data SIMD access;
										an extra cycle was introduced to resolve the access
										(this access should be optimized further).
										Code verified on Rev 0.1 silicon, the 4.1.2 tools release,
									 	and simulator version 2.0.2.1.
										

Calling Information:
    dm(twiddle[N])		- Interleaved sin(2pi*n/N) table and cos(2pi*n/N) table from Twiddle.exe
						  program, twiddle factors interleaved as  cos[0], sin[0], cos[1], sin[1] ...
						  stored in long-word space
    dm(cmplx1[N])       - 1st half of complex input array stored in dm
	pm(cmplx2[N])       - 2nd half of complex input array stored in pm

    (Note:  Because the bit-reversed addressing mode is used with the arrays
    cmplx1 and cmplx2, they must start at addresses that are integer multiples of
	the length (N) of the transform (i.e.  0,N,2N,3N,...).  This is accomplished by
    specifying a segment starting at each of those addresses in the linker description file
    and placing each variable alone in its own segment.  These addresses must
    also be reflected, in bit-reversed format, in the preprocessor variables IREDM
	and IREPM respectively.)

Results:
    dm(refft[N])   - real working array and output
    pm(imfft[N])   - imaginary working array and output


Benchmarks:
                               	  
	FFT Length      cycles (SIMD)	 time (us) 80MHz     
    ----------      -------------    ---------------        
         64		    	 640		        8
        128	    		1245		    15.56
        256		    	2554		    31.93
        512  	    	5399		    67.49
       1024	           11572		   144.65
       2048            24913		   311.41
       4096            53614           670.18
       8192           115083          1438.54


   First 2 Stages    -  5 cycles per 4 butterflies
   Middle Stages     -  4 cycles per 2 butterflies
   2nd to Last Stage - 10 cycles per 4 butterflies
   Last FFT Stage    -  6 cycles per 2 butterflies
   Conversion Stage  -  6 cycles per 2 elements (N/2 elements total)

Memory Usage:
    pm code = 163 words, pm data = 2*N words, dm data = 3*N words
____________________________________________________________________________*/

/* Include for symbolic definition of system register bits */
#include "def21160.h"

/*_________The constants below must be changed for different length FFTs______
N         = number of points in the FFT, must be a power of 2
STAGES    = log2(N)
BRMODIFY  = bitrev(32 bit N/2)
IREDM     = bitrev(32 bit addr of input real in dm), addr is 0,N,2N,3N,...
IREPM     = bitrev(32 bit addr of input imag in pm), addr is 0,N,2N,3N,...
____________________________________________________________________________*/
/* Transform size: N = 256 points, so STAGES = log2(256) = 8. */
#define N               256
#define STAGES          8          
/* N/2 = 128 = 0x00000080; reversing those 32 bits gives 0x01000000.
   Loaded into m0 and m8 as the bit-reversed post-modify step. */
#define BRMODIFY        0x01000000
/* Loaded into b0, the base for the bit-reversed DM input reads.
   Reversing the 32 bits of 0x0008A000 gives 0x00051000, so the DM input
   array is expected to be linked at that address.  The linker description
   file is not part of this file -- confirm the two agree. */
#define IREDM			0x0008A000
/* Loaded into b8, the base for the bit-reversed PM input reads.
   Reversing the 32 bits of 0x00012000 gives 0x00048000 -- confirm against
   the linker description file as well. */
#define IREPM           0x00012000

/*________These constants are independent of the number of points____________*/
/* BFLY8 seeds the middle-stage modifiers and counters (m0, m2, r3, r5, r9,
   r10, r13); those are rescaled at the end of every middle stage. */
#define BFLY8           4				/*Offset between bf branches in a group of 8*/

/* Data buffers used by cfft2.  Every array is N words long and lives alone
   in its own segment so the linker description file can fix its address. */

/* Twiddle table, initialised from twiddle.dat.
   NOTE(review): the comment below names RFFT2TBL as the generator while the
   file header names Twiddle.exe -- confirm which tool produces twiddle.dat.
   Unlike the other arrays this one has no .ALIGN 2 directive. */
.SEGMENT/DM             seg_dm64;
.VAR    twiddle[N]		=	"twiddle.dat";	/* twiddle factors, from RFFT2TBL, interleaved as	*/
											/* cos[0] sin[0], cos[1], sin[1] ... 				*/
.ENDSEG;

/* Real working/output array; exported for use by other modules. */
.SEGMENT/DM             seg_dmda;
.ALIGN 2;								/* presumably even alignment for paired (long-word/SIMD) accesses -- TODO confirm */
.VAR    refft[N];						/* real result */
.GLOBAL refft;
.ENDSEG;

/* First half of the input, initialised from incplx1.dat.  Read through
   i0/b0 with bit-reversed addressing, hence the address constraint. */
.SEGMENT/DM             seg_dmrd;		/* Segment Addr. = integer multiple of N */
.ALIGN 2;
.VAR    cmplx1[N]		=	"incplx1.dat";	/* 1st half of input complex (interleaved) data */
.GLOBAL cmplx1;
.ENDSEG;

/* Imaginary working/output array; exported for use by other modules.
   NOTE(review): the segment is declared with the /DM qualifier although
   cfft2 accesses imfft through pm(...) -- actual placement depends on the
   linker description file; confirm this is intended. */
.SEGMENT/DM             seg_pmda;
.ALIGN 2;
.VAR    imfft[N];						/* imag result */
.GLOBAL imfft;
.ENDSEG;

/* Second half of the input, initialised from incplx2.dat.  Read through
   i8/b8 with bit-reversed addressing; the file header requires its start
   address to be an integer multiple of N as well.
   NOTE(review): same /DM-qualifier question as seg_pmda above. */
.SEGMENT/DM				seg_pmrd;
.ALIGN 2;
.VAR    cmplx2[N]		=	"incplx2.dat";	/* 2nd half of input complex (interleaved) data */
.GLOBAL cmplx2;
.ENDSEG;

/*______________________reset vector test call of fft______________________*/
/* Minimal test harness placed in seg_rth: four reserved NOP slots, then the
   reset entry, which calls cfft2 once and parks the core in idle.
   The instruction positions are address-sensitive -- do not reorder. */
.SEGMENT/PM             seg_rth;		/* program starts at the reset vector */

Resrvd:	nop;nop;nop;nop;				/* four filler instructions ahead of the Reset label */
		
Reset:	idle;							/* first instruction at the Reset label */
		call cfft2;						/* run the FFT once */
stop:   idle;							/* execution resumes here after cfft2 returns; halt */
        nop;

.ENDSEG;

.SEGMENT/PM             seg_pmco;
/*______________________________begin FFT__________________________________*/
/* cfft2 -- entry point of the complex FFT.
   Inputs : cmplx1 (read via dm/i0) and cmplx2 (read via pm/i8), both read
            with bit-reversed post-modify addressing.
   Outputs: refft (dm) and imfft (pm), used as working arrays by every stage.
   Uses constants N, STAGES, BRMODIFY, IREDM, IREPM and BFLY8.
   This first part sets the operating mode and performs the first two
   stages; later stages follow in the same segment. */

cfft2:	f4=-1.0;						/* multiplier used by "f3=f3*f4" below */
		s4=1.0;							/* PEy counterpart of f4 -- presumably the multiplier the PEy half
										   of that SIMD multiply uses; confirm against the SIMD register pairing */
		
		/* Sets five MODE1 bits: BR0 and BR8 (bit-reversed addressing on i0 and i8),
		   RND32, CBUFEN and PEYEN.  Bit meanings come from def21160.h. */
		bit set MODE1 BR0 | BR8 | RND32 | CBUFEN | PEYEN;		/* enable bit reverse of i0 and i8, RND32, CBUFEN, PEYEN */

/*Do bitrev and packing within first two stages*/
        /* DM input pointer: i0 is never written explicitly before its first use,
           so the code relies on the b0 write also initialising i0. */
        b0=IREDM;
		l0=0;							/* length 0: no circular wrap on the input reads */
        m0=BRMODIFY;					/* bit-reversed step */
		
		/* PM input pointer, set up the same way through b8/i8. */
		b8=IREPM;
        l8=0;
		m8=BRMODIFY;
		
        /* DM output pointer: the address of refft shifted right by one bit.
           Presumably this converts a normal-word address to a long-word address
           so each store writes a pair of words -- TODO confirm against the memory map. */
        r0=refft;
		r0=lshift r0 by -1;
		b2=r0;
        l2=N;
        m1=1;						

        /* PM output pointer: imfft, halved the same way. */
        r0=imfft;
		r0=lshift r0 by -1;
		b10=r0;
		l10=N;
		m9=1;

		/*Do the first two stages (actually a radix-4 FFT stage)*/
		/* Software-pipeline prologue: the next four instructions start the first
		   butterfly (loads, sums/differences and the multiply by f4) before the
		   loop begins.  Statement order is significant. */

                                                				f8=dm(i0,m0),	f9=pm(i8,m8);
                			  	f8=f8+f9,		f9=f8-f9,		f10=dm(i0,m0),	f11=pm(i8,m8);
                				f12=f10+f11,	f3=f10-f11;
      			f3=f3*f4,		f2=f8+f12,		f6=f8-f12;
                
/* Loop body: each pass finishes the butterfly started on the previous pass
   (stores of f2 and f6 through i2/i10) while loading and combining the
   operands of the next one. */
LCNTR=N/4,     do FSTAGE until LCE;    	/* do N/4 simple radix-4 butterflies */
                												r3<->s3;		/* exchange r3 with s3 */
								f3=f9+f3,		f7=f9-f3,		f8=dm(i0,m0),	f9=pm(i8,m8);
                				f8=f8+f9,		f9=f8-f9,		f10=dm(i0,m0),	f11=pm(i8,m8);
                				f12=f10+f11,	f3=f10-f11,		dm(i2,m1)=f2,	pm(i10,m9)=f2;
FSTAGE:         f3=f3*f4,		f2=f8+f12,		f6=f8-f12,		dm(i2,m1)=f6,	pm(i10,m9)=f6;
         


/*middle stages loop */
/* Setup for the middle stages: bit-reversed addressing is switched off and
   the DAG registers are re-pointed at the working arrays refft/imfft and at
   the twiddle table.  The r-registers loaded here hold the per-stage
   modifier and loop-count values; they are copied into m-registers and
   LCNTR at the top of each stage and rescaled at the bottom. */

        bit clr MODE1 BR0 | BR8;		/*finished with bitreversal*/
   		b2=refft;						/* real output pointer base (no longer the halved address) */
		b8=imfft;						/* imaginary read pointer base */
		l8=N;
		
        b0=refft;						/* real read pointer base */
        l0=N;
    
	   	b1=twiddle;						/* twiddle pointer base */
        l1=@twiddle/2;					/* circular length = half the length of twiddle */

        b10=imfft;						/* imaginary write pointer base */
        l10=N;
        
		b11=imfft;						/* second imaginary read pointer base */
        l11=N;

        m0=-BFLY8;						/* initial values; m0 and m2 are rewritten at the end of each stage */
        m1=-N/8;						/* initial twiddle step */
		m2=-BFLY8-2;
        m11=-2;

        r2=2;							/* constant 2 used in the count/modifier arithmetic below */
        r3=-BFLY8;				/*initializes m0,10 - incr for butterf branches	*/
		r5=BFLY8/2;				/*counts # butterflies per a group				*/
		r9=(-2*BFLY8)-2;		/*initializes m12 - wrap around to next grp + 2	*/
        r10=-2*BFLY8;			/*initializes m8 - incr between groups			*/
		r13=-BFLY8-2;			/*initializes m2,13 - wrap to bgn of 1st group	*/
        r15=N/8;				/*# OF GROUPS IN THIRD STAGE					*/

        f1=dm(i1,m1);			/*set pointers to tables to 1st coeff.   */
		
/* Middle stages: three nested hardware loops.
     end_stage : STAGES-4 passes, one per middle stage.
     end_group : r5 passes per stage.
     end_bfly  : r15 passes -- the software-pipelined core butterfly.
   The code between the loop bodies is pipeline prologue/epilogue; register
   allocation and statement order are significant throughout.
   Pointer roles as used below: i0 reads and i2 writes through dm(...);
   i8 and i11 read and i10 writes through pm(...); i1 walks the twiddle table. */
LCNTR=STAGES-4, do end_stage until LCE; /*# OF STAGES TO BE HANDLED = LOG2(N)-4 */
        m8=r10;
        m10=r3;
        m12=r9;
        /* every data pointer restarts at offset N-2 of its array and walks
           backwards (all modifiers used here are negative) */
        i0=refft+N-2;
        i2=refft+N-2;
        i8=imfft+N-2;
        i10=imfft+N-2;
        i11=imfft+N-2;
		r15=r15-r2,     m13=r13;        /*CALCULATE # OF CORE  */
                                        /*BFLIES/GROUP IN THIS STAGE */

										s1=dm(i1,m1); /* load s1 with cos and s0 with sin values */

										/* twiddle load issued as a separate instruction from the s1 load above */
										r1=dm(i1,m1);	f7=pm(i8,m8);   /* Resolved illegal mixed word dual-data SIMD access */
/* prologue: start the first butterfly of the stage */
f12=f0*f7,								f6=dm(i0,m0);
f8=f1*f6,                                               modify(i11,m10);
f11=f1*f7;                                              
f14=f0*f6,  f12=f8+f12,                 f8=dm(i0,m0),	f7=pm(i8,m8);
f12=f0*f7,  f13=f8+f12, f10=f8-f12,     f6=dm(i0,m0);   

/*Each iteration does another set of bttrflys in each group */

LCNTR=r5,   do end_group until LCE;     /*# OF BUTTERFLIES/GROUP IN THIS STAGE */

/*core butterfly loop */

LCNTR=r15,  do end_bfly until LCE;      /*Do a butterfly in each group - 2 */
    f8=f1*f6,   f14=f11-f14,                    dm(i2,m0)=f10,  f9=pm(i11,m8);
    f11=f1*f7,  f3=f9+f14,      f9=f9-f14,      dm(i2,m0)=f13,  f7=pm(i8,m8);
    f14=f0*f6,  f12=f8+f12,                     f8=dm(i0,m0),   pm(i10,m10)=f9;
end_bfly:
    f12=f0*f7,  f13=f8+f12,     f10=f8-f12,     f6=dm(i0,m0),   pm(i10,m10)=f3;

/*finish up last bttrfly and set up for next butterfly in each group */
/* f4 is reused here as a temporary for the sum f9+f14 */

f8=f1*f6,               f14=f11-f14,    dm(i2,m0)=f10,  f9=pm(i11,m8);
f11=f1*f7,  f4=f9+f14,  f9=f9-f14,      dm(i2,m0)=f13,  f14=pm(i8,m11);
f14=f0*f6,  f12=f8+f12,                 f8=dm(i0,m2),   pm(i10,m10)=f9;
										s1=dm(i1,m1);	/* next twiddle value, again split over two instructions */
            f13=f8+f12, f10=f8-f12,     r1=dm(i1,m1);	f7=pm(i8,m8);   /* Resolved illegal mixed word dual-data SIMD access */
						f14=f11-f14,    dm(i2,m0)=f10,  f9=pm(i11,m12);

/*start on next butterfly in each group */
f12=f0*f7,  f3=f9+f14,  f9=f9-f14,      f6=dm(i0,m0);
f8=f1*f6,                               dm(i2,m2)=f13,  pm(i10,m10)=f4;
f11=f1*f7,                                              pm(i10,m10)=f9;
f14=f0*f6,  f12=f8+f12,                 f8=dm(i0,m0),   f7=pm(i8,m8);
end_group:
f12=f0*f7,  f13=f8+f12, f10=f8-f12,     f6=dm(i0,m0),   pm(i10,m13)=f3;

/* Rescale the stage parameters for the next stage:
     r15 <- (r15 + 2) / 2    (undo the "-2" applied above, then halve)
     m1  <- -r15             (twiddle step)
     r5  <- 2 * r5
     r3  <- -(2 * r5), also copied to m0
     r13 <- r3 - 2, also copied to m2
     r10 <- 2 * r3
     r9  <- r10 - 2
   i1 is reset to the start of the twiddle buffer (b1) before the first
   read with the new step. */
                                        
            r4=r15+r2,          i1=b1;          	/*PREPARE R4 FOR #OF BFLIES CALC */
            r15=ashift r4 by -1;                	/*# OF BFLIES/GRP IN NEXT STAGE */
            r4=-r15;
            m1=r4;                                  /*update inc for sin & cos  */
            r5=ashift r5 by 1,  f1=dm(i1,m1);		/*update # bttrfly in a grp */
            r3=ashift r5 by 1;
			r3=-r3;									/* inc for bttrfly branch */
            r13=r3-r2,           m0=r3;				/* wrap to 1st grp        */
            r10=ashift r3 by 1;						/* inc between grps       */
end_stage:  r9=r10-r2,           m2=r13;			/* wrap to grp +1         */

/*_________ next to last stage__________*/
/* Uses the modifier values left in r3, r9, r10 and r13 by the final pass of
   the middle-stage loop; m0 and m2 also keep the values written there.
   The twiddle pointer starts at offset (N/2)-2 from b1 and steps by -2.
   Same software-pipelined butterfly as the middle stages, but with a new
   twiddle value loaded on every pass of the loop. */
		m1=-2;                  /*modifier to sine table pntr    */
        m8=r10;                 /*incr between groups            */
        m10=r3;                 /*incr between bttrfly branches  */
		m12=r9;               	/*wrap around to next grp + 1    */
		m13=r13;              	/*wrap to bgn of 1st group       */

        i0=refft+N-2;			/* data pointers restart at offset N-2 of their arrays */
        
		r0=b1;
		r1=(N/2)-2;
		r0=r0+r1;
		i1=r0;					/*pntr to 1st twiddle coeffs     */
        
		i2=refft+N-2;
        i8=imfft+N-2;
        i10=imfft+N-2;
        i11=imfft+N-2;

										s1=dm(i1,m1);	/* twiddle load split over two instructions, as in the middle stages */

										r1=dm(i1,m1);	f7=pm(i8,m8);
/* prologue: start the first butterfly */
f12=f0*f7,								f6=dm(i0,m0);
f8=f1*f6,								modify(i11,m10);
f11=f1*f7,												f7=pm(i8,m12);
f14=f0*f6, f12=f8+f12,					f8=dm(i0,m0);
f12=f0*f7, f13=f8+f12,  f10=f8-f12,		f6=dm(i0,m0);                           

/*Do the N/4 butterflies in the two groups of this stage */
/* N/8 passes; statement order is significant (software pipeline) */

LCNTR=N/8,  do end_group2 until LCE;   
    f8=f1*f6,                   f14=f11-f14,    dm(i2,m0)=f10,  f9=pm(i11,m8);
    f11=f1*f7,  f3=f9+f14,      f9=f9-f14,      dm(i2,m0)=f13;
    f14=f0*f6,  f12=f8+f12,                     f8=dm(i0,m2),   pm(i10,m10)=f9;
												s1=dm(i1,m1);	/* next twiddle value */
                f13=f8+f12,     f10=f8-f12,     r1=dm(i1,m1);	f7=pm(i8,m8);   /* Resolved illegal mixed word dual-data SIMD access */

    f12=f0*f7,                  f14=f11-f14,    f6=dm(i0,m0),   f9=pm(i11,m12);

    f8=f1*f6,   f3=f9+f14,      f9=f9-f14,      dm(i2,m0)=f10,  pm(i10,m10)=f3;
    f11=f1*f7,                                  dm(i2,m2)=f13,  pm(i10,m10)=f9; 
    f14=f0*f6, f12=f8+f12,                      f8=dm(i0,m0),   f7=pm(i8,m12);
end_group2:
    f12=f0*f7, f13=f8+f12,      f10=f8-f12,     f6=dm(i0,m0),   pm(i10,m13)=f3;


/*   The last stage      */
/* The branch offset is now -N/2 (m0, m10) with a wrap modifier of -N/2-2
   (m2, m13).  The twiddle pointer starts at offset (N/2)-1 from b1 and
   steps by -1.  m11 still holds the -2 loaded during the middle-stage
   setup.  The routine returns to its caller after this stage. */

		m0=-N/2;               
        m2=-N/2-2;
        m10=m0;					/* PM-side copies of the two DM modifiers */
        m13=m2;
        i0=refft+N-2;			/* data pointers restart at offset N-2 of their arrays */
        
		r0=b1;
		r1=(N/2)-1;
		r0=r0+r1;
		i1=r0;					/*pntr to 1st twiddle coeffs     */

        i2=refft+N-2;
        i8=imfft+N-2;
        
        i10=imfft+N-2;
        i11=imfft+N-2;
        m1=-1;                  /*modifiers to coeff tables      */

/*start first bttrfly */
										s1=dm(i1,m1);	/* twiddle load split over two instructions, as in earlier stages */

                                        r1=dm(i1,m1);	f7=pm(i8,m11);   /* Resolved illegal mixed word dual-data SIMD access */
f12=f0*f7,								f6=dm(i0,m0);	                                 
f8=f1*f6,								modify(i11,m10);
f11=f1*f7;                                      
f14=f0*f6, f12=f8+f12,					f8=dm(i0,m2),	f9=pm(i11,m11);

/*do N/2 bttrflys in the last stage */
/* N/4 passes; a new twiddle value is loaded at the top of every pass.
   Statement order is significant (software pipeline). */

LCNTR=N/4,  do last_stage until LCE;   
		                                        s1=dm(i1,m1);
				f13=f8+f12,     f10=f8-f12,     r1=dm(i1,m1);	f7=pm(i8,m11);   /* Resolved illegal mixed word dual-data SIMD access */
    f12=f0*f7,  f14=f11-f14,                    f6=dm(i0,m0);
    f8=f1*f6,   f3=f9+f14,      f15=f9-f14,     dm(i2,m0)=f10,  f9=pm(i11,m11);
    f11=f1*f7,                                  dm(i2,m2)=f13,  pm(i10,m10)=f15;
last_stage:                                             
    f14=f0*f6, f12=f8+f12,                      f8=dm(i0,m2),   pm(i10,m13)=f3;
	
	/* Delayed return: the two instructions after rts (db) are placed in its
	   delay slots.  PEYEN is cleared here and BR0/BR8 were cleared before the
	   middle stages.
	   NOTE(review): RND32 and CBUFEN, set at entry, are not cleared anywhere in
	   this routine -- confirm callers expect them to remain set. */
	rts (db);
	bit clr mode1 PEYEN;
	nop;

/*_______________________________________________________________________*/
.ENDSEG;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -