📄 hhcfft4.asm

📁 在ADSP-2126x上编写的优化过的FFT程序（用c和汇编编写）。
💻 ASM
📖 第 1 页 / 共 2 页
字号:
12 下一页

/*******************************************************************************
*
* Function:  _Rad4FFT - Fast complex floating point FFT
*
* Synopsis:  #include "Rad4FFT.h"
*            void Rad4FFT();
* Description:
*		This routine performs a single channel SIMD, complex, radix 4 Fast Fourier 
*		Transform (FFT). The FFT length (N) must be a power of 4 and a minimum of 
*		64 points. The real part of the input data is placed in DM and the imaginary 
*		part in PM. This data is destroyed during the course of the computation. 
*		The real and imaginary output of the FFT is placed in separate locations in DM.
*	
*		Since this routine takes care of all necessary address digit-reversals, the 
*		input and output data are in normal order. The digit reversal is accomplished 
*		by using a modified radix 4 butterfly throughout which swaps the inner two 
*		nodes resulting with bit reversed data. The digit reversal is completed by 
*		bit reversing the real data in the final stage and then bit reversing the 
*		imaginary so that it ends up in DM.
*	
*		To implement an inverse FFT, you only have to (1) swap the incoming data's real
*		and imaginary parts, (2) run the forward FFT, (3) swap the outgoing data's 
*		real and imaginary parts and (4) scale the data by 1/N.
*	
*
*
*		Before calling the Rad4FFT function, the user must do the following:
*					1.  Create 2 arrays, float real_input[N] and float imag_input[N],
*						 where N is the size of the FFT computation and must be a 
*						 power of 4.  These arrays must be aligned on a 
*						 multiple of N boundary.
*					2.  Create 2 arrays, float real_fft[N] and float imag_fft[N].
*						 These arrays hold the final results for the algorithm,
*						 and any data in them will be written over.
*					3.	 Create 2 arrays, float twid_real[N*3/4] and float twid_imag[N*3/4].
*						 These arrays will contain the complex twiddle factors used in 
*						 Calculating the FFT.  
*					
*					4.  Calculate the twiddle factors using the function:
*						 void Rad4_Twiddle_Calc(TCFFT4_simd_size length,float *tc,float pm *ts) 
*					5.  Initialize the FFT by calling the function:
*						 void Rad4FFT_Init(
*							TCFFT4_simd_size N,// size of the FFT (power of 4)
*							float *redata,		// real input data
*							float pm *imdata,	// imaginary input data
*							float *refft,		// real output data
*							float pm *imfft,	// imaginary output data
*							float *retwid,		// cosine twiddle factors
*							float pm *imtwid);// sine twiddle factors
*					6.  Populate the input arrays and call Rad4FFT.
*
*				 Note that the names of the arrays in the above procedure are used as
*				 examples and are not unique.
*
*				Note that in order to compute a different size FFT, or to change
*				the input or output arrays, all the steps listed above 
*				must be completed again; the twiddle factors do not need to
*				be recalculated if N does not change.
*				
*
* Assumptions:
*            All arrays must start on even address boundaries.
*
* Example:
*
*			#define N 4096			// define fft size
*			// declare arrays for input data
*			float redata[N];
*			float pm imdata[N];
*			// declare arrays for output data aligned on N boundaries
*			#pragma align 4096
*			float refft[N];
*			#pragma align 4096
*			float pm imfft[N];
*			// declare arrays for twiddle factors
*			#pragma align 2
*			float retwid[N*3/4];
*			#pragma align 2
*			float pm imtwid[N*3/4];
*			// FFTSize is an enumerated type, (power of 4)
*			TCFFT4_simd_size FFTSize = N4096;
*			// Calculate twiddle factors
*			Rad4_Twiddle_Calc(FFTSize,retwid,imtwid);
*			// Initialize the FFT
*			Rad4FFT_Init(
*								FFTSize,
*								&redata[0],
*								&imdata[0],
*								refft,imfft,
*								retwid,
*								imtwid);
*			// Calculate FFT
*			Rad4FFT();
*
*	Memory Usage:
*    pm code = 200 words, pm data = 1.75*N words, dm data = 3.75*N words
*
* Cycle Counts:
*
*            FFT_Size      Cycles*
*            --------      ------
*               64           769
*              256          3538
*             1024         17203
*				  4096			82516
*
*                         *cycle counts include the basic overheads in
*                          argument passing and in calling the function
*
* Author:	Karl Schwarz & Raimund Meyer, Universitaet Erlangen Nuernberg
*
* Revisions:
*				25-APR-91, RY
*	    		18-JUN-91, RY
*				05-MAR-99, RFG
*				24-MAY-00, GGL
*				08-JUN-00, GGL verified in 0.1 Si
*				Modified to be C callable. June, 2003 
*				Darrel Judd
*				Judd Labs, Inc.
*				801-756-2057
*				drjudd@ieee.org
*******************************************************************************/
//==============================================================================
// include files
//==============================================================================
#include    "def21160.h"				/* Symbol Definition File */
#include <asm_sprt.h>
//==============================================================================
// global declarations
//==============================================================================
// Symbols made visible outside this file.  Every name below except
// _Rad4FFT_Init is defined as a one-word .var in the dm data section that
// follows ("parameters passed by memory").
// NOTE(review): _Rad4FFT_Init is not defined in the visible part of this
// file -- confirm where its definition lives.
.global _Rad4FFT_Init;	

.global _Rad4FFT_ORE;	/* bitrev(32 bit addr of real output in dm), addr is 0,N,2N,3N,... */
.global _Rad4FFT_OIM;	/* bitrev(24 bit addr of imaginary output in dm), addr is 0,N,2N,3N,... */
.global _Rad4FFT_OST;	/* bitrev(32 bit N/2) */
.global _Rad4FFT_N;	// FFT size
.global _Rad4FFT_N8;	// N/8
.global _Rad4FFT_StagesM2;	// stage count minus 2 (see the .var definition)
.global _Rad4FFT_N4M1;	// N/4-1
.global _Rad4FFT_N16;	// N/16
.global _Rad4FFT_N16T3;	// N*3/16
.global _Rad4FFT_redata;	// real input address
.global _Rad4FFT_imdata;	// imaginary input address
.global _Rad4FFT_cosine;	// address of real (cosine) twiddle factors
.global _Rad4FFT_sine;	// address of imaginary (sine) twiddle factors
.global _Rad4FFT_redataPN4;	// redata + N/4
.global _Rad4FFT_redataPN2;	// redata + N/2
.global _Rad4FFT_redataPN34;	// redata + N*3/4
.global _Rad4FFT_imdataPN4;	// imdata + N/4
.global _Rad4FFT_imdataPN2;	// imdata + N/2
.global _Rad4FFT_imdataPN34;	// imdata + N*3/4
//===========================================================================
// variable declarations
//===========================================================================
// parameters passed by memory
// _Rad4FFT reads these words instead of taking arguments; the visible code
// loads the input pointers, N/8, N/16, N*3/16 and StagesM2 from here.
// NOTE(review): they are presumably filled in by Rad4FFT_Init (see the file
// header) -- confirm against its implementation.
.section/dm seg_dmda;
.var _Rad4FFT_ORE;	// bit reversed address of real output (matches the .global comment)
.var _Rad4FFT_OIM;	// bit reversed address of imaginary output (matches the .global comment)
.var _Rad4FFT_OST;	// bit reverse of N/2
.var _Rad4FFT_N;		// FFT size
.var _Rad4FFT_N8;		//	N/8; loop count of the first-stage butterfly loop
.var _Rad4FFT_StagesM2;	// log2(N)-2 per the original comment.
				// NOTE(review): the name suggests (number of radix-4 stages)-2,
				// i.e. log4(N)-2 -- confirm against Rad4FFT_Init.
.var _Rad4FFT_N4M1;	// N/4-1
.var _Rad4FFT_N16;	// N/16
.var _Rad4FFT_N16T3;	//	N*3/16
.var _Rad4FFT_redata; // Real input address
.var _Rad4FFT_imdata; // Imaginary input address
.var _Rad4FFT_cosine; // address of real twiddle factors
.var _Rad4FFT_sine; // address of imaginary twiddle factors
.var _Rad4FFT_redataPN4; // redata + N/4
.var _Rad4FFT_redataPN2; // redata + N/2
.var _Rad4FFT_redataPN34; // redata + N*3/4
.var _Rad4FFT_imdataPN4;	// imdata + N/4
.var _Rad4FFT_imdataPN2;	// imdata + N/2
.var _Rad4FFT_imdataPN34;	// imdata + N*3/4
// Align the following save area to an even address.
.align 2;

// 15-word register save area.  The visible part of _Rad4FFT stores MODE1 in
// the first word on entry; use of the remaining words is not visible here.
.var SaveStack[15];	// register storage
//=============================================================================
// program memory code 
//=============================================================================


.section/pm seg_pmco;

/*_______first stage radix-4 butterfly without twiddles______*/
.global _Rad4FFT;
_Rad4FFT:
dm(SaveStack)=mode1; // save mode1 register
bit set mode1 SRD1H | 	// use alternate dag1 high
				  SRD1L | 	// use alternate dag1 low
				  SRD2H | 	// use alternate dag2 high
				  SRD2L | 	// use alternate dag2 low
				  SRRFH | 	// use alternate registers high
				  SRRFL |	// use alternate registers low
				  RND32 |	// round to 32 bits 
				  CBUFEN |  // circular buffer enable
				  PEYEN ;	// enable PEY (SIMD)
				  nop;		// 1 cycle latency before we can modify the alternate dags
// set l regs to 0
l0=0;l1=0;l2=0;l3=0;l4=0;l5=0;l6=0;l7=0;
l8=0;l9=0;l10=0;l11=0;l12=0;l13=0;l14=0;l15=0;
// load addresses of arrays into dags
	i0=dm(_Rad4FFT_redata);
	i1=dm(_Rad4FFT_redataPN4);
	i2=dm(_Rad4FFT_redataPN2);
	b3=dm(_Rad4FFT_redataPN34);
	i8=dm(_Rad4FFT_imdata);
	i9=dm(_Rad4FFT_imdataPN4);
	i10=dm(_Rad4FFT_imdataPN2);
	b11=dm(_Rad4FFT_imdataPN34);
		m5=4; 
		m10=m5;
		m1=-2;
		m9=m1;	
      m0=2;		/* first stage modify values */
     	m8=2;		/* first stage modify values */
                                          f0=dm(i0,m0),   f1=pm(i8,m8);
                                                f2=dm(i2,m0),   f3=pm(i10,m8);
                f0=f0+f2,       f2=f0-f2,       f4=dm(i1,m0),   f5=pm(i9,m8);
                f1=f1+f3,       f3=f1-f3,       f6=dm(i3,m0),   f7=pm(i11,m8);
                f4=f6+f4,       f6=f6-f4;
                f5=f5+f7,       f7=f5-f7;
                f8=f0+f4,       f9=f0-f4;
                f10=f1+f5,      f11=f1-f5;
r0=dm(_Rad4FFT_N8);
lcntr=r0,      do fstage until lce;    /* do N/4 simple radix-4 butterflies */
                f12=f2+f7,      f13=f2-f7,      f0=dm(i0,m1),   f1=pm(i8,m9);
                f14=f3+f6,      f15=f3-f6,      f2=dm(i2,m1),   f3=pm(i10,m9);
                f0=f0+f2,       f2=f0-f2,       f4=dm(i1,m1),   f5=pm(i9,m9);
                f1=f1+f3,       f3=f1-f3,       f6=dm(i3,m1),   f7=pm(i11,m9);
                f4=f6+f4,       f6=f6-f4,       dm(i0,m5)=f8,   pm(i8,m10)=f10;
                f5=f5+f7,       f7=f5-f7,       dm(i1,m5)=f9,   pm(i9,m10)=f11;
                f8=f0+f4,       f9=f0-f4,       dm(i2,m5)=f12,  pm(i10,m10)=f14;
fstage:
                f10=f1+f5,      f11=f1-f5,      dm(i3,m5)=f13,  pm(i11,m10)=f15;

				bit clr mode1 PEYEN; /* added for testing SIMD */
				m0=1;
				m8=1;

/*_____________Middle stages with radix-4 main butterfly___________________*/

/* m0=1 and m8=1 is still preset */
                      //  m1=-2;          /* reverse step for twiddles */
                      //  m9=m1;  
                        m2=3;           /* forward step for twiddles */
                        m10=m2;
                        //m5=4;           /* first there are 4 groups */
                        r2=dm(_Rad4FFT_N16);        /* with N/16 butterflies in each group */
                        r3=dm(_Rad4FFT_N16T3);      /* step to next group */
r0=dm(_Rad4FFT_StagesM2);
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -