fft.asm

来自「在ADSP-2126x上编写的优化过的FFT程序（用c和汇编编写）。」· 汇编代码 · 共 548 行 · 第 1/2 页
ASM
548 行

/*******************************************************************************
*
* Function:  CFFTF - Fast complex floating point FFT
*
* Synopsis:  #include <filter.h>
*            void cfft_simd(Tcfft_simd pm *pfft);
* Description:
*            The cfft_simd function transforms the time domain complex input
*            signal sequence to the frequency domain by using the accelerated
*            version of the Discrete Fourier Transform known as a Fast Fourier
*            Transform or FFT. It will decimate in frequency using an
*            optimized radix-2 algorithm.
*
*				 An object-oriented approach was implemented to allow a clear division
*				 between the algorithm and the data to be processed, thus allowing many FFT
*				 objects to exist at the same time.  Since each FFT object is initialized
*				 only once, and the parameters for the algorithm are stored in that
*				 object, time is saved in the actual calling of the FFT algorithm.
*				 Only one parameter is passed, a pointer to the
*				 FFT object.
*
*				 Before calling the cfft_simd function, the user must do the following:
*					1.  Create 2 arrays, float real_input[N] and float imag_input[N].
*						 Where N is the size of the fft computation, and is of type
*						 Tcfft_simd_size.  The output of the fft is also passed in 
*						 these arrays. If the input data is to be preserved it must first
*						 be copied to a safe location before calling this function.
*					2.  Create 2 arrays, float real_fft[N] and float imag_fft[N].
*						 These arrays hold intermediate results for the algorithm,
*						 and any data in them will be written over.
*					3.	 Create 2 arrays, float twid_real[N/2] and float twid_imag[N/2].
*						 These arrays will contain the complex twiddle factors used in 
*						 Calculating the FFT.  
*					4.  Create a structure of type Tcfft_simd.  This structure will contain
*						 all of the information required by cfft_simd to compute the fft.
*						 Tcfft_simd is defined in the file cfft_simd.h.
*					5.  Calculate the twiddle factors using the function:
*						 void cfft_simd_twiddle_calc(Tcfft_simd pm *pfft);
*						 (Note: in the example below this call is made after the
*						 initialization described in step 6.)
*					6.  Initialize the structure created in step 4 by calling the function:
*						 void cfft_simd_init(Tcfft_simd pm *pfft,	// pointer to fft object
*						  Tcfft_simd_size N,	// size of fft, must be a power of 2
*						  float *reinput,		// pointer to real input data of size N
*						  float pm *iminput,	// pointer to imag input data of size N
*						  float *retwid,		// pointer to real twiddle array of size N/2
*						  float pm *imtwid,	// pointer to imag twiddle array of size N/2
*						  float *refft,		// pointer to real temp array of size N
*						  float pm *imfft);	// pointer to imag temp array of size N
*					7.  Populate the input arrays and call cfft_simd.
*
*				 Note that the names of the arrays in the above procedure are used as
*				 examples and are not unique.
*
*				 When the above steps are followed a cfft_simd object is created.  
*				 Other cfft_simd objects can be created by repeating these steps.  
*				 If the new object is the same size the twiddle factor arrays 
*				 can be reused.
*
*            The cfft_simd function has been designed for optimum performance and
*            requires that the input arrays are aligned on an
*            address boundary that is a multiple of the FFT size. For certain
*            applications, this alignment constraint may not be appropriate;
*            in such cases, the application should call the cfftN function
*            instead with no loss of facility (apart from performance).
*
*            It is recommended that the arrays containing real parts are allocated 
*				 in separate memory blocks from the arrays containing imaginary parts,
*				 otherwise the performance of the function will degrade.
*
* Warning:   The cfft_simd function uses the alternate dags and register files.
*				 Information in these registers will be destroyed when the function is 
*				 called.
*
* Assumptions:
*            All arrays must start on even address boundaries.
*
* Example:
*
#include "cfft_simd.h"
#define N (1024)
#define pi (3.141592654)
Tcfft_simd pm Rad2;
// intermediate arrays
float refft[N];
float pm imfft[N];
// twiddle factor arrays
float retwid[N/2];
float pm imtwid[N/2];
// input arrays
#pragma align 1024	// must be on a multiple of N boundary
float redata[N];
#pragma align 1024	// must be on a multiple of N boundary
float pm imdata[N];
// By using an enumerated type only powers of 2 will be used for the size.
Tcfft_simd_size FFTSize = N1024;
// prototypes
void sinf_simd(float *x,float *y);
main()
{
	// create input data
	U32 i;
	float x[2];
	for(i=0;i<FFTSize;i=i+2)
	{
		x[0]=2*pi/N*i;
		x[1]=2*pi/N*(i+1);
		sinf_simd(x,&redata[i]);
	}
	// initialize object, similar to a constructor
	cfft_simd_init(&Rad2,	// pointer to fft object
						FFTSize,		// size of fft, must be a power of 2
						redata,		// pointer to real input data of size N
						imdata,		// pointer to imag input data of size N
						retwid,		// pointer to real twiddle array of size N/2
						imtwid,		// pointer to imag twiddle array of size N/2
						refft,		// pointer to real temp array of size N
						imfft);		// pointer to imag temp array of size N
	// calculate the twiddle factors
	cfft_simd_twiddle_calc(&Rad2);
	// run the fft
	cfft_simd(&Rad2);
	
}
*
* Cycle Counts:
*
*            FFT_Size      Cycles*
*            --------      ------
*               64           639
*              128          1191
*              256          2398
*              512          5045
*             1024         10828
*             2048         23395
*             4096         50554
*
*                         *cycle counts include the basic overheads in
*                          argument passing and in calling the function
*
* Author:	    BL, ADI
*
* Revisions:
*				Modified to be object oriented and C callable. June, 2003 
*				Darrel Judd
*				Judd Labs, Inc.
*				801-756-2057
*				drjudd@ieee.org
*******************************************************************************/
/********* Include for symbolic definition of system register bits ********/
#include "def21262.h"
// macros for C-callable assembly
#include <asm_sprt.h>
/************************** Defines ***************************************/

//==========================================================================
// defines used to access fft object of type Tcfft_simd
//
// Each value is an offset applied to the object pointer: _cfft_simd keeps
// the pointer in i14 and reads a field as pm(CFFT_xxx,i14).
// NOTE(review): the numbering presumably mirrors the member order of
// Tcfft_simd in cfft_simd.h -- keep the two in sync (TODO confirm).
//==========================================================================
// --- buffer pointers ---
// CFFT_REAL_FFT / CFFT_IMAG_FFT are loaded as the base addresses of the
// real / imaginary intermediate arrays; the "p8" entries are read as
// pointers to real_fft+8 / imag_fft+8.
// NOTE(review): the INPUT, "pN4" and "pN2" entries are not referenced in the
// visible part of the routine; by name they look like the input array bases
// and "base + N/4" / "base + N/2" pointers -- confirm against cfft_simd.h.
#define CFFT_REAL_INPUT 	0
#define CFFT_IMAG_INPUT 	1
#define CFFT_REAL_FFT		2
#define CFFT_IMAG_FFT		3
#define CFFT_REAL_FFTp8		4
#define CFFT_IMAG_FFTp8		5
#define CFFT_REAL_FFTpN4 	6
#define CFFT_IMAG_FFTpN4 	7
#define CFFT_REAL_INPUTpN2	8
#define CFFT_IMAG_INPUTpN2	9
// --- twiddle tables: loaded as the cosine (real) and sine (imag) table bases ---
#define CFFT_TWID_REAL		10
#define CFFT_TWID_IMAG		11
// --- bit-reversed addressing ---
// CFFT_IREDM / CFFT_IREPM: bit-reversed pointers to the real / imaginary
// input, loaded into b0 / b8.  CFFT_BRMODIFY: bit-reversed modifier loaded
// into both m0 and m8 to step through the input arrays.
#define CFFT_IREDM			12
#define CFFT_BRMODIFY		13
#define CFFT_IREPM			14
// --- sizes and loop counts ---
// CFFT_N  : FFT size, used as the circular-buffer length of the intermediate arrays
// CFFT_N2 : N/2, used as the circular-buffer length of the twiddle tables
// CFFT_N4 : N/4, loaded into m2
// CFFT_N8 : trip count of the combined stage 1/2 + bit-reversal loop
// CFFT_STAGESm5 : trip count of the outer stage loop (STAGES-5)
// NOTE(review): CFFT_mN4, CFFT_N2p2, CFFT_mN2 and CFFT_N4m1 are not used in
// the visible part of the routine; presumably -N/4, N/2+2, -N/2 and N/4-1
// -- confirm against cfft_simd_init.
#define CFFT_N 				15
#define CFFT_N2				16
#define CFFT_N4				17
#define CFFT_mN4			18
#define CFFT_N8				19
#define CFFT_N2p2			20
#define CFFT_mN2			21
#define CFFT_N4m1			22
#define CFFT_STAGESm5		23
/*************************** Variables ************************************/
// Two-word scratch area in data memory used by _cfft_simd:
//   SaveStack+0 : receives the caller's MODE1 value on entry
//   SaveStack+1 : receives r4 (the pointer to the cfft object) before the
//                 switch to the alternate registers, and is then re-read
//                 into the alternate i14
// NOTE(review): this is a single static buffer, so the routine does not look
// reentrant (e.g. if called from an interrupt while already running) -- confirm.
.SEGMENT/DM             seg_dmda;
.ALIGN 2;
.var SaveStack[2];
.ENDSEG;

.SEGMENT/PM             seg_pmco;
/*______________________________begin FFT__________________________________*/
.global _cfft_simd;
_cfft_simd:
		// bit reverse input addresses
		entry;
		dm(SaveStack+0)=mode1;	
		bit set MODE1 BR0 | 		// Bit Reverse Addressing for i0
						  BR8 | 	// Bit Reverse Addressing for i8
						  RND32 | 	// Rount to 32 bit floating point
						  CBUFEN | 	// Enable circular buffering
						  PEYEN | 	// Enable PEY for simd
						  BDCST1 | 	// Broadcast mode when using i1
						  BDCST9 | 	// Broadcast mode when using i9
						  SRD1H | 	// Use alternate registers for dag1 high
						  SRD1L | 	// Use alternate registers for dag1 low
						  SRD2H | 	// Use alternate registers for dag2 high
						  SRD2L | 	// Use alternate registers for dag2 low
						  SRRFH | 	// Use alternate data registers high
						  SRRFL;	// Use alternate data registers low
						  
		dm(SaveStack+1)=r4;		// save pointer to cfft object, this instruction also allows the mode1 reg to settle				  
								// from this point on alternate registers are used
		i14=dm(SaveStack+1);	// pointer to cfft_simd object stored in alternate i14		
		b0=pm(CFFT_IREDM,i14);	// read bit reversed pointer to real input
		m0 = pm(CFFT_BRMODIFY,i14);		// bit reversed modifier to read input arrays
		m8 = pm(CFFT_BRMODIFY,i14);		// bit reversed N/2
		b8=pm(CFFT_IREPM,i14);			// read bit reversed pointer to imag input
		b5 = pm(CFFT_REAL_FFT,i14);		// Pointer to intermediate array
		l5 = pm(CFFT_N,i14);			// read size of fft
		b4 = pm(CFFT_REAL_FFT,i14);		// Pointer to intermediate array
		l4 = pm(CFFT_N,i14);			// read size of fft
		b3 = pm(CFFT_REAL_FFT,i14);		// Pointer to intermediate array
		l3 = pm(CFFT_N,i14);			// read size of fft
		b2 = pm(CFFT_REAL_FFT,i14);		// Pointer to intermediate array
		l2 = pm(CFFT_N,i14);			// read size of fft;
		b13 = pm(CFFT_IMAG_FFT,i14);	// Pointer to intermediate array
		l13 = pm(CFFT_N,i14);			// read size of fft;
		b12 = pm(CFFT_IMAG_FFT,i14);	// Pointer to intermediate array
		l12 = pm(CFFT_N,i14);			// read size of fft;
		b11 = pm(CFFT_IMAG_FFT,i14);	// Pointer to intermediate array
		l11 = pm(CFFT_N,i14);			// read size of fft;
		b1 = pm(CFFT_TWID_REAL,i14);	// Pointer to cosine table
		l1 = pm(CFFT_N2,i14);			// read N/2
		b9 = pm(CFFT_TWID_IMAG,i14);	// Pointer to sine table
		l9 = pm(CFFT_N2,i14);			// read N/2

		m1 = 2;
		m9 = 2;

/****************** Stage 1, 2 and Bitrev ******************/

		                        f0=dm(i0,m0), f8=pm(i8,m8);						/* prime the pipeline */
		                        f1=dm(i0,m0), f9=pm(i8,m8);	
		f4=f0+f1,    f0=f0-f1,  f2=dm(i0,m0), f10=pm(i8,m8);		
		f12=f8+f9,   f13=f8-f9, f3=dm(i0,m0), f11=pm(i8,m8);

		f6=f2+f3,    f3=f2-f3,    b10=b11;			
		f14=f10+f11, f15=f10-f11, l10=l11;
		m14=pm(CFFT_N8,i14);
		lcntr = m14, do _in_loop1 until lce;

			f5=f0+f15,   f7=f0-f15,   f0=dm(i0,m0), f8=pm(i8,m8);			
			f12=f12+f14, f14=f12-f14, f1=dm(i0,m0), f9=pm(i8,m8);
			f4=f4+f6,    f6=f4-f6,    f2=dm(i0,m0), f10=pm(i8,m8); 	
			f15=f13+f3,  f13=f13-f3,  f3=dm(i0,m0), f11=pm(i8,m8);

			f4=f0+f1,    f0=f0-f1,    dm(i2,m1)=f4, pm(i10,m9)=f12;
			f12=f8+f9,   f13=f8-f9,   dm(i2,m1)=f5, pm(i10,m9)=f13;
			f6=f2+f3,    f3=f2-f3,    dm(i2,m1)=f6, pm(i10,m9)=f14;			
_in_loop1:	f14=f10+f11, f15=f10-f11, dm(i2,m1)=f7, pm(i10,m9)=f15;


/******************** Stages ***********************/
_end_1:
		m4 = -4;
		m3 = 12;
		m2 = pm(CFFT_N4,i14);			// read N/4						
		i4 = pm(CFFT_REAL_FFTp8,i14);	// read pointer to real_fft+8
		i12 = pm(CFFT_IMAG_FFTp8,i14);	// read pointer to imag_fft+8
		r3=pm(CFFT_STAGESm5,i14);
		lcntr = r3, do _out_loop until lce; // loop STAGES-5 times

			r3 = m4;
			r1 = r3 + r3, r0 = m3;
			r3 = - r3, m4 = r1;
			r1 = r0 + r0, m12 = r1;
fft.asm - 源码说明

本页面展示了「在ADSP-2126x上编写的优化过的FFT程序（用c和汇编编写）。」中的 fft.asm 源码文件，采用汇编编程语言编写，共 548 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ADSP相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?