// fft.asm
/*******************************************************************************
*
* Function: CFFTF - Fast complex floating point FFT
*
* Synopsis: #include <filter.h>
* void cfft_simd(Tcfft_simd pm *pfft);
* Description:
* The cfft_simd function transforms the time domain complex input
* signal sequence to the frequency domain by using the accelerated
* version of the Discrete Fourier Transform known as a Fast Fourier
* Transform or FFT. It will decimate in frequency using an
* optimized radix-2 algorithm.
*
* An object-oriented approach was implemented to allow a clear division
* between the algorithm and the data to be processed, thus allowing many FFT
* objects to exist at the same time. Since each instance of the
* FFT object is initialized only once, and the parameters for the algorithm
* are stored as a unique identifier for each object, time is saved in the actual
* calling of the FFT algorithm. Only one parameter is passed, a pointer to the
* FFT object.
*
* Before calling the cfft_simd function, the user must do the following:
* 1. Create 2 arrays, float real_input[N] and float imag_input[N].
* Where N is the size of the fft computation, and is of type
* Tcfft_simd_size. The output of the fft is also passed in
* these arrays. If the input data is to be preserved it must first
* be copied to a safe location before calling this function.
* 2. Create 2 arrays, float real_fft[N] and float imag_fft[N].
* These arrays hold intermediate results for the algorithm,
* and any data in them will be written over.
* 3. Create 2 arrays, float twid_real[N/2] and float twid_imag[N/2].
* These arrays will contain the complex twiddle factors used in
* Calculating the FFT.
* 4. Create a structure of type Tcfft_simd. This structure will contain
* all of the information required by cfft_simd to compute the fft.
* Tcfft_simd is defined in the file cfft_simd.h.
* 5. Calculate the twiddle factors using the function:
* void cfft_simd_twiddle_calc(Tcfft_simd pm *pfft);
* 6. Initialize the structure created in step 4 by calling the function:
* void cfft_simd_init(Tcfft_simd pm *pfft, // pointer to fft object
* Tcfft_simd_size N, // size of fft, must be a power of 2
* float *reinput, // pointer to real input data of size N
* float pm *iminput, // pointer to imag input data of size N
* float *retwid, // pointer to real twiddle array of size N/2
* float pm *imtwid, // pointer to imag twiddle array of size N/2
* float *refft, // pointer to real temp array of size N
* float pm *imfft); // pointer to imag temp array of size N
* 7. Populate the input arrays and call cfft_simd.
*
* Note that the names of the arrays in the above procedure are used as
* examples and are not unique.
*
* When the above steps are followed a cfft_simd object is created.
* Other cfft_simd objects can be created by repeating these steps.
* If the new object is the same size the twiddle factor arrays
* can be reused.
*
* The cfft_simd function has been designed for optimum performance and
* requires that the input arrays are aligned on an
* address boundary that is a multiple of the FFT size. For certain
* applications, this alignment constraint may not be appropriate;
* in such cases, the application should call the cfftN function
* instead with no loss of facility (apart from performance).
*
* It is recommended that the arrays containing real parts are allocated
* in separate memory blocks from the arrays containing imaginary parts,
* otherwise the performance of the function will degrade.
*
* Warning: The cfft_simd function uses the alternate dags and register files.
* Information in these registers will be destroyed when the function is
* called.
*
* Assumptions:
* All arrays must start on even address boundaries.
*
* Example:
*
#include "cfft_simd.h"
#define N (1024)
#define pi (3.141592654)
Tcfft_simd pm Rad2;
// intermediate arrays
float refft[N];
float pm imfft[N];
// twiddle factor arrays
float retwid[N/2];
float pm imtwid[N/2];
// input arrays
#pragma align 1024 // must be on a multiple of N boundary
float redata[N];
#pragma align 1024 // must be on a multiple of N boundary
float pm imdata[N];
// By using an enumerated type only powers of 2 will be used for the size.
Tcfft_simd_size FFTSize = N1024;
// proto-types
void sinf_simd(float *x,float *y);
main()
{
// create input data
U32 i;
float x[2];
for(i=0;i<FFTSize;i=i+2)
{
x[0]=2*pi/N*i;
x[1]=2*pi/N*(i+1);
sinf_simd(x,&redata[i]);
}
// initialize object, similar to a constructor
cfft_simd_init(&Rad2, // pointer to fft object
FFTSize, // size of fft, must be a power of 2
redata, // pointer to real input data of size N
imdata, // pointer to imag input data of size N
retwid, // pointer to real twiddle array of size N/2
imtwid, // pointer to imag twiddle array of size N/2
refft, // pointer to real temp array of size N
imfft); // pointer to imag temp array of size N
// calculate the twiddle factors
cfft_simd_twiddle_calc(&Rad2);
// run the fft
cfft_simd(&Rad2);
}
*
* Cycle Counts:
*
* FFT_Size Cycles*
* -------- ------
* 64 639
* 128 1191
* 256 2398
* 512 5045
* 1024 10828
* 2048 23395
* 4096 50554
*
* *cycle counts include the basic overheads in
* argument passing and in calling the function
*
* Author: BL, ADI
*
* Revisions:
* Modified to be object oriented and C callable. June, 2003
* Darrel Judd
* Judd Labs, Inc.
* 801-756-2057
* drjudd@ieee.org
*******************************************************************************/
/********* Include for symbolic definition of system register bits ********/
#include "def21262.h"
// macros for C callable assembly
#include <asm_sprt.h>
/************************** Defines ***************************************/
//==========================================================================
// defines used to access fft object of type Tcfft_simd
//==========================================================================
// Each constant is a word offset into the Tcfft_simd object. The routine
// reads a field with pm(OFFSET,i14), where i14 points at the object.
// Fields that the visible part of _cfft_simd never reads are described
// from their names only and are marked "confirm".

// --- array pointers -------------------------------------------------------
// Real / imaginary input arrays (presumably; not read directly in the
// visible code - confirm against cfft_simd_init).
#define CFFT_REAL_INPUT       0
#define CFFT_IMAG_INPUT       1
// Real / imaginary intermediate (work) arrays.
#define CFFT_REAL_FFT         2
#define CFFT_IMAG_FFT         3
// Pointers to real_fft+8 and imag_fft+8.
#define CFFT_REAL_FFTp8       4
#define CFFT_IMAG_FFTp8       5
// Presumably the intermediate arrays offset by N/4 - confirm.
#define CFFT_REAL_FFTpN4      6
#define CFFT_IMAG_FFTpN4      7
// Presumably the input arrays offset by N/2 - confirm.
#define CFFT_REAL_INPUTpN2    8
#define CFFT_IMAG_INPUTpN2    9
// Twiddle tables: real part (cosine table), imaginary part (sine table).
#define CFFT_TWID_REAL        10
#define CFFT_TWID_IMAG        11

// --- bit-reversed addressing of the input arrays --------------------------
// Bit-reversed pointer to the real input (loaded into b0).
#define CFFT_IREDM            12
// Bit-reversed modifier used to step through both input arrays (m0 and m8).
#define CFFT_BRMODIFY         13
// Bit-reversed pointer to the imaginary input (loaded into b8).
#define CFFT_IREPM            14

// --- sizes and loop counts ------------------------------------------------
// FFT size N; also the circular-buffer length of the intermediate arrays.
#define CFFT_N                15
// N/2; also the circular-buffer length of the twiddle tables.
#define CFFT_N2               16
// N/4.
#define CFFT_N4               17
// Presumably -N/4 - confirm.
#define CFFT_mN4              18
// Iteration count of the combined stage 1/2 + bit-reverse loop
// (presumably N/8 - confirm).
#define CFFT_N8               19
// Presumably N/2 + 2 - confirm.
#define CFFT_N2p2             20
// Presumably -N/2 - confirm.
#define CFFT_mN2              21
// Presumably N/4 - 1 - confirm.
#define CFFT_N4m1             22
// Number of stages minus 5; iteration count of the outer stage loop.
#define CFFT_STAGESm5         23
/*************************** Variables ************************************/
// Two-word scratch area in data memory used by _cfft_simd:
//   SaveStack+0 - caller's MODE1 value, stored on entry before MODE1 bits
//                 are set
//   SaveStack+1 - r4 (pointer to the cfft object), stored from the primary
//                 register file and then re-read into i14
// NOTE(review): this is a single statically allocated save area shared by
// every call - confirm the routine is never re-entered (e.g. from an
// interrupt handler) while a transform is in progress.
.SEGMENT/DM seg_dmda;
// Start SaveStack on an even address.
.ALIGN 2;
.var SaveStack[2];
.ENDSEG;
.SEGMENT/PM seg_pmco;
/*______________________________begin FFT__________________________________*/
.global _cfft_simd;
_cfft_simd:
// bit reverse input addresses
entry;
dm(SaveStack+0)=mode1;
bit set MODE1 BR0 | // Bit Reverse Addressing for i0
BR8 | // Bit Reverse Addressing for i8
RND32 | // Rount to 32 bit floating point
CBUFEN | // Enable circular buffering
PEYEN | // Enable PEY for simd
BDCST1 | // Broadcast mode when using i1
BDCST9 | // Broadcast mode when using i9
SRD1H | // Use alternate registers for dag1 high
SRD1L | // Use alternate registers for dag1 low
SRD2H | // Use alternate registers for dag2 high
SRD2L | // Use alternate registers for dag2 low
SRRFH | // Use alternate data registers high
SRRFL; // Use alternate data registers low
dm(SaveStack+1)=r4; // save pointer to cfft object, this instruction also allows the mode1 reg to settle
// from this point on alternate registers are used
i14=dm(SaveStack+1); // pointer to cfft_simd object stored in alternate i14
b0=pm(CFFT_IREDM,i14); // read bit reversed pointer to real input
m0 = pm(CFFT_BRMODIFY,i14); // bit reversed modifier to read input arrays
m8 = pm(CFFT_BRMODIFY,i14); // bit reversed N/2
b8=pm(CFFT_IREPM,i14); // read bit reversed pointer to imag input
b5 = pm(CFFT_REAL_FFT,i14); // Pointer to intermediate array
l5 = pm(CFFT_N,i14); // read size of fft
b4 = pm(CFFT_REAL_FFT,i14); // Pointer to intermediate array
l4 = pm(CFFT_N,i14); // read size of fft
b3 = pm(CFFT_REAL_FFT,i14); // Pointer to intermediate array
l3 = pm(CFFT_N,i14); // read size of fft
b2 = pm(CFFT_REAL_FFT,i14); // Pointer to intermediate array
l2 = pm(CFFT_N,i14); // read size of fft;
b13 = pm(CFFT_IMAG_FFT,i14); // Pointer to intermediate array
l13 = pm(CFFT_N,i14); // read size of fft;
b12 = pm(CFFT_IMAG_FFT,i14); // Pointer to intermediate array
l12 = pm(CFFT_N,i14); // read size of fft;
b11 = pm(CFFT_IMAG_FFT,i14); // Pointer to intermediate array
l11 = pm(CFFT_N,i14); // read size of fft;
b1 = pm(CFFT_TWID_REAL,i14); // Pointer to cosine table
l1 = pm(CFFT_N2,i14); // read N/2
b9 = pm(CFFT_TWID_IMAG,i14); // Pointer to sine table
l9 = pm(CFFT_N2,i14); // read N/2
m1 = 2;
m9 = 2;
/****************** Stage 1, 2 and Bitrev ******************/
f0=dm(i0,m0), f8=pm(i8,m8); /* prime the pipeline */
f1=dm(i0,m0), f9=pm(i8,m8);
f4=f0+f1, f0=f0-f1, f2=dm(i0,m0), f10=pm(i8,m8);
f12=f8+f9, f13=f8-f9, f3=dm(i0,m0), f11=pm(i8,m8);
f6=f2+f3, f3=f2-f3, b10=b11;
f14=f10+f11, f15=f10-f11, l10=l11;
m14=pm(CFFT_N8,i14);
lcntr = m14, do _in_loop1 until lce;
f5=f0+f15, f7=f0-f15, f0=dm(i0,m0), f8=pm(i8,m8);
f12=f12+f14, f14=f12-f14, f1=dm(i0,m0), f9=pm(i8,m8);
f4=f4+f6, f6=f4-f6, f2=dm(i0,m0), f10=pm(i8,m8);
f15=f13+f3, f13=f13-f3, f3=dm(i0,m0), f11=pm(i8,m8);
f4=f0+f1, f0=f0-f1, dm(i2,m1)=f4, pm(i10,m9)=f12;
f12=f8+f9, f13=f8-f9, dm(i2,m1)=f5, pm(i10,m9)=f13;
f6=f2+f3, f3=f2-f3, dm(i2,m1)=f6, pm(i10,m9)=f14;
_in_loop1: f14=f10+f11, f15=f10-f11, dm(i2,m1)=f7, pm(i10,m9)=f15;
/******************** Stages ***********************/
_end_1:
m4 = -4;
m3 = 12;
m2 = pm(CFFT_N4,i14); // read N/4
i4 = pm(CFFT_REAL_FFTp8,i14); // read pointer to real_fft+8
i12 = pm(CFFT_IMAG_FFTp8,i14); // read pointer to imag_fft+8
r3=pm(CFFT_STAGESm5,i14);
lcntr = r3, do _out_loop until lce; // loop STAGES-5 times
r3 = m4;
r1 = r3 + r3, r0 = m3;
r3 = - r3, m4 = r1;
r1 = r0 + r0, m12 = r1;
// NOTE(review): the listing appears to be truncated here - the rest of the
// _out_loop body and the function epilogue are not present in this copy.