// File: hhcfft4.asm
/*******************************************************************************
*
* Function: _Rad4FFT - Fast complex floating point FFT
*
* Synopsis: #include "Rad4FFT.h"
* void Rad4FFT();
* Description:
* This routine performs a single channel SIMD, complex, radix 4 Fast Fourier
* Transform (FFT). The FFT length (N) must be a power of 4 and a minimum of
* 64 points. The real part of the input data is placed in DM and the imaginary
* part in PM. This data is destroyed during the course of the computation.
* The real and imaginary output of the FFT is placed in separate locations in DM.
*
* Since this routine takes care of all necessary address digit-reversals, the
* input and output data are in normal order. The digit reversal is accomplished
* by using a modified radix 4 butterfly throughout which swaps the inner two
* nodes resulting with bit reversed data. The digit reversal is completed by
* bit reversing the real data in the final stage and then bit reversing the
* imaginary so that it ends up in DM.
*
* To implement an inverse FFT, you only have to (1) swap the incoming data's real
* and imaginary parts, (2) run the forward FFT, (3) swap the outgoing data's
* real and imaginary parts and (4) scale the data by 1/N.
*
*
*
* Before calling the Rad4FFT function, the user must do the following:
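* 1. Create 2 arrays, float real_input[N] and float imag_input[N],
* where N is the size of the FFT computation and must be a
* power of 4. These arrays must be aligned on a
* multiple of N boundary.
* (NOTE: the example below applies the N alignment to the output
* arrays instead; confirm which arrays actually require it.)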
* 2. Create 2 arrays, float real_fft[N] and float imag_fft[N].
* These arrays hold the final results for the algorithm,
* and any data in them will be written over.
* 3. Create 2 arrays, float twid_real[N*3/4] and float twid_imag[N*3/4].
* These arrays will contain the complex twiddle factors used in
* calculating the FFT.
*
* 4. Calculate the twiddle factors using the function:
* void Rad4_Twiddle_Calc(TCFFT4_simd_size length,float *tc,float pm *ts)
* 5. Initialize the FFT by calling the function:
* void Rad4FFT_Init(
* TCFFT4_simd_size N,// size of the FFT (power of 4)
* float *redata, // real input data
* float pm *imdata, // imaginary input data
* float *refft, // real output data
* float pm *imfft, // imaginary output data
* float *retwid, // cosine twiddle factors
* float pm *imtwid);// sine twiddle factors
* 6. Populate the input arrays and call Rad4FFT.
*
* Note that the names of the arrays in the above procedure are used as
* examples and are not unique.
*
* Note that in order to compute a different size FFT, or to change
* the input or output arrays, all the steps listed above
* must be completed again, except calculating the twiddle
* factors if N does not change.
*
*
* Assumptions:
* All arrays must start on even address boundaries.
*
* Example:
*
* #define N 4096 // define fft size
* // declare arrays for input data
* float redata[N];
* float pm imdata[N];
* // declare arrays for output data aligned on N boundaries
* #pragma align 4096
* float refft[N];
* #pragma align 4096
* float pm imfft[N];
* // declare arrays for twiddle factors
* #pragma align 2
* float retwid[N*3/4];
* #pragma align 2
* float pm imtwid[N*3/4];
* // FFTSize is an enumerated type, (power of 4)
* TCFFT4_simd_size FFTSize = N4096;
* // Calculate twiddle factors
* Rad4_Twiddle_Calc(FFTSize,retwid,imtwid);
* // Initialize the FFT
* Rad4FFT_Init(
* FFTSize,
* &redata[0],
* &imdata[0],
* refft,imfft,
* retwid,
* imtwid);
* // Calculate FFT
* Rad4FFT();
*
* Memory Usage:
* pm code = 200 words, pm data = 1.75*N words, dm data = 3.75*N words
*
* Cycle Counts:
*
* FFT_Size Cycles*
* -------- ------
* 64 769
* 256 3538
* 1024 17203
* 4096 82516
*
* *cycle counts include the basic overheads in
* argument passing and in calling the function
*
* Author: Karl Schwarz & Raimund Meyer, Universitaet Erlangen Nuernberg
*
* Revisions:
* 25-APR-91, RY
* 18-JUN-91, RY
* 05-MAR-99, RFG
* 24-MAY-00, GGL
* 08-JUN-00, GGL verified in 0.1 Si
* Modified to be C callable. June, 2003
* Darrel Judd
* Judd Labs, Inc.
* 801-756-2057
* drjudd@ieee.org
*******************************************************************************/
//==============================================================================
// include files
//==============================================================================
#include "def21160.h" /* Symbol Definition File */
#include <asm_sprt.h>
//==============================================================================
// global declarations
//==============================================================================
// Symbols exported from this file. Apart from _Rad4FFT_Init, each name below is
// a one-word parameter declared with .var in seg_dmda further down; _Rad4FFT
// reads them from memory instead of taking arguments.
// NOTE(review): the file header describes Rad4FFT_Init as the call that sets up
// the FFT, so presumably it fills these words in - its definition is not
// visible here, confirm.
.global _Rad4FFT_Init;
.global _Rad4FFT_ORE; /*bitrev(32 bit addr of output real in dm), addr is 0,N,2N,3N,...*/
.global _Rad4FFT_OIM; /*bitrev(24 bit addr of output imag. in dm), addr is 0,N,2N,3N,...*/
.global _Rad4FFT_OST; /*bitrev(32 bit N/2)*/
.global _Rad4FFT_N; // FFT size
.global _Rad4FFT_N8; // N/8, used by _Rad4FFT as the first-stage loop count
.global _Rad4FFT_StagesM2; // stage count minus 2 (see the note on its .var declaration)
.global _Rad4FFT_N4M1; // N/4-1
.global _Rad4FFT_N16; // N/16
.global _Rad4FFT_N16T3; // N*3/16
.global _Rad4FFT_redata; // address of the real input array
.global _Rad4FFT_imdata; // address of the imaginary input array
.global _Rad4FFT_cosine; // address of the real (cosine) twiddle factors
.global _Rad4FFT_sine; // address of the imaginary (sine) twiddle factors
.global _Rad4FFT_redataPN4; // redata + N/4
.global _Rad4FFT_redataPN2; // redata + N/2
.global _Rad4FFT_redataPN34; // redata + N*3/4
.global _Rad4FFT_imdataPN4; // imdata + N/4
.global _Rad4FFT_imdataPN2; // imdata + N/2
.global _Rad4FFT_imdataPN34; // imdata + N*3/4
//===========================================================================
// variable declarations
//===========================================================================
// parameters passed by memory
// Parameter block in data memory. Each .var below is a single word that
// _Rad4FFT loads at run time; the values must be in place before _Rad4FFT is
// called.
// NOTE(review): _Rad4FFT reads some of these words (e.g. _Rad4FFT_N8) while
// PEYEN (SIMD) is set, so check the access implications before reordering or
// inserting declarations here.
.section/dm seg_dmda;
.var _Rad4FFT_ORE; // bit reversed address of real output (per the .global comment; this comment previously said "input" - confirm)
.var _Rad4FFT_OIM; // bit reversed address of imaginary output (per the .global comment; this comment previously said "input" - confirm)
.var _Rad4FFT_OST; // bit reverse of N/2
.var _Rad4FFT_N; // FFT size
.var _Rad4FFT_N8; // N/8 (loop count of the first-stage butterfly loop in _Rad4FFT)
.var _Rad4FFT_StagesM2; // log2(N)-2 - NOTE(review): for a radix-4 FFT the stage count would normally be log4(N); confirm which value is stored
.var _Rad4FFT_N4M1; // N/4-1
.var _Rad4FFT_N16; // N/16
.var _Rad4FFT_N16T3; // N*3/16
.var _Rad4FFT_redata; // Real input address
.var _Rad4FFT_imdata; // Imaginary input address
.var _Rad4FFT_cosine; // address of real twiddle factors
.var _Rad4FFT_sine; // address of imaginary twiddle factors
.var _Rad4FFT_redataPN4; // redata + N/4
.var _Rad4FFT_redataPN2; // redata + N/2
.var _Rad4FFT_redataPN34; // redata + N*3/4
.var _Rad4FFT_imdataPN4; // imdata + N/4
.var _Rad4FFT_imdataPN2; // imdata + N/2
.var _Rad4FFT_imdataPN34; // imdata + N*3/4
// Keep SaveStack on an even address.
// NOTE(review): presumably required for long-word/SIMD accesses to it - confirm.
.align 2;
.var SaveStack[15]; // register storage; _Rad4FFT saves MODE1 in the first word on entry
//=============================================================================
// program memory code
//=============================================================================
.section/pm seg_pmco;
/*_______first stage radix-4 butterfly without twiddles______*/
.global _Rad4FFT;
_Rad4FFT:
dm(SaveStack)=mode1; // save mode1 register
bit set mode1 SRD1H | // use alternate dag1 high
SRD1L | // use alternate dag1 low
SRD2H | // use alternate dag2 high
SRD2L | // use alternate dag2 low
SRRFH | // use alternate registers high
SRRFL | // use alternate registers low
RND32 | // round to 32 bits
CBUFEN | // circular buffer enable
PEYEN ; // enable PEY (SIMD)
nop; // 1 cycle latency before we can modify the alternate dags
// set l regs to 0
l0=0;l1=0;l2=0;l3=0;l4=0;l5=0;l6=0;l7=0;
l8=0;l9=0;l10=0;l11=0;l12=0;l13=0;l14=0;l15=0;
// load addresses of arrays into dags
i0=dm(_Rad4FFT_redata);
i1=dm(_Rad4FFT_redataPN4);
i2=dm(_Rad4FFT_redataPN2);
b3=dm(_Rad4FFT_redataPN34);
i8=dm(_Rad4FFT_imdata);
i9=dm(_Rad4FFT_imdataPN4);
i10=dm(_Rad4FFT_imdataPN2);
b11=dm(_Rad4FFT_imdataPN34);
m5=4;
m10=m5;
m1=-2;
m9=m1;
m0=2; /* first stage modify values */
m8=2; /* first stage modify values */
f0=dm(i0,m0), f1=pm(i8,m8);
f2=dm(i2,m0), f3=pm(i10,m8);
f0=f0+f2, f2=f0-f2, f4=dm(i1,m0), f5=pm(i9,m8);
f1=f1+f3, f3=f1-f3, f6=dm(i3,m0), f7=pm(i11,m8);
f4=f6+f4, f6=f6-f4;
f5=f5+f7, f7=f5-f7;
f8=f0+f4, f9=f0-f4;
f10=f1+f5, f11=f1-f5;
r0=dm(_Rad4FFT_N8);
lcntr=r0, do fstage until lce; /* do N/4 simple radix-4 butterflies */
f12=f2+f7, f13=f2-f7, f0=dm(i0,m1), f1=pm(i8,m9);
f14=f3+f6, f15=f3-f6, f2=dm(i2,m1), f3=pm(i10,m9);
f0=f0+f2, f2=f0-f2, f4=dm(i1,m1), f5=pm(i9,m9);
f1=f1+f3, f3=f1-f3, f6=dm(i3,m1), f7=pm(i11,m9);
f4=f6+f4, f6=f6-f4, dm(i0,m5)=f8, pm(i8,m10)=f10;
f5=f5+f7, f7=f5-f7, dm(i1,m5)=f9, pm(i9,m10)=f11;
f8=f0+f4, f9=f0-f4, dm(i2,m5)=f12, pm(i10,m10)=f14;
fstage:
f10=f1+f5, f11=f1-f5, dm(i3,m5)=f13, pm(i11,m10)=f15;
bit clr mode1 PEYEN; /* added for testing SIMD */
m0=1;
m8=1;
/*_____________Middle stages with radix-4 main butterfly___________________*/
/* m0=1 and m8=1 is still preset */
// m1=-2; /* reverse step for twiddles */
// m9=m1;
m2=3; /* forward step for twiddles */
m10=m2;
//m5=4; /* first there are 4 groups */
r2=dm(_Rad4FFT_N16); /* with N/16 butterflies in each group */
r3=dm(_Rad4FFT_N16T3); /* step to next group */
r0=dm(_Rad4FFT_StagesM2);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -