📄 rfft_fr16.asm
字号:
/************************************************************************
*
* rfft_fr16.asm : $Revision: 1.3 $
*
* (c) Copyright 2000-2003 Analog Devices, Inc. All rights reserved.
*
************************************************************************/
#if 0
N point radix 2 real input FFT
Synopsis
#include <filter.h>
void rfft_fr16(
const fract16 in[] /* input sequence */
complex_fract16 t[]; /* temporary working buffer */
complex_fract16 out[]; /* output sequence */
const complex_fract16 w[]; /* twiddle sequence */
int wst; /* twiddle factor stride */
int n; /* number of FFT points */
int *block_exponent; /* block exponent of output data */
int scale method; /* scaling method desired 0-none,
1-static, 2-dynamic */
Description
This function transforms the time domain real input signal sequence to
the frequency domain by using the radix-2 FFT. The function takes
advantage of the fact that the imaginary part of the input equals zero,
which in turn eliminates half of the multiplications in the butterfly.
The size of the input array in, the output array out, and the temporary
working buffer t is n, where n represents the number of points in the FFT.
The function does not impose any special memory alignment requirements
on the arrays. However, benefits in run-time performance will be
realized if the output array is allocated on an address boundary that is
multiple of twice the FFT size. If the input data can be overwritten, then
optimum memory usage can be achieved by specifying the input array as
either the output array or as the temporary array provided that the memory
size of the input array is at least 2*n. Specifying the input array as the
temporary array will also result in increased run-time performance.
The twiddle table is passed in the argument w, which must contain at least
n/2 twiddle factors. The function twidfftrad2_fr16 may be used to initialize
the array. If the twiddle table contains more factors than needed for
a particular call on rfft_fr16, then the stride factor has to be set
appropriately; otherwise it should be 1.
The argument scale_method controls how the function should scale the
output to avoid overflow. If no scaling is selected by setting scale_method
to zero, then the input signal should be sufficiently conditioned to avoid
overflow. The block_exponent argument will be set to zero.
The function will perform static scaling if scale_method is set to 1. For
static scaling, the function will scale intermediate results to prevent
overflow. The final output will be scaled by 1/n, and block_exponent will be
set to log2(n).
If scale_method is set to 2, then the function will select dynamic
scaling. Under dynamic scaling, the function will inspect the intermediate
results and will only scale to avoid overflow. Dynamic scaling therefore
minimizes loss of precision but at the possible cost of slightly reduced
performance. The block_exponent argument will be set to a value between
0 (which indicates that no scaling was performed) and log2(n) (as if static
scaling was performed).
Domain
Input sequence length n must be a power of two and at least 8.
#endif
/* parameters, offset from FP (I5) */
#define IN_DATA 1
#define TMP_BUF 2
#define OUT_DATA 3
#define TWIDDLES 4
#define TWID_STRIDE 5
#define FFT_SIZE 6
#define FFT_EXPON_ADDR 7
#define SCALE_METHOD 8
/* scale methods */
#define NO_SCALE 0
#define STATIC_SCALE 1
#define DYNAMIC_SCALE 2
#include "lib_fft_funcs.h"
/* local data in preserved stack area, offset from SP (I4) */
#define FFT_SIZE2 M5
.section/code program;
.extern __cfft_fr16, DS_FUNC, SS_FUNC, NOS_FUNC, BREV_FUNC_IR;
.global __rfft_fr16;
__rfft_fr16: // real fft radix-2 implementation
/* some sanity checks */
AX1 = DM( I4 + FFT_SIZE ); // load n
AF = AX1 - 8;
IF LT RTS; // if n<8, return
//------------------------DO CFFT----------------------------------------------
/* We implement rfft using cfft of half input size. The cfft input
* has even inputs in reals and odd inputs in imags. A final trigonometric
* pass generates the correct rfft output.
*/
AR = DM ( I4 + TWID_STRIDE ); // load twiddle stride parameter
SR = LSHIFT AR BY 1 (LO); // twiddle stride*=2
DM ( I4 + TWID_STRIDE ) = SR0; // store new twiddle stride parameter
CALL __cfft_fr16 (DB); // CALL to cfft
SR = LSHIFT AX1 BY -1 (LO); // SR0 = n/2
DM( I4 + FFT_SIZE ) = SR0; // store new size
//------------------------FUNCTION PROLOGUE-------------------------------------
DM(I4 += M5) = I5, I5 = I4; // save preserved registers
DM(I4 += M5) = I2;
DM(I4 += M5) = I3;
DM(I4 += M5) = I7;
DM(I4 += M5) = M0;
//------------------------FINAL SCALE-------------------------------------------
DIS M_MODE, ENA AR_SAT; // set fractional multiplier and ALU sat
AR = DM(I5 + OUT_DATA);
I7 = AR; // I7 -> out
/* get scale function and call for final time */
AR = DM ( I5 + SCALE_METHOD );
AF = AR - NO_SCALE;
IF EQ JUMP scale_done (DB);
SE = -1;
M1 = 1;
IJPG = PAGE( DS_FUNC ); // set default top 8-bit address for
// scale jumps. all three functions
// use a common implementation so do
// not need to set IJPG for each method
AY1 = DM ( I5 + FFT_EXPON_ADDR ); // get block exponent address
AF = AR - DYNAMIC_SCALE;
IF EQ JUMP do_scale (DB);
I6 = ADDRESS(DS_FUNC);
I3 = AY1;
I6 = ADDRESS(SS_FUNC);
AY1 = DM ( I3 + 0 ); // get block exponent
AR = AY1 + 1; // add 1 to current
DM ( I3 + 0 ) = AR; // save new block exponent
do_scale:
// requires: SE=-1, M1==1, I3==blk_exp addr, I7==output addr, fract MAC mode,
// 2 * n held at I4-1(written by previous cfft call)
CALL (I6);
SE = -1; // changed by dynamic scale
scale_done:
//------------------------TRIG-RECOMBINATION------------------------------------
#if 0
example trigonometric recombination C code:
rfftroutN[0] = rfftroutNover2[0] + rfftioutNover2[0];
rfftioutN[0] = 0;
for (k=1; k<Nover2; k++) {
Realsum= ( rfftroutNover2[k] + rfftroutNover2[(Nover2-k)] ) / 2;
Imagsum= ( rfftioutNover2[k] + rfftioutNover2[(Nover2-k)] ) / 2;
Realdif= ( rfftroutNover2[k] - rfftroutNover2[(Nover2-k)] ) / 2;
Imagdif= ( rfftioutNover2[k] - rfftioutNover2[(Nover2-k)] ) / 2;
rfftroutN[k] = (Realsum +
(((unsigned long)twid_real[k] * Imagsum)>>15) -
((((unsigned long)twid_imag[k] * Realdif))>>15));
rfftioutN[k] = (Imagdif -
(((long)-twid_imag[k] * Imagsum)>>15) -
((((unsigned long)twid_real[k] * Realdif))>>15));
}
for (k=Nover2; k<N; k++) {
rfftroutN[k] = rfftroutN[N-k];
rfftioutN[k] = -(rfftioutN[N-k]);
}
#endif
// copy size/2 complex results to tmp
AY1 = DM(I5 + FFT_SIZE); // get n/2
CNTR = AY1;
I0 = I7; // I0,I6 -> out
I6 = I7;
AR = DM(I5 + TMP_BUF); // I1 -> tmp
I1 = AR;
M3 = 2;
M7 = 2;
MODIFY(I6 += 1); // I6 -> tmp.im
DO copy_to_tmp UNTIL CE;
AX0 = DM(I0 += M3), AY0 = PM(I6 += M7);
DM(I1 += 1) = AX0;
copy_to_tmp: DM(I1 += 1) = AY0;
I0 = AR; // I0,I6 -> tmp
I6 = AR;
I1 = I7; // I1 -> out
AR = AY1 - 1, SR0 = AY1;
CNTR = AR; // for (k=1; k<Nover2; k++) {
AR = SR0 + AY1, AX0 = DM(I0 += M1); // n/2+n/2, AX0 = rfftroutNover2[0]
DM(I5 + FFT_SIZE) = AR; // n re-written(had been changed to n/2)
M7 = AR; // M7,M3 = n/2 complex offset
M3 = AR;
AR = -AR;
M0 = AR; // M0 = -n/2 complex offset
AY0 = DM(I0 += M1); // AY0 = rfftioutNover2[0]
AR = AX0 + AY0, SI = DM(I6 += M7); // rfftroutNover2[0]+rfftioutNover2[0],
// I6 -> tmp[n/2].re
// rfftioutN[0] = 0, written first
// loop iter
AR = AX0 - AY0, DM(I1 += M3) = AR; // rfftroutNover2[0]-rfftioutNover2[0],
// rfftroutN[0] = AR(I1->rfftroutN[n/2])
AR = PASS 0, DM(I1 += M1) = AR; // rfftroutN[n/2] = AR
DM(I1 += M0) = AR; // rfftioutN[n/2] = 0(I1->out[0].im)
SI = DM(I5 + TWID_STRIDE); // load new twiddle stride parameter
SR = LSHIFT SI (LO), AY1 = DM(I6 += M5);
// twiddle stride/=2,
// I6 -> tmp[n/2-1].im
DM(I5 + TWID_STRIDE) = SR0; // restore twiddle stride parameter
M0 = SI; // M0 is twiddle step
AY1 = DM(I5 + TWIDDLES);
I2 = AY1; // I2->twiddles
AR = AY1 + 1, AX0 = DM(I0 += M1); // load rfftroutNover2[1]
AY1 = DM(I6 += M5); // load rfftioutNover2[N/2-1]
SR = 0, AY0 = DM(I6 += M5); // load rfftroutNover2[N/2-1]
I3 = AR; // I3->twiddles(imag offset)
SI = 0x4000; // SI = 0.5
MODIFY(I2 += M0); // I2->twiddles[1]
MODIFY(I3 += M0); // I3->imag twiddles[1]
DO bfly_rfft_tr UNTIL CE;
// Realsum= ( rfftroutNover2[k] + rfftroutNover2[(Nover2-k)] ) / 2;,
AR = AX0 + AY0, MY1 = SI;
MR = AR * MY1 (SS), DM(I1 += M1) = SR1;
// MR ==> Realsum[k],
// rfftioutNover2[k-1] = SR1
// Realdif= ( rfftroutNover2[k] - rfftroutNover2[(Nover2-k)] ) / 2;
AR = AX0 - AY0, MX0 = DM(I2 += M0);
SR = AR * MY1 (RND), AX1 = DM(I0 += M1);
// AX1 = rfftioutNover2[k]
// MX0 ==> twid_real[k]
// Imagsum= ( rfftioutNover2[k] + rfftioutNover2[(Nover2-k)] ) / 2;
AR = AX1 + AY1, MY0 = SR1; // MY0 ==> Realdif[k]
SR = AR * MY1 (RND), AX0 = DM(I0 += M1);
// AX0 = rfftroutNover2[k+1]
// Imagdif= ( rfftioutNover2[k] - rfftioutNover2[(Nover2-k)] ) / 2;
AR = AX1 - AY1, MY1 = SR1; // MY1 ==> Imagsum[k]
SR = ASHIFT AR(HI), MX1 = DM(I3 += M0);
// SR ==> Imagdif[k]
// MX1 ==> twid_imag[k]
// rfftroutN[k] = (Realsum +
// (((unsigned long)twid_real[k] * Imagsum)>>15) -
// ((((unsigned long)-twid_imag[k] * Realdif))>>15));
MR = MR + MX0 * MY1 (SS), AY1 = DM(I6 += M5);
MR = MR + MX1 * MY0 (RND), AY0 = DM(I6 += M5);
// AY1 = rfftioutNover2[(Nover2-(k+1))]
// AY0 = rfftroutNover2[(Nover2-(k+1))]
// rfftioutN[k] = (Imagdif -
// (((long)-twid_imag[k] * Imagsum)>>15) -
// ((((unsigned long)twid_real[k] * Realdif))>>15));
SR = SR + MX1 * MY1 (SS), DM( I1 += M1 ) = MR1;
bfly_rfft_tr: SR = SR - MX0 * MY0 (RND);
DM( I1 += M1 ) = SR1;
//------------------------GEN N/2-N OUTPUTS-------------------------------------
I6 = I1;
SI = DM(I5 + FFT_SIZE);
SR = LSHIFT SI BY -1 (LO); // div 2
CNTR = SR0;
// mirror N/2 output in second half
// around the Nyquist point of the
// spectrum to get the negative part
// of the frequency spectrum
I0 = I1; // I0 -> real +ve frequency values
M3 = -2;
M7 = -2;
MODIFY (I6 += 1); // I6 -> imag +ve frequency values
DO rfft_mirror UNTIL CE;
AX0 = DM( I0 += M3 ), AY0 = PM( I6 += M7 );
// read real and modify to previous
// read imag and modify to previous
AR = -AY0, DM( I1 += M1 ) = AX0;
rfft_mirror: DM( I1 += M1 ) = AR;
//------------------------FUNCTION EPILOGUE-------------------------------------
I6 = I5;
ENA M_MODE, DIS AR_SAT; // set integer multiplier and ALU sat
MODIFY (I6 += M5); // restore preserved registers
I2 = DM (I6 += M5);
I3 = DM (I6 += M5);
I7 = DM (I6 += M5);
M0 = DM (I6 += M5);
RTS (DB); // return
I4 = I5;
I5 = DM (I5 += M5);
.__rfft_fr16.end:
// end of file
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -