📄 cfft_fr16.asm
字号:
/************************************************************************
*
* cfft_fr16.asm : $Revision: 1.3 $
*
* (c) Copyright 2000-2003 Analog Devices, Inc. All rights reserved.
*
************************************************************************/
#if 0
N point radix 2 complex input FFT
Synopsis
#include <filter.h>
void cfft_fr16(
const complex_fract16 in[] /* input sequence */
complex_fract16 t[]; /* temporary working buffer */
complex_fract16 out[]; /* output sequence */
const complex_fract16 w[]; /* twiddle sequence */
int wst; /* twiddle factor stride */
int n; /* number of FFT points */
int *block_exponent; /* block exponent of output data */
int scale method; /* scaling method desired 0-none,
1-static, 2-dynamic */
Description
This function transforms the time domain complex input signal sequence
to the frequency domain by using the radix-2 Fast Fourier Transform
(FFT).
The size of the input array in the temporary array t and the output
array out is n, where n represents the number of points in the FFT. The
function does not impose any special memory alignment requirements on the
arrays. However, benefits in run-time performance will be realized if the
output array is allocated on an address boundary that is multiple of twice
the FFT size. If the input data can be overwritten, then optimum memory
usage can be achieved by specifying the input array as either the output
array or as the temporary array. Specifying the input array as the
temporary array will also result in increased run-time performance.
The twiddle table is passed in the argument w, which must contain at least
n/2 twiddle factors. The function twidfftrad2_fr16 may be used to initialize
the array. If the twiddle table contains more factors than needed for
a particular call on cfft_fr16, then the stride factor has to be set
appropriately; otherwise it should be 1.
The argument scale_method controls how the function should scale the
output to avoid overflow. If no scaling is selected by setting scale_method
to zero, then the input signal should be sufficiently conditioned to avoid
overflow. The block_exponent argument will be set to zero.
The function will perform static scaling if scale_method is set to 1. For
static scaling, the function will scale intermediate results to prevent
overflow. The final output will be scaled by 1/n, and block_exponent will
be set to log2(n).
If scale_method is set to 2, then the function will select dynamic
scaling. Under dynamic scaling, the function will inspect the intermediate
results and will only scale to avoid overflow. Dynamic scaling therefore
minimizes loss of precision but at the possible cost of slightly reduced
performance. The block_exponent argument will be set to a value between
0 (which indicates that no scaling was performed) and log2(n) (as if static
scaling was performed).
Domain
Input sequence length n must be a power of two and at least 4.
#endif
/* parameters, offset from FP (I5) */
#define IN_DATA 1
#define TMP_BUF 2
#define OUT_DATA 3
#define TWIDDLES 4
#define TWID_STRIDE 5
#define FFT_SIZE 6
#define FFT_EXPON_ADDR 7
#define SCALE_METHOD 8
/* scale methods */
#define NO_SCALE 0
#define STATIC_SCALE 1
#define DYNAMIC_SCALE 2
#include "lib_fft_funcs.h"
/* local data in preserved stack area, offset from SP (I4) */
#define FFT_SIZE2 M5
.section/code program;
.extern DS_FUNC, SS_FUNC, NOS_FUNC, BREV_FUNC_IR;
.global __cfft_fr16;
__cfft_fr16: // complex fft radix-2 implementation
/* some sanity checks */
AX1 = DM( I4 + FFT_SIZE ); // load n
AF = AX1 - 4;
IF LT RTS; // if n<4, return
//------------------------FUNCTION PROLOGUE-------------------------------------
DM (I4 += M5) = I5, I5 = I4; // save preserved registers
DM (I4 += M5) = I2;
DM (I4 += M5) = I3;
DM (I4 += M5) = I7;
DM (I4 += M5) = M0;
SR = LSHIFT AX1 BY 1 (LO); // save n*2
DM(I4 + FFT_SIZE2) = SR0;
//------------------------BIT-REVERSAL------------------------------------------
/* First the input array is copied to the output buffer in bit-
* reversed order. Uses a routine called
* ___lib_bit_complex_reversal_sort_inregs which will use the DAG1 bit-reversal
* mode when possible.
* ___lib_bit_complex_reversal_sort_inregs params:
* MR1 - input complex array address
* AX1 - length of arrays
* MR0 - output complex array address
* AX0 - temp complex array address
* The fft bit-reversal is done at the start, instead of the more
* conventionally at the end, because this first stage this way only has one
* addition for each data point and will not overflow. An implementation in the
* other direction may easily overflow in the first stage.
*/
AX0 = DM (I5 + TMP_BUF); // complex temp array address
MR1 = DM (I5 + IN_DATA); // complex input array address
CALL BREV_FUNC_IR(DB);
MR0 = DM (I5 + OUT_DATA); // complex tmp buffer address
I7 = MR0; // I7->output complex array
DIS M_MODE, ENA AR_SAT; // set fractional multiplier and ALU sat
/* load block_exponent pointer */
AX0 = DM ( I5 + FFT_EXPON_ADDR );
I3 = AX0; // I3->blk_exponent
/* calculate the number of stages */
SI = DM( I5 + FFT_SIZE ); // load n
SE = EXP SI (HI);
AR = SE; // exponent of n
AY0 = 14;
AR = AR + AY0; // log2(n)
/* get scale method function pointer and initialize block_exponent */
IJPG = PAGE( DS_FUNC ); // set default top 8-bit address for
// scale jumps. all three functions
// use a common implementation so do
// not need to set IJPG for each method
AX0 = DM ( I5 + SCALE_METHOD );
AF = AX0 - NO_SCALE;
IF EQ JUMP scale_set (DB);
AX1 = ADDRESS(NOS_FUNC);
IF EQ AR = PASS 0;
AF = AX0 - DYNAMIC_SCALE;
IF EQ JUMP scale_set (DB);
AX1 = ADDRESS(DS_FUNC);
IF EQ AR = PASS 0;
AX1 = ADDRESS(SS_FUNC);
scale_set:
I2 = AX1; // I2->scale function
DM ( I3 + 0 ) = AR; // blk_exponent = 0 (no_scale)
// blk_exponent = 0 (dynamic_scale)
// blk_exponent =
// log2size (static_scale)
/* Stage 1 and 2 of the butterfly structure are implemented separately. The
* main reason for separating these out from the general computation, is that
* the multiplications with the twiddle values in both these stages can be
* avoided.
*/
//------------------------STAGE 1-----------------------------------------------
/* The first stage of signal flow, there are n/2 number of butterflies. Each
* butterfly works on two inputs. These input are multiplied by w[0] and
* added and subtracted. Multiplying a data with w[0], which is always equal
* to 1+0i, will result the same data.
*/
CALL (I2) (DB); // call scaling routine
// (SI preserved over scaling call)
M1 = 1; // set required values for scaling
SE = -1;
I0 = I7; // I0->output address(xi)
I6 = I7; // I6->output address(xip)
SR = LSHIFT SI BY -1 (LO); // SI holds n, divide by 2
CNTR = SR0; // stage 1 has n/2 butterflies
SI = SR0;
M7 = 1;
M3 = -1;
M0 = 3; M6 = 3;
MODIFY(I6 += 2);
AX0 = DM(I0 += M1), AY0 = PM(I6 += M7);
DO stage1 UNTIL CE;
AR = AX0 + AY0, AX1 = DM(I0 += M3), AY1 = PM(I6 += M5);
AR = AX0 - AY0, DM(I0 += M1) = AR;
AR = AX1 + AY1, DM(I6 += M7) = AR;
AR = AX1 - AY1, DM(I0 += M0) = AR;
DM(I6 += M6) = AR;
stage1: AX0 = DM(I0 += M1), AY0 = PM(I6 += M7);
//------------------------STAGE 2-----------------------------------------------
/* The second stage number of butterflies is n/4. The data are added and
* subtracted after multiplication with w[0] and w[n/4]. The multiplication
* with w[0](1+0i) does not have any impact. The multiplication of data
* x+iy with w[n/4](0-1i) will give the value y-xi.
*/
CALL (I2) (DB); // call scaling routine
// (SI preserved over scaling call)
M1 = 1; // set required values for scaling
SE = -1;
I0 = I7; // I0->output address(xi)
I6 = I7; // I6->output address(xip)
SR = LSHIFT SI BY -1 (LO); // SI holds n/2, divide by 2
CNTR = SR0; // stage 2 has n/4 butterflies
SI = SR0;
M7 = 1;
M3 = -1;
M0 = 5; M6 = 5;
MODIFY(I6 += 4);
AX0 = DM(I0 += M1), AY0 = PM(I6 += M7);
DO stage2 UNTIL CE;
AR = AX0 + AY0, AX1 = DM(I0 += M3), AY1 = PM(I6 += M5);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -