cfft_fr16.asm

来自「本函数利用调用库函数的方法实现了实函数快速傅里叶变换」· 汇编代码 · 共 475 行 · 第 1/2 页
ASM
475 行
/************************************************************************
 *
 * cfft_fr16.asm : $Revision: 1.3 $
 *
 * (c) Copyright 2000-2003 Analog Devices, Inc.  All rights reserved.
 *
 ************************************************************************/

#if 0

   N point radix 2 complex input FFT
  
   Synopsis

     #include <filter.h>
     void cfft_fr16(
          const complex_fract16 in[]    /* input sequence */
          complex_fract16 t[];          /* temporary working buffer */
          complex_fract16 out[];        /* output sequence */
          const complex_fract16 w[];    /* twiddle sequence */
          int wst;                      /* twiddle factor stride */
          int n;                        /* number of FFT points */
          int *block_exponent;          /* block exponent of output data */
          int scale method;             /* scaling method desired 0-none,
                                           1-static, 2-dynamic */

   Description

      This function transforms the time domain complex input signal sequence
   to the frequency domain by using the radix-2 Fast Fourier Transform
   (FFT).

      The size of the input array in the temporary array t and the output 
   array out is n, where n represents the number of points in the FFT. The 
   function does not impose any special memory alignment requirements on the 
   arrays. However, benefits in run-time performance will be realized if the 
   output array is allocated on an address boundary that is multiple of twice
   the FFT size. If the input data can be overwritten, then optimum memory 
   usage can be achieved by specifying the input array as either the output 
   array or as the temporary array. Specifying the input array as the 
   temporary array will also result in increased run-time performance.

      The twiddle table is passed in the argument w, which must contain at least
   n/2 twiddle factors. The function twidfftrad2_fr16 may be used to initialize
   the array. If the twiddle table contains more factors than needed for
   a particular call on cfft_fr16, then the stride factor has to be set 
   appropriately; otherwise it should be 1.

      The argument scale_method controls how the function should scale the
   output to avoid overflow. If no scaling is selected by setting scale_method
   to zero, then the input signal should be sufficiently conditioned to avoid
   overflow. The block_exponent argument will be set to zero.

      The function will perform static scaling if scale_method is set to 1. For
   static scaling, the function will scale intermediate results to prevent 
   overflow. The final output will be scaled by 1/n, and block_exponent will 
   be set to log2(n).

      If scale_method is set to 2, then the function will select dynamic 
   scaling. Under dynamic scaling, the function will inspect the intermediate 
   results and will only scale to avoid overflow. Dynamic scaling therefore 
   minimizes loss of precision but at the possible cost of slightly reduced
   performance. The block_exponent argument will be set to a value between
   0 (which indicates that no scaling was performed) and log2(n) (as if static
   scaling was performed).

   Domain
      Input sequence length n must be a power of two and at least 4.

#endif

/* parameters, offset from FP (I5) */
#define IN_DATA           1
#define TMP_BUF           2
#define OUT_DATA          3
#define TWIDDLES          4
#define TWID_STRIDE       5
#define FFT_SIZE          6
#define FFT_EXPON_ADDR    7
#define SCALE_METHOD      8

/* scale methods */
#define NO_SCALE          0
#define STATIC_SCALE      1
#define DYNAMIC_SCALE     2

#include "lib_fft_funcs.h"

/* local data in preserved stack area, offset from SP (I4) */
#define FFT_SIZE2         M5

.section/code program; 

.extern DS_FUNC, SS_FUNC, NOS_FUNC, BREV_FUNC_IR;

.global __cfft_fr16;

__cfft_fr16:                            // complex fft radix-2 implementation

/* some sanity checks */
   AX1 = DM( I4 + FFT_SIZE );           // load n
   AF = AX1 - 4; 
   IF LT RTS;                           // if n<4, return

//------------------------FUNCTION PROLOGUE-------------------------------------
   DM (I4 += M5) = I5, I5 = I4;         // save preserved registers
   DM (I4 += M5) = I2;
   DM (I4 += M5) = I3;
   DM (I4 += M5) = I7;
   DM (I4 += M5) = M0;

   SR = LSHIFT AX1 BY 1 (LO);           // save n*2
   DM(I4 + FFT_SIZE2) = SR0;

//------------------------BIT-REVERSAL------------------------------------------
/* First the input array is copied to the output buffer in bit-
 * reversed order. Uses a routine called 
 * ___lib_bit_complex_reversal_sort_inregs which will use the DAG1 bit-reversal
 * mode when possible.
 *    ___lib_bit_complex_reversal_sort_inregs params:
 *       MR1 - input complex array address
 *       AX1 - length of arrays
 *       MR0 - output complex array address
 *       AX0 - temp complex array address
 * The fft bit-reversal is done at the start, instead of the more 
 * conventionally at the end, because this first stage this way only has one
 * addition for each data point and will not overflow. An implementation in the
 * other direction may easily overflow in the first stage.
 */

   AX0 = DM (I5 + TMP_BUF);             // complex temp array address
   MR1 = DM (I5 + IN_DATA);             // complex input array address
   CALL BREV_FUNC_IR(DB);
     MR0 = DM (I5 + OUT_DATA);          // complex tmp buffer address
     I7 = MR0;                          // I7->output complex array

   DIS M_MODE, ENA AR_SAT;              // set fractional multiplier and ALU sat

/* load block_exponent pointer */
    AX0 = DM ( I5 + FFT_EXPON_ADDR );
    I3 = AX0;                           // I3->blk_exponent

/* calculate the number of stages */
   SI = DM( I5 + FFT_SIZE );            // load n
   SE = EXP SI (HI); 
   AR = SE;                             // exponent of n
   AY0 = 14; 
   AR = AR + AY0;                       // log2(n)

/* get scale method function pointer and initialize block_exponent */
    IJPG = PAGE( DS_FUNC );             // set default top 8-bit address for 
                                        // scale jumps. all three functions
                                        // use a common implementation so do
                                        // not need to set IJPG for each method
    AX0 = DM ( I5 + SCALE_METHOD );
    AF = AX0 - NO_SCALE;
    IF EQ JUMP scale_set (DB);
      AX1 = ADDRESS(NOS_FUNC);
      IF EQ AR = PASS 0;
    AF = AX0 - DYNAMIC_SCALE;
    IF EQ JUMP scale_set (DB);
      AX1 = ADDRESS(DS_FUNC);
      IF EQ AR = PASS 0;
    AX1 = ADDRESS(SS_FUNC);
scale_set: 
    I2 = AX1;                           // I2->scale function
    DM ( I3 + 0 ) = AR;                 // blk_exponent = 0 (no_scale)
                                        // blk_exponent = 0 (dynamic_scale)
                                        // blk_exponent = 
                                        //             log2size (static_scale)

/* Stage 1 and 2 of the butterfly structure are implemented separately. The 
 * main reason for separating these out from the general computation, is that 
 * the multiplications with the twiddle values in both these stages can be
 * avoided. 
 */

//------------------------STAGE 1-----------------------------------------------
/* The first stage of signal flow, there are n/2 number of butterflies. Each 
 * butterfly works on two inputs. These input are multiplied by w[0] and 
 * added and subtracted. Multiplying a data with w[0], which is always equal
 * to 1+0i, will result the same data.
 */
   CALL (I2) (DB);                     // call scaling routine
                                       // (SI preserved over scaling call) 
     M1 = 1;                           // set required values for scaling
     SE = -1;

   I0 = I7;                            // I0->output address(xi)
   I6 = I7;                            // I6->output address(xip)

   SR = LSHIFT SI BY -1 (LO);          // SI holds n, divide by 2
   CNTR = SR0;                         // stage 1 has n/2 butterflies
   SI = SR0;

   M7 = 1;
   M3 = -1;
   M0 = 3; M6 = 3;
   MODIFY(I6 += 2);
   
   AX0 = DM(I0 += M1), AY0 = PM(I6 += M7);
  
   DO stage1 UNTIL CE;
        AR = AX0 + AY0, AX1 = DM(I0 += M3), AY1 = PM(I6 += M5);
        AR = AX0 - AY0, DM(I0 += M1) = AR;
        AR = AX1 + AY1, DM(I6 += M7) = AR;
        AR = AX1 - AY1, DM(I0 += M0) = AR;
        DM(I6 += M6) = AR;
stage1: AX0 = DM(I0 += M1), AY0 = PM(I6 += M7);

//------------------------STAGE 2-----------------------------------------------
/* The second stage number of butterflies is n/4. The data are added and
 * subtracted after multiplication with w[0] and w[n/4]. The multiplication 
 * with w[0](1+0i) does not have any impact. The multiplication of data 
 * x+iy with w[n/4](0-1i) will give the value y-xi.
 */

   CALL (I2) (DB);                     // call scaling routine
                                       // (SI preserved over scaling call) 
     M1 = 1;                           // set required values for scaling
     SE = -1;

   I0 = I7;                            // I0->output address(xi)
   I6 = I7;                            // I6->output address(xip)

   SR = LSHIFT SI BY -1 (LO);          // SI holds n/2, divide by 2
   CNTR = SR0;                         // stage 2 has n/4 butterflies
   SI = SR0;

   M7 = 1;
   M3 = -1;
   M0 = 5; M6 = 5;
   MODIFY(I6 += 4);

   AX0 = DM(I0 += M1), AY0 = PM(I6 += M7);

   DO stage2 UNTIL CE;
        AR = AX0 + AY0, AX1 = DM(I0 += M3), AY1 = PM(I6 += M5);
cfft_fr16.asm - 源码说明

本页面展示了「本函数利用调用库函数的方法实现了实函数快速傅里叶变换」中的 cfft_fr16.asm 源码文件，采用汇编编程语言编写，共 475 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与函数相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?