rfft_fr16.asm

来自「本函数利用调用库函数的方法实现了实函数快速傅里叶变换」· 汇编代码 · 共 340 行
ASM
340 行
/************************************************************************
 *
 * rfft_fr16.asm : $Revision: 1.3 $
 *
 * (c) Copyright 2000-2003 Analog Devices, Inc.  All rights reserved.
 *
 ************************************************************************/

#if 0

   N point radix 2 real input FFT
  
   Synopsis

     #include <filter.h>
     void rfft_fr16(
          const fract16 in[]            /* input sequence */
          complex_fract16 t[];          /* temporary working buffer */
          complex_fract16 out[];        /* output sequence */
          const complex_fract16 w[];    /* twiddle sequence */
          int wst;                      /* twiddle factor stride */
          int n;                        /* number of FFT points */
          int *block_exponent;          /* block exponent of output data */
          int scale method;             /* scaling method desired 0-none,
                                           1-static, 2-dynamic */

   Description

      This function transforms the time domain real input signal sequence to
   the frequency domain by using the radix-2 FFT. The function takes
   advantage of the fact that the imaginary part of the input equals zero,
   which in turn eliminates half of the multiplications in the butterfly.

      The size of the input array in, the output array out, and the temporary
   working buffer t is n, where n represents the number of points in the FFT.
   The function does not impose any special memory alignment requirements
   on the arrays. However, benefits in run-time performance will be
   realized if the output array is allocated on an address boundary that is
   multiple of twice the FFT size. If the input data can be overwritten, then
   optimum memory usage can be achieved by specifying the input array as
   either the output array or as the temporary array provided that the memory
   size of the input array is at least 2*n. Specifying the input array as the
   temporary array will also result in increased run-time performance.

   The twiddle table is passed in the argument w, which must contain at least
   n/2 twiddle factors. The function twidfftrad2_fr16 may be used to initialize
   the array. If the twiddle table contains more factors than needed for
   a particular call on rfft_fr16, then the stride factor has to be set 
   appropriately; otherwise it should be 1.

      The argument scale_method controls how the function should scale the
   output to avoid overflow. If no scaling is selected by setting scale_method
   to zero, then the input signal should be sufficiently conditioned to avoid
   overflow. The block_exponent argument will be set to zero.

      The function will perform static scaling if scale_method is set to 1. For
   static scaling, the function will scale intermediate results to prevent 
   overflow. The final output will be scaled by 1/n, and block_exponent will be
   set to log2(n).

      If scale_method is set to 2, then the function will select dynamic 
   scaling.  Under dynamic scaling, the function will inspect the intermediate 
   results and will only scale to avoid overflow. Dynamic scaling therefore 
   minimizes loss of precision but at the possible cost of slightly reduced
   performance. The block_exponent argument will be set to a value between
   0 (which indicates that no scaling was performed) and log2(n) (as if static
   scaling was performed).

   Domain
      Input sequence length n must be a power of two and at least 8.

#endif

/* parameters, offset from FP (I5) */
#define IN_DATA           1
#define TMP_BUF           2
#define OUT_DATA          3
#define TWIDDLES          4
#define TWID_STRIDE       5
#define FFT_SIZE          6
#define FFT_EXPON_ADDR    7
#define SCALE_METHOD      8

/* scale methods */
#define NO_SCALE          0
#define STATIC_SCALE      1
#define DYNAMIC_SCALE     2

#include "lib_fft_funcs.h"

/* local data in preserved stack area, offset from SP (I4) */
#define FFT_SIZE2         M5

.section/code program;

.extern __cfft_fr16, DS_FUNC, SS_FUNC, NOS_FUNC, BREV_FUNC_IR;

.global __rfft_fr16;

__rfft_fr16:                            // real fft radix-2 implementation

/* some sanity checks */
   AX1 = DM( I4 + FFT_SIZE );           // load n
   AF = AX1 - 8;
   IF LT RTS;                           // if n<8, return

//------------------------DO CFFT----------------------------------------------
/* We implement rfft using cfft of half input size. The cfft input
 * has even inputs in reals and odd inputs in imags. A final trigonometric 
 * pass generates the correct rfft output.
 */

   AR = DM ( I4 + TWID_STRIDE );       // load twiddle stride parameter
   SR = LSHIFT AR BY 1 (LO);           // twiddle stride*=2
   DM ( I4 + TWID_STRIDE ) = SR0;      // store new twiddle stride parameter

   CALL __cfft_fr16 (DB);              // CALL to cfft
     SR = LSHIFT AX1 BY -1 (LO);       // SR0 = n/2
     DM( I4 + FFT_SIZE ) = SR0;        // store new size

//------------------------FUNCTION PROLOGUE-------------------------------------
    DM(I4 += M5) = I5, I5 = I4;         // save preserved registers
    DM(I4 += M5) = I2;
    DM(I4 += M5) = I3;
    DM(I4 += M5) = I7;
    DM(I4 += M5) = M0;

//------------------------FINAL SCALE-------------------------------------------
    DIS M_MODE, ENA AR_SAT;             // set fractional multiplier and ALU sat

    AR = DM(I5 + OUT_DATA); 
    I7 = AR;                            // I7 -> out 

/* get scale function and call for final time */

    AR = DM ( I5 + SCALE_METHOD );
    AF = AR - NO_SCALE;
    IF EQ JUMP scale_done (DB);
      SE = -1;
      M1 = 1;

    IJPG = PAGE( DS_FUNC );             // set default top 8-bit address for
                                        // scale jumps. all three functions
                                        // use a common implementation so do
                                        // not need to set IJPG for each method

    AY1 = DM ( I5 + FFT_EXPON_ADDR );   // get block exponent address

    AF = AR - DYNAMIC_SCALE;
    IF EQ JUMP do_scale (DB);
      I6 = ADDRESS(DS_FUNC);
      I3 = AY1;

    I6 = ADDRESS(SS_FUNC);

    AY1 = DM ( I3 + 0 );                // get block exponent
    AR = AY1 + 1;                       // add 1 to current 
    DM ( I3 + 0 ) = AR;                 // save new block exponent

do_scale:
// requires: SE=-1, M1==1, I3==blk_exp addr, I7==output addr, fract MAC mode,
//           2 * n held at I4-1(written by previous cfft call)
    CALL (I6);
    SE = -1;                            // changed by dynamic scale

scale_done: 

//------------------------TRIG-RECOMBINATION------------------------------------
#if 0
   example trigonometric recombination C code:

      rfftroutN[0] = rfftroutNover2[0] + rfftioutNover2[0];
      rfftioutN[0] = 0;

      for (k=1; k<Nover2; k++) {
         Realsum= ( rfftroutNover2[k] + rfftroutNover2[(Nover2-k)]  ) / 2;
         Imagsum= ( rfftioutNover2[k] + rfftioutNover2[(Nover2-k)]  ) / 2;
         Realdif= ( rfftroutNover2[k] - rfftroutNover2[(Nover2-k)]  ) / 2;
         Imagdif= ( rfftioutNover2[k] - rfftioutNover2[(Nover2-k)]  ) / 2;

         rfftroutN[k] = (Realsum +
                          (((unsigned long)twid_real[k] * Imagsum)>>15) -
                          ((((unsigned long)twid_imag[k] * Realdif))>>15));
         rfftioutN[k] = (Imagdif -
                          (((long)-twid_imag[k] * Imagsum)>>15) -
                          ((((unsigned long)twid_real[k] * Realdif))>>15));
      }
      for (k=Nover2; k<N; k++) {
         rfftroutN[k] = rfftroutN[N-k];
         rfftioutN[k] = -(rfftioutN[N-k]);
      } 
#endif
   
                                        // copy size/2 complex results to tmp
    AY1 = DM(I5 + FFT_SIZE);            // get n/2
    CNTR = AY1;

    I0 = I7;                            // I0,I6 -> out
    I6 = I7;                           

    AR = DM(I5 + TMP_BUF);              // I1 -> tmp
    I1 = AR;

    M3 = 2;
    M7 = 2;
    MODIFY(I6 += 1);                    // I6 -> tmp.im
    DO copy_to_tmp UNTIL CE;
             AX0 = DM(I0 += M3), AY0 = PM(I6 += M7);
             DM(I1 += 1) = AX0;
copy_to_tmp: DM(I1 += 1) = AY0;

    I0 = AR;                            // I0,I6 -> tmp
    I6 = AR;
    I1 = I7;                            // I1 -> out

    AR = AY1 - 1, SR0 = AY1;
    CNTR = AR;                          // for (k=1; k<Nover2; k++) {

    AR = SR0 + AY1, AX0 = DM(I0 += M1); // n/2+n/2, AX0 = rfftroutNover2[0]

    DM(I5 + FFT_SIZE) = AR;             // n re-written(had been changed to n/2)

    M7 = AR;                            // M7,M3 = n/2 complex offset
    M3 = AR;
    AR = -AR;
    M0 = AR;                            // M0 = -n/2 complex offset

    AY0 = DM(I0 += M1);                 // AY0 = rfftioutNover2[0]

    AR = AX0 + AY0, SI = DM(I6 += M7);  // rfftroutNover2[0]+rfftioutNover2[0], 
                                        // I6 -> tmp[n/2].re

                                        // rfftioutN[0] = 0, written first 
                                        // loop iter

    AR = AX0 - AY0, DM(I1 += M3) = AR;  // rfftroutNover2[0]-rfftioutNover2[0],
                                        // rfftroutN[0] = AR(I1->rfftroutN[n/2])

    AR = PASS 0, DM(I1 += M1) = AR;     // rfftroutN[n/2] = AR
    DM(I1 += M0) = AR;                  // rfftioutN[n/2] = 0(I1->out[0].im)

    SI = DM(I5 + TWID_STRIDE);          // load new twiddle stride parameter
    SR = LSHIFT SI (LO), AY1 = DM(I6 += M5); 
                                        // twiddle stride/=2,
                                        // I6 -> tmp[n/2-1].im
    DM(I5 + TWID_STRIDE) = SR0;         // restore twiddle stride parameter

    M0 = SI;                            // M0 is twiddle step

    AY1 = DM(I5 + TWIDDLES); 
    I2 = AY1;                           // I2->twiddles

    AR = AY1 + 1, AX0 = DM(I0 += M1);   // load rfftroutNover2[1]
    AY1 = DM(I6 += M5);                 // load rfftioutNover2[N/2-1]
    SR = 0, AY0 = DM(I6 += M5);         // load rfftroutNover2[N/2-1]

    I3 = AR;                            // I3->twiddles(imag offset)

    SI = 0x4000;                        // SI = 0.5

    MODIFY(I2 += M0);                   // I2->twiddles[1]
    MODIFY(I3 += M0);                   // I3->imag twiddles[1]

    DO bfly_rfft_tr UNTIL CE;
       // Realsum= ( rfftroutNover2[k] + rfftroutNover2[(Nover2-k)]  ) / 2;,
       AR = AX0 + AY0, MY1 = SI;
       MR = AR * MY1 (SS), DM(I1 += M1) = SR1;
                                        // MR ==> Realsum[k],
                                        // rfftioutNover2[k-1] = SR1
                                        
       // Realdif= ( rfftroutNover2[k] - rfftroutNover2[(Nover2-k)]  ) / 2;
       AR = AX0 - AY0, MX0 = DM(I2 += M0);
       SR = AR * MY1 (RND), AX1 = DM(I0 += M1);
                                        // AX1 = rfftioutNover2[k]
                                        // MX0 ==> twid_real[k]

       // Imagsum= ( rfftioutNover2[k] + rfftioutNover2[(Nover2-k)]  ) / 2;
       AR = AX1 + AY1, MY0 = SR1;       // MY0 ==> Realdif[k]
       SR = AR * MY1 (RND), AX0 = DM(I0 += M1);
                                        // AX0 = rfftroutNover2[k+1]

       // Imagdif= ( rfftioutNover2[k] - rfftioutNover2[(Nover2-k)]  ) / 2;
       AR = AX1 - AY1, MY1 = SR1;       // MY1 ==> Imagsum[k]
       SR = ASHIFT AR(HI), MX1 = DM(I3 += M0);
                                        // SR  ==> Imagdif[k]
                                        // MX1 ==> twid_imag[k]

       // rfftroutN[k] = (Realsum +
       //                 (((unsigned long)twid_real[k] * Imagsum)>>15) -
       //                 ((((unsigned long)-twid_imag[k] * Realdif))>>15));
       MR = MR + MX0 * MY1 (SS), AY1 = DM(I6 += M5);
       MR = MR + MX1 * MY0 (RND), AY0 = DM(I6 += M5);
                                        // AY1 = rfftioutNover2[(Nover2-(k+1))]
                                        // AY0 = rfftroutNover2[(Nover2-(k+1))]

       // rfftioutN[k] = (Imagdif -
       //                 (((long)-twid_imag[k] * Imagsum)>>15) -
       //                 ((((unsigned long)twid_real[k] * Realdif))>>15));
       SR = SR + MX1 * MY1 (SS), DM( I1 += M1 ) = MR1;
bfly_rfft_tr: SR = SR - MX0 * MY0 (RND);

       DM( I1 += M1 ) = SR1;

//------------------------GEN N/2-N OUTPUTS-------------------------------------
    I6 = I1;
    SI = DM(I5 + FFT_SIZE); 
    SR = LSHIFT SI BY -1 (LO);          // div 2
    CNTR = SR0;
                                        // mirror N/2 output in second half
                                        // around the Nyquist point of the
                                        // spectrum to get the negative part
                                        // of the frequency spectrum

    I0 = I1;                            // I0 -> real +ve frequency values
    M3 = -2;
    M7 = -2;
    MODIFY (I6 += 1);                   // I6 -> imag +ve frequency values
    DO rfft_mirror UNTIL CE;
             AX0 = DM( I0 += M3 ), AY0 = PM( I6 += M7 );      
                                        // read real and modify to previous
                                        // read imag and modify to previous
             AR = -AY0, DM( I1 += M1 ) = AX0;
rfft_mirror: DM( I1 += M1 ) = AR;

//------------------------FUNCTION EPILOGUE-------------------------------------
    I6 = I5;
    ENA M_MODE, DIS AR_SAT;             // set integer multiplier and ALU sat
    MODIFY (I6 += M5);                  // restore preserved registers
    I2 = DM (I6 += M5);
    I3 = DM (I6 += M5);
    I7 = DM (I6 += M5);
    M0 = DM (I6 += M5);
    RTS (DB);                           // return
      I4 = I5; 
      I5 = DM (I5 += M5);

    .__rfft_fr16.end:

// end of file
rfft_fr16.asm - 源码说明

本页面展示了「本函数利用调用库函数的方法实现了实函数快速傅里叶变换」中的 rfft_fr16.asm 源码文件，采用汇编编程语言编写，共 340 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与函数相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?