⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 example 3-19.asm

📁 《基于TI DSP的通用算法实现》程序代码
💻 ASM
📖 第 1 页 / 共 2 页
字号:

;Example 3 - 19. Complex DIF Radix-2 FFT Implementation ASM Listing for TMS320C62x
	
* ========================================================================= *
*                                                                           *
*   TEXAS INSTRUMENTS, INC.                                                 *
*                                                                           *
*   NAME                                                                    *
*       DSP_radix2 -- In-place Radix-2 FFT (Little Endian)                  *
*                                                                           *
*   REVISION DATE                                                           *
*       10-Dec-2001                                                         *
*                                                                           *
*   USAGE                                                                   *
*       This routine is C-callable and can be called as:                    *
*                                                                           *
*       void DSP_radix2(int n, short *restrict xy,                          *
*                   const short *restrict w);                               *
*                                                                           *
*       n    -- FFT size                            (input)                 *
*       xy[] -- input and output sequences (dim-n)  (input/output)          *
*       w[]  -- FFT coefficients (dim-n/2)          (input)                 *
*                                                                           *
*   DESCRIPTION                                                             *
*       This routine is used to compute FFT of a complex sequence of size   *
*       n, a power of 2, with "decimation-in-frequency decomposition"       *
*       method, i.e., the output is in bit-reversed order. Each complex     *
*       value has interleaved 16-bit real and imaginary parts. To           *
*       prevent overflow, input samples may have to be scaled by 1/n.       *
*                                                                           *
*       void DSP_radix2(int n, short *restrict xy,                          *
*                   const short *restrict w)                                *
*       {                                                                   *
*           short n1,n2,ie,ia,i,j,k,l;                                      *
*           short xt,yt,c,s;                                                *
*                                                                           *
*           n2 = n;                                                         *
*           ie = 1;                                                         *
*           for (k=n; k > 1; k = (k >> 1) )                                 *
*           {                                                               *
*               n1 = n2;                                                    *
*               n2 = n2>>1;                                                 *
*               ia = 0;                                                     *
*               for (j=0; j < n2; j++)                                      *
*               {                                                           *
*                   c = w[2*ia];                                            *
*                   s = w[2*ia+1];                                          *
*                   ia = ia + ie;                                           *
*                   for (i=j; i < n; i += n1)                               *
*                   {                                                       *
*                       l = i + n2;                                         *
*                       xt      = xy[2*l] - xy[2*i];                        *
*                       xy[2*i] = xy[2*i] + xy[2*l];                        *
*                       yt      = xy[2*l+1] - xy[2*i+1];                    *
*                       xy[2*i+1] = xy[2*i+1] + xy[2*l+1];                  *
*                       xy[2*l]   = (c*xt + s*yt)>>15;                      *
*                       xy[2*l+1] = (c*yt - s*xt)>>15;                      *
*                   }                                                       *
*               }                                                           *
*               ie = ie<<1;                                                 *
*           }                                                               *
*       }                                                                   *
*                                                                           *
*   ASSUMPTIONS                                                             *
*       16 <= n <= 32768                                                    *
*       Both input xy and coefficient w must be aligned on word boundary.   *
*       w is stored in the order k*(-cos[0*delta]), k*(-sin[0*delta]),      *
*       k*(-cos[1*delta]), ...  where delta = 2*PI/N, k = 32767             *
*       Assembly code is written for processor in Little Endian mode        *
*       Input xy and coefficients w are 16 bit data.                        *
*                                                                           *
*   MEMORY NOTE                                                             *
*       Align xy and w on different word boundaries to minimize             *
*       memory bank hits.                                                   *
*                                                                           *
*   TECHNIQUES                                                              *
*       1. Loading input xy as well as coefficient w in word.               *
*       2. Both loops j and i shown in the C code are placed in the         *
*          INNERLOOP of the assembly code.                                  *
*                                                                           *
*   CYCLES                                                                  *
*       cycles = log2(N) * (4*N/2+7) + 34 + N/4.                            *
*                                                                           *
*       (The N/4 term is due to bank conflicts that occur when xy and w     *
*       are aligned as suggested above, under "MEMORY NOTE.")               *
*                                                                           *
*       For N = 256, cycles = 4250.                                         *
*                                                                           *
*   CODESIZE                                                                *
*       800 bytes                                                           *
*                                                                           *
* ------------------------------------------------------------------------- *
*             Copyright (c) 2002 Texas Instruments, Incorporated.           *
*                            All Rights Reserved.                           *
* ========================================================================= *

        .text
        .global _DSP_radix2
; _DSP_radix2 -- entry of the C-callable in-place radix-2 DIF FFT.
; Arguments as consumed below: A4 = n, B4 = xy (copied to A9/B9),
; A6 = w (copied to B12).  The code that follows saves A10-A15/B10-B14
; and the CSR on the stack, disables interrupts, sets up the data
; pointers and loop counters, and then enters the pipelined loop.
_DSP_radix2:
        STW     .D2T1   A15, *B15--[12] ; push A15 to SP[12]; SP drops 12 words
||      SUB     .L1X    B15, 4, A15     ; A15 = old SP - 4 (A-side stack ptr)

        STW     .D1T1   A10, *A15--[10] ; push A10 to SP[11]
||      STW     .D2T2   B10, *+B15[10]  ; push B10 to SP[10]
||      MVC     .S2     CSR, B0         ; B0 = CSR as it was on entry

        STW     .D1T1   A11, *+A15[8]   ; push A11 to SP[ 9]
||      STW     .D2T2   B11, *+B15[8]   ; push B11 to SP[ 8]
||      AND     .S2     B0, -2, B1      ; B1 = CSR copy with bit 0 cleared
;-
        STW     .D1T1   A12, *+A15[6]   ; push A12 to SP[ 7]
||      STW     .D2T2   B12, *+B15[6]   ; push B12 to SP[ 6]
||      MVC     .S2     B1,  CSR        ; write back CSR with bit 0 cleared
        ;== Interrupts disabled here ===

        STW     .D1T1   A13, *+A15[4]   ; push A13 to SP[ 5]
||      STW     .D2T2   B13, *+B15[4]   ; push B13 to SP[ 4]

        STW     .D1T1   A14, *+A15[2]   ; push A14 to SP[ 3]
||      STW     .D2T2   B14, *+B15[2]   ; push B14 to SP[ 2]
||      MV      .L1X    B4,A9           ; &XY
;-
        LMBD    .L1     1,A4,A1         ; outer loop count calculation
||      MV      .L2X    A4,B13          ; N (the value n, not an address)
||      STW     .D1T2   B0,  *+A15[0]   ; push entry CSR to SP[ 1]

        MVK     .S1     1,A2            ; IE = 1
||      MV      .D2     B13,B10         ; XY index setup (B10 = N)
||      MV      .L1     A4,A7           ; XY index setup (A7 = N)
||      SHL     .S2     B13,2,B14       ; calculating reset offset (4*N)
||      MV      .L2X    A6,B12          ; permanent ptr for W[0]
;-
        SHR     .S2     B13,1,B13       ; used for loop count (N/2)
||      SUB     .D2     B14,4,B14       ; calculating reset offset (4*N-4)
||      SUB     .L1X    B14,4,A8        ; same offset; reads B14 before the SUB above

        ADDAH   .D1     A9,A7,A3        ; setup ptr for X[i+N2] & Y[i+N2]
||      MV      .L2X    A9,B9           ; setup ptr for X[i] & Y[i]
||      SUB     .D2     B13,4,B13       ; inner loop count
||      MVK     .S2     31,B7           ; outer loop count calculation
;-
        MV      .S2     B9,B4           ; setup store ptr for X[i] & Y[i]
||      MV      .L2     B9,B11          ; permanent ptr for X[0] & Y[0]
||      SUB     .L1X    B7,A1,A1        ; outer loop counter = 31 - LMBD(1,n)

        ; Loop prolog: first loads issued ahead of OUTLOOP.
        LDW     .D2     *B9++[B10],B7   ; X[i] & Y[i]
||      MV      .L2X    A2,B2           ; reset twiddle factor counter
||      LDW     .D1     *A6++[A2],A5    ; CS = W[IA] & W[IA+1]
||      SHL     .S1     A7,1,A0         ; calculating reset offset
;-
  [ B2] SUB             B2,1,B2         ; decrement twiddle factor counter
||      LDW     .D1     *A3++[A7],A13   ; X[i+N2] & Y[i+N2]

  [!B2] SUB             B9,B14,B9       ; reset load X[i] & Y[i] ptrs

  [!B2] SUB             A3,A8,A3        ; reset load X[i+N2] & Y[i+N2] ptrs

OUTLOOP:
        LDW     .D2     *B9++[B10],B7   ;* X[i] & Y[i]
||[!B2] MV      .L2X    A2,B2           ;* reset twiddle factor counter
||[!B2] LDW     .D1     *A6++[A2],A5    ;* CS = W[IA] & W[IA+1]
||      MPY     .M2     B2,1,B0         ; move to next iteration
;-
  [ B2] SUB             B2,1,B2         ;* decrement twiddle factor counter
||      LDW     .D1     *A3++[A7],A13   ;* X[i+N2] & Y[i+N2]

        SUB2    .S1X    A13,B7,A9       ; XYT = X[i+N2]-X[i] & Y[i+N2]-Y[i]
||      MV              A5,A10          ; move to other file

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -