dsp_fft.h64

来自「dm642函数库」· H64 代码 · 共 532 行 · 第 1/3 页

H64
532
字号
*        j = 0;                                                             *
*        fft_jmp = stride + (stride>>1);                                    *
*        h2 = stride>>1;                          /* n/4 */                 *
*        l1 = stride;                             /* n/2 */                 *
*        l2 = stride + (stride>>1);               /* 3n/4 */                *
*        x = ptr_x;                                                         *
*        w = ptr_w + tw_offset;                                             *
*        tw_offset += fft_jmp;                                              *
*        stride = stride>>2;                                                *
*                                                                           *
*        for (i = 0; i < n>>1; i += 4)                                      *
*        {                                                                  *
*            co10 = w[j+1];    si10 = w[j+0];   /* W  */                    *
*            co11 = w[j+3];    si11 = w[j+2];                               *
*            co20 = w[j+5];    si20 = w[j+4];   /* W^2 */                   *
*            co21 = w[j+7];    si21 = w[j+6];                               *
*            co30 = w[j+9];    si30 = w[j+8];   /* W^3 */                   *
*            co31 = w[j+11];   si31 = w[j+10];                              *
*                                                                           *
*            x_0 = x[0];       x_1 = x[1];         /* perform 2 parallel */ *
*            x_2 = x[2];       x_3 = x[3];         /* radix4 butterflies */ *
*                                                                           *
*            x_l1_0 = x[l1  ]; x_l1_1 = x[l1+1];                            *
*            x_l1_2 = x[l1+2]; x_l1_3 = x[l1+3];                            *
*                                                                           *
*            x_l2_0 = x[l2  ]; x_l2_1 = x[l2+1];                            *
*            x_l2_2 = x[l2+2]; x_l2_3 = x[l2+3];                            *
*                                                                           *
*            x_h2_0 = x[h2  ]; x_h2_1 = x[h2+1];                            *
*            x_h2_2 = x[h2+2]; x_h2_3 = x[h2+3];                            *
*                                                                           *
*            xh0_0  = x_0    + x_l1_0; xh1_0  = x_1    + x_l1_1;            *
*            xh0_1  = x_2    + x_l1_2; xh1_1  = x_3    + x_l1_3;            *
*                                                                           *
*            xl0_0  = x_0    - x_l1_0; xl1_0  = x_1    - x_l1_1;            *
*            xl0_1  = x_2    - x_l1_2; xl1_1  = x_3    - x_l1_3;            *
*                                                                           *
*            xh20_0 = x_h2_0 + x_l2_0; xh21_0 = x_h2_1 + x_l2_1;            *
*            xh20_1 = x_h2_2 + x_l2_2; xh21_1 = x_h2_3 + x_l2_3;            *
*                                                                           *
*            xl20_0 = x_h2_0 - x_l2_0; xl21_0 = x_h2_1 - x_l2_1;            *
*            xl20_1 = x_h2_2 - x_l2_2; xl21_1 = x_h2_3 - x_l2_3;            *
*                                                                           *
*            x0 = x;                                                        *
*            x2 = x0;                 /* copy pointers for output*/         *
*                                                                           *
*            j += 12;                                                       *
*            x += 4;                                                        *
*            predj = (j - fft_jmp);   /* check if reached end of */         *
*            if (!predj) x += fft_jmp;/* current twiddle factor section */  *
*            if (!predj) j = 0;                                             *
*                                                                           *
*            x0[0] = xh0_0 + xh20_0; x0[1] = xh1_0 + xh21_0;                *
*            x0[2] = xh0_1 + xh20_1; x0[3] = xh1_1 + xh21_1;                *
*                                                                           *
*            xt0_0 = xh0_0 - xh20_0;  yt0_0 = xh1_0 - xh21_0;               *
*            xt0_1 = xh0_1 - xh20_1;  yt0_1 = xh1_1 - xh21_1;               *
*                                                                           *
*            xt1_0 = xl0_0 + xl21_0;  yt2_0 = xl1_0 + xl20_0;               *
*            xt2_0 = xl0_0 - xl21_0;  yt1_0 = xl1_0 - xl20_0;               *
*            xt1_1 = xl0_1 + xl21_1;  yt2_1 = xl1_1 + xl20_1;               *
*            xt2_1 = xl0_1 - xl21_1;  yt1_1 = xl1_1 - xl20_1;               *
*                                                                           *
*            x2[h2  ] = (si10 * yt1_0 + co10 * xt1_0) >> 15;                *
*            x2[h2+1] = (co10 * yt1_0 - si10 * xt1_0) >> 15;                *
*                                                                           *
*            x2[h2+2] = (si11 * yt1_1 + co11 * xt1_1) >> 15;                *
*            x2[h2+3] = (co11 * yt1_1 - si11 * xt1_1) >> 15;                *
*                                                                           *
*            x2[l1  ] = (si20 * yt0_0 + co20 * xt0_0) >> 15;                *
*            x2[l1+1] = (co20 * yt0_0 - si20 * xt0_0) >> 15;                *
*                                                                           *
*            x2[l1+2] = (si21 * yt0_1 + co21 * xt0_1) >> 15;                *
*            x2[l1+3] = (co21 * yt0_1 - si21 * xt0_1) >> 15;                *
*                                                                           *
*            x2[l2  ] = (si30 * yt2_0 + co30 * xt2_0) >> 15;                *
*            x2[l2+1] = (co30 * yt2_0 - si30 * xt2_0) >> 15;                *
*                                                                           *
*            x2[l2+2] = (si31 * yt2_1 + co31 * xt2_1) >> 15;                *
*            x2[l2+3] = (co31 * yt2_1 - si31 * xt2_1) >> 15;                *
*        }                                                                  *
*    }/* end while */                                                       *
*                                                                           *
*    y0 = ptr_y;                                                            *
*    y1 = y0 + (int)(n>>1);                                                 *
*    y2 = y1 + (int)(n>>1);                                                 *
*    y3 = y2 + (int)(n>>1);                                                 *
*    x0 = ptr_x;                                                            *
*    x2 = ptr_x + (int)(n>>1);                                              *
*    l1 = _norm(n) + 2;                                                     *
*    j = 0;                                                                 *
*    for (i = 0; i < n; i += 8)                                             *
*    {                                                                      *
*        h2 = _deal(j);                                                     *
*        h2 = _bitr(h2);                                                    *
*        h2 = _rotl(h2, 16);                                                *
*        h2 = _shfl(h2);                                                    *
*        h2 >>= l1;                                                         *
*                                                                           *
*        x_0 = x0[0]; x_1 = x0[1];                                          *
*        x_2 = x0[2]; x_3 = x0[3];                                          *
*        x_4 = x0[4]; x_5 = x0[5];                                          *
*        x_6 = x0[6]; x_7 = x0[7];                                          *
*        x0 += 8;                                                           *
*                                                                           *
*        xh0_0  = x_0 + x_4; xh1_0  = x_1 + x_5;                            *
*        xl0_0  = x_0 - x_4; xl1_0  = x_1 - x_5;                            *
*        xh20_0 = x_2 + x_6; xh21_0 = x_3 + x_7;                            *
*        xl20_0 = x_2 - x_6; xl21_0 = x_3 - x_7;                            *
*                                                                           *
*        xt0_0 = xh0_0 - xh20_0;                                            *
*        yt0_0 = xh1_0 - xh21_0;                                            *
*        xt1_0 = xl0_0 + xl21_0;                                            *
*        yt2_0 = xl1_0 + xl20_0;                                            *
*        xt2_0 = xl0_0 - xl21_0;                                            *
*        yt1_0 = xl1_0 - xl20_0;                                            *
*                                                                           *
*        y0[2*h2  ] = xh0_0 + xh20_0;                                       *
*        y0[2*h2+1] = xh1_0 + xh21_0;                                       *
*        y1[2*h2  ] = xt1_0;                                                *
*        y1[2*h2+1] = yt1_0;                                                *
*        y2[2*h2  ] = xt0_0;                                                *
*        y2[2*h2+1] = yt0_0;                                                *
*        y3[2*h2  ] = xt2_0;                                                *
*        y3[2*h2+1] = yt2_0;                                                *
*                                                                           *
*        x_10 = x2[0]; x_11 = x2[1];                                        *
*        x_12 = x2[2]; x_13 = x2[3];                                        *
*        x_14 = x2[4]; x_15 = x2[5];                                        *
*        x_16 = x2[6]; x_17 = x2[7];                                        *
*        x2 += 8;                                                           *
*                                                                           *
*        xh0_1  = x_10 + x_14; xh1_1  = x_11 + x_15;                        *
*        xl0_1  = x_10 - x_14; xl1_1  = x_11 - x_15;                        *
*        xh20_1 = x_12 + x_16; xh21_1 = x_13 + x_17;                        *
*        xl20_1 = x_12 - x_16; xl21_1 = x_13 - x_17;                        *
*                                                                           *
*        xt0_1 = xh0_1 - xh20_1;                                            *
*        yt0_1 = xh1_1 - xh21_1;                                            *
*        xt1_1 = xl0_1 + xl21_1;                                            *
*        yt2_1 = xl1_1 + xl20_1;                                            *
*        xt2_1 = xl0_1 - xl21_1;                                            *
*        yt1_1 = xl1_1 - xl20_1;                                            *
*                                                                           *
*        y0[2*h2+2] = xh0_1 + xh20_1;                                       *
*        y0[2*h2+3] = xh1_1 + xh21_1;                                       *
*        y1[2*h2+2] = xt1_1;                                                *
*        y1[2*h2+3] = yt1_1;                                                *
*        y2[2*h2+2] = xt0_1;                                                *
*        y2[2*h2+3] = yt0_1;                                                *
*        y3[2*h2+2] = xt2_1;                                                *
*        y3[2*h2+3] = yt2_1;                                                *
*                                                                           *
*        j += 4;                                                            *
*        if (j == n>>2)                                                     *
*        {                                                                  *
*          j  += n>>2;                                                      *
*          x0 += (int)(n>>1);                                               *
*          x2 += (int)(n>>1);                                               *
*        }                                                                  *
*      }                                                                    *
*  }                                                                        *
* ------------------------------------------------------------------------- *
*             Copyright (c) 2003 Texas Instruments, Incorporated.           *
*                            All Rights Reserved.                           *
* ========================================================================= *

        ; Declare _DSP_fft as a global symbol so that it can be resolved
        ; across modules at link time. No definition of _DSP_fft appears in
        ; the visible part of this file.
        ; NOTE(review): presumably the assembly-level name of the C-callable
        ; DSP_fft routine whose reference C model is given in the banner
        ; comment above -- confirm against the library source.
        .global _DSP_fft

* ========================================================================= *
*   End of file:  dsp_fft.h64                                               *
* ------------------------------------------------------------------------- *
*             Copyright (c) 2003 Texas Instruments, Incorporated.           *
*                            All Rights Reserved.                           *
* ========================================================================= *

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?