📄 dsp_fft32x32_cn.c

📁 TI c64x的FFT程序
💻 C
📖 第 1 页 / 共 3 页
字号:
/*      needs to be comprehended. The real part of the complex number is    *//*      in the lower half, and the imaginary part is in the upper half.     *//*      The flow breaks in case of "xl0" and "xl1" because in this case     *//*      the real part needs to be combined with the imaginary part because  *//*      of the multiplication by "j". This requires a packed quantity like  *//*      "xl21xl20" to be rotated as "xl20xl21" so that it can be combined   *//*       using add2's and sub2's. Hence the natural version of C code       *//*      shown below is transformed using packed data processing as shown:   *//*                                                                          *//*                       xl0  = x[2 * i0    ] - x[2 * i2    ];              *//*                       xl1  = x[2 * i0 + 1] - x[2 * i2 + 1];              *//*                       xl20 = x[2 * i1    ] - x[2 * i3    ];              *//*                       xl21 = x[2 * i1 + 1] - x[2 * i3 + 1];              *//*                                                                          *//*                       xt1  = xl0 + xl21;                                 *//*                       yt2  = xl1 + xl20;                                 *//*                       xt2  = xl0 - xl21;                                 *//*                       yt1  = xl1 - xl20;                                 *//*                                                                          *//*                       xl1_xl0   = _sub2(x21_x20, x21_x20)                *//*                       xl21_xl20 = _sub2(x32_x22, x23_x22)                *//*                       xl20_xl21 = _rotl(xl21_xl20, 16)                   *//*                                                                          *//*                       yt2_xt1   = _add2(xl1_xl0, xl20_xl21)              *//*                       yt1_xt2   = _sub2(xl1_xl0, xl20_xl21)              *//*                                                                          *//*      Also notice that xt1, yt1 endup on seperate words, these need to    *//*      be packed together to take advantage of the packed twiddle fact     *//*      ors that have been loaded. In order for this to be achieved they    *//*      are re-aligned as follows:                                          *//*                                                                          *//*      yt1_xt1 = _packhl2(yt1_xt2, yt2_xt1)                                *//*      yt2_xt2 = _packhl2(yt2_xt1, yt1_xt2)                                *//*                                                                          *//*      The packed words "yt1_xt1" allows the loaded"sc" twiddle factor     *//*      to be used for the complex multiplies. The real part os the         *//*      complex multiply is implemented using _dotp2. The imaginary         *//*      part of the complex multiply is implemented using _dotpn2           *//*      after the twiddle factors are swizzled within the half word.        *//*                                                                          *//*      (X + jY) ( C + j S) = (XC + YS) + j (YC - XS).                      *//*                                                                          *//*      The actual twiddle factors for the FFT are cosine, - sine. The      *//*      twiddle factors stored in the table are csine and sine, hence       *//*      the sign of the "sine" term is comprehended during multipli-        *//*      cation as shown above.                                              *//*                                                                          *//*                                                                          *//*  ASSUMPTIONS                                                             *//*                                                                          *//*      The size of the FFT, n, must be a power of 4 and greater than       *//*      or equal to 16 and less than 32768.                                 *//*                                                                          *//*      The arrays 'x[]', 'y[]', and 'w[]' all must be aligned on a         *//*      double-word boundary for the "optimized" implementations.           *//*                                                                          *//*      The input and output data are complex, with the real/imaginary      *//*      components stored in adjacent locations in the array.  The real     *//*      components are stored at even array indices, and the imaginary      *//*      components are stored at odd array indices.                         *//*                                                                          *//* ------------------------------------------------------------------------ *//*            Copyright (c) 2007 Texas Instruments, Incorporated.           *//*                           All Rights Reserved.                           *//* ======================================================================== */#pragma CODE_SECTION(DSP_fft32x32_cn, ".text:ansi");#include "DSP_fft32x32_cn.h"# define DIG_REV(i, m, j)                                            \    do {                                                             \        unsigned _ = (i);                                            \        _ = ((_ & 0x33333333) <<  2) | ((_ & ~0x33333333) >>  2);    \        _ = ((_ & 0x0F0F0F0F) <<  4) | ((_ & ~0x0F0F0F0F) >>  4);    \        _ = ((_ & 0x00FF00FF) <<  8) | ((_ & ~0x00FF00FF) >>  8);    \        _ = ((_ & 0x0000FFFF) << 16) | ((_ & ~0x0000FFFF) >> 16);    \        (j) = _ >> (m);                                              \    } while (0)#define SMPY_32(x,y)    \             (int)(((long long)(x)*(long long)(y))>>31)void DSP_fft32x32_cn (    int * ptr_w,    int npoints,    int * ptr_x,    int * ptr_y){    const int   *w;    int *x, *x2, *x0;    int *y0, *y1, *y2, *y3;    int i, j, l1, l2, h2, predj, tw_offset, stride, fft_jmp;    int xt0_0, yt0_0, xt1_0, yt1_0, xt2_0, yt2_0;    int xh0_0, xh1_0, xh20_0, xh21_0, xl0_0, xl1_0, xl20_0, xl21_0;    int xh0_1, xh1_1, xl0_1, xl1_1;    int x_0, x_1, x_2, x_3, x_l1_0, x_l1_1, x_l2_0, x_l2_1;    int xh0_2, xh1_2, xl0_2, xl1_2, xh0_3, xh1_3, xl0_3, xl1_3;    int x_4, x_5, x_6, x_7, x_h2_0, x_h2_1;    int x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f;    int si10, si20, si30, co10, co20, co30;    int n00, n10, n20, n30, n01, n11, n21, n31;    int n02, n12, n22, n32, n03, n13, n23, n33;    int n0, j0, radix, norm, m;    /*---------------------------------------------------------------------*/    /* Determine the magnitude od the number of points to be transformed.  */    /* Check whether we can use a radix4 decomposition or a mixed radix    */    /* transformation, by determining modulo 2.                            */    /*---------------------------------------------------------------------*/    for (i = 31, m = 1; (npoints & (1 << i)) == 0; i--, m++)        ;    radix = m & 1 ? 2 : 4;    norm = m - 2;              /*----------------------------------------------------------------------*/    /* The stride is quartered with every iteration of the outer loop. It   */    /* denotes the seperation between any two adjacent inputs to the butter */    /* -fly. This should start out at N/4, hence stride is initially set to */    /* N. For every stride, 6*stride twiddle factors are accessed. The      */    /* "tw_offset" is the offset within the current twiddle factor sub-     */    /* table. This is set to zero, at the start of the code and is used to  */    /* obtain the appropriate sub-table twiddle pointer by offseting it     */    /* with the base pointer "ptr_w".                                       */    /*----------------------------------------------------------------------*/     stride = npoints;     tw_offset = 0;    fft_jmp = 6 * stride;    #ifndef NOASSUME    _nassert(stride > 4);    #pragma MUST_ITERATE(1,,1);    #endif      while (stride > radix) {        /*-----------------------------------------------------------------*/        /* At the start of every iteration of the outer loop, "j" is set   */        /* to zero, as "w" is pointing to the correct location within the  */        /* twiddle factor array. For every iteration of the inner loop     */        /* 6 * stride twiddle factors are accessed. For eg,                */        /*                                                                 */        /* #Iteration of outer loop  # twiddle factors    #times cycled    */        /*  1                          6 N/4               1               */        /*  2                          6 N/16              4               */        /*  ...                                                            */        /*-----------------------------------------------------------------*/          j =  0;        fft_jmp >>=  2;        /*-----------------------------------------------------------------*/        /* Set up offsets to access "N/4", "N/2", "3N/4" complex point or  */        /* "N/2", "N", "3N/2" half word                                    */        /*-----------------------------------------------------------------*/         h2 = stride >> 1;        l1 = stride;        l2 = stride + (stride >> 1);        /*-----------------------------------------------------------------*/        /*  Reset "x" to point to the start of the input data array.       */        /* "tw_offset" starts off at 0, and increments by "6 * stride"     */        /*  The stride quarters with every iteration of the outer loop     */        /*-----------------------------------------------------------------*/        x = ptr_x;        w = ptr_w + tw_offset;        tw_offset += fft_jmp;        stride >>=  2;        /*----------------------------------------------------------------*/        /* The following loop iterates through the different butterflies, */        /* within a given stage. Recall that there are logN to base 4     */        /* stages. Certain butterflies share the twiddle factors. These   */        /* are grouped together. On the very first stage there are no     */        /* butterflies that share the twiddle factor, all N/4 butter-     */        /* flies have different factors. On the next stage two sets of    */        /* N/8 butterflies share the same twiddle factor. Hence after     */        /* half the butterflies are performed, j the index into the       */        /* factor array resets to 0, and the twiddle factors are reused.  */        /* When this happens, the data pointer 'x' is incremented by the  */        /* fft_jmp amount.                                                */        /*----------------------------------------------------------------*/        #ifndef NOASSUME        _nassert((int)(w) % 8 == 0);        _nassert((int)(x) % 8 == 0);        _nassert(h2 % 8 == 0);        _nassert(l1 % 8 == 0);        _nassert(l2 % 8 == 0);        _nassert(npoints >= 16);        #pragma MUST_ITERATE(4,,1);        #endif        for (i = 0; i < npoints; i += 4) {            /*------------------------------------------------------------*/            /*  Read the first three twiddle factor values. This loop co- */            /*  mputes one radix 4 butterfly at a time.                   */            /*------------------------------------------------------------*/            co10 = w[j+1];    si10 = w[j+0];            co20 = w[j+3];    si20 = w[j+2];            co30 = w[j+5];    si30 = w[j+4];                 /*-----------------------------------------------------------*/            /* Read in the data elements for the four inputs of radix4   */            /* butterfly.                                                */            /*-----------------------------------------------------------*/            x_0    = x[0];     x_1    = x[1];            x_l1_0 = x[l1];    x_l1_1 = x[l1+1];            x_l2_0 = x[l2];    x_l2_1 = x[l2+1];            x_h2_0 = x[h2];    x_h2_1 = x[h2+1];            /*-----------------------------------------------------------*/            /* Perform radix2 style DIF butterflies, for initial radix4  */            /*-----------------------------------------------------------*/                xh0_0  = x_0    + x_l1_0;                  xh1_0  = x_1    + x_l1_1;            xl0_0  = x_0    - x_l1_0;                  xl1_0  = x_1    - x_l1_1;            xh20_0 = x_h2_0 + x_l2_0;                  xh21_0 = x_h2_1 + x_l2_1;            xl20_0 = x_h2_0 - x_l2_0;                  xl21_0 = x_h2_1 - x_l2_1;            /*-----------------------------------------------------------*/            /* Derive output pointers using the input pointer "x"        */            /*-----------------------------------------------------------*/             x0 = x;            x2 = x0;            /*-----------------------------------------------------------*/            /* When the twiddle factors are not to be re-used, j is      */
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -