📄 dsp_fft32x32.c

📁 TI c64x的FFT程序
💻 C
📖 第 1 页 / 共 4 页
字号:
/*                       xl1  = x[2 * i0 + 1] - x[2 * i2 + 1];              *//*                       xl20 = x[2 * i1    ] - x[2 * i3    ];              *//*                       xl21 = x[2 * i1 + 1] - x[2 * i3 + 1];              *//*                                                                          *//*                       xt1  = xl0 + xl21;                                 *//*                       yt2  = xl1 + xl20;                                 *//*                       xt2  = xl0 - xl21;                                 *//*                       yt1  = xl1 - xl20;                                 *//*                                                                          *//*                       xl1_xl0   = _sub2(x21_x20, x21_x20)                *//*                       xl21_xl20 = _sub2(x32_x22, x23_x22)                *//*                       xl20_xl21 = _rotl(xl21_xl20, 16)                   *//*                                                                          *//*                       yt2_xt1   = _add2(xl1_xl0, xl20_xl21)              *//*                       yt1_xt2   = _sub2(xl1_xl0, xl20_xl21)              *//*                                                                          *//*      Also notice that xt1, yt1 endup on seperate words, these need to    *//*      be packed together to take advantage of the packed twiddle fact     *//*      ors that have been loaded. In order for this to be achieved they    *//*      are re-aligned as follows:                                          *//*                                                                          *//*      yt1_xt1 = _packhl2(yt1_xt2, yt2_xt1)                                *//*      yt2_xt2 = _packhl2(yt2_xt1, yt1_xt2)                                *//*                                                                          *//*      The packed words "yt1_xt1" allows the loaded"sc" twiddle factor     *//*      to be used for the complex multiplies. The real part os the         *//*      complex multiply is implemented using _dotp2. The imaginary         *//*      part of the complex multiply is implemented using _dotpn2           *//*      after the twiddle factors are swizzled within the half word.        *//*                                                                          *//*      (X + jY) ( C + j S) = (XC + YS) + j (YC - XS).                      *//*                                                                          *//*      The actual twiddle factors for the FFT are cosine, - sine. The      *//*      twiddle factors stored in the table are csine and sine, hence       *//*      the sign of the "sine" term is comprehended during multipli-        *//*      cation as shown above.                                              *//*                                                                          *//*                                                                          *//*  ASSUMPTIONS                                                             *//*                                                                          *//*      The size of the FFT, n, must be a power of 4 and greater than       *//*      or equal to 16 and less than 32768.                                 *//*                                                                          *//*      The arrays 'x[]', 'y[]', and 'w[]' all must be aligned on a         *//*      double-word boundary for the "optimized" implementations.           *//*                                                                          *//*      The input and output data are complex, with the real/imaginary      *//*      components stored in adjacent locations in the array.  The real     *//*      components are stored at even array indices, and the imaginary      *//*      components are stored at odd array indices.                         *//*                                                                          *//* ------------------------------------------------------------------------ *//*            Copyright (c) 2007 Texas Instruments, Incorporated.           *//*                           All Rights Reserved.                           *//* ======================================================================== */#pragma CODE_SECTION(DSP_fft32x32_i, ".text:intrinsic");#include "DSP_fft32x32.h"/*--------------------------------------------------------------------------*//* The following macro is used to obtain a digit reversed index, of a given *//* number i, into j where the number of bits in "i" is "m". For the natural *//* form of C code, this is done by first interchanging every set of "2 bit" *//* pairs, followed by exchanging nibbles, followed by exchanging bytes, and *//* finally halfwords. To give an example, condider the following number:    *//*                                                                          *//* N = FEDCBA9876543210, where each digit represents a bit, the following   *//* steps illustrate the changes as the exchanges are performed:             *//* M = DCFE98BA54761032 is the number after every "2 bits" are exchanged.   *//* O = 98BADCFE10325476 is the number after every nibble is exchanged.      *//* P = 1032547698BADCFE is the number after every byte is exchanged.        *//* Since only 16 digits were considered this represents the digit reversed  *//* index. Since the numbers are represented as 32 bits, there is one more   *//* step typically of exchanging the half words as well.                     *//*--------------------------------------------------------------------------*/ #if 0# define DIG_REV(i, m, j) ((j) = (_shfl(_rotl(_bitr(_deal(i)), 16)) >> (m)))#else# define DIG_REV(i, m, j)                                            \    do {                                                             \        unsigned _ = (i);                                            \        _ = ((_ & 0x33333333) <<  2) | ((_ & ~0x33333333) >>  2);    \        _ = ((_ & 0x0F0F0F0F) <<  4) | ((_ & ~0x0F0F0F0F) >>  4);    \        _ = ((_ & 0x00FF00FF) <<  8) | ((_ & ~0x00FF00FF) >>  8);    \        _ = ((_ & 0x0000FFFF) << 16) | ((_ & ~0x0000FFFF) >> 16);    \        (j) = _ >> (m);                                              \    } while (0)#endifstatic inline void radix_2(int *restrict ptr_x, int *restrict ptr_y, int npoints);static inline void radix_4(int *restrict ptr_x, int *restrict ptr_y, int npoints);void DSP_fft32x32_i (    const int * restrict ptr_w,    int npoints,    int * restrict ptr_x,    int * restrict ptr_y){    int * restrict w;    int * restrict x, * restrict x2, * restrict x0;    int i, j, l1, l2, h2, predj, tw_offset, stride, fft_jmp, radix;    int si10, si20, si30, co10, co20, co30;    int si11, si21, si31, co11, co21, co31;    int xt0_0, yt0_0, yt0_1, xt0_1;    int xt1_0, yt2_0, xt1_1, yt2_1, xt2_0, yt1_0, xt2_1, yt1_1;     int p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF;    int p10, p11, p12, p13, p14, p15, p16, p17;    int xh2_0o, xh2_1o, xh2_2o, xh2_3o;    int xl1_0o, xl1_1o, xl1_2o, xl1_3o;    int xl2_0o, xl2_1o, xl2_2o, xl2_3o;    double ydword0, ydword1, yl1dword0, yl1dword1, yh2dword0, yh2dword1;    double x_10, x_32;    double x_l1_10, x_l1_32, x_l2_10, x_l2_32, x_h2_10,x_h2_32;    double co10_si10, co20_si20, co30_si30;    double co11_si11, co21_si21, co31_si31;    long long xh20_0_xl20_0, xh21_0_xl21_0, xh20_1_xl20_1, xh21_1_xl21_1;     long long xt1_0_xt2_0, yt2_0_yt1_0, xt1_1_xt2_1, yt2_1_yt1_1;     long long xh0_0_xl0_0, xh1_0_xl1_0, xh0_1_xl0_1, xh1_1_xl1_1;     long long x_0o_xt0_0, x_1o_yt0_0, x_2o_xt0_1, x_3o_yt0_1;    /*---------------------------------------------------------------------*/    /* Determine the magnitude od the number of points to be transformed.  */    /* Check whether we can use a radix4 decomposition or a mixed radix    */    /* transformation, by determining modulo 2.                            */    /*---------------------------------------------------------------------*/    radix = _norm(npoints) & 1 ? 2 :  4;    /*----------------------------------------------------------------------*/    /* The stride is quartered with every iteration of the outer loop. It   */    /* denotes the seperation between any two adjacent inputs to the butter */    /* -fly. This should start out at N/4, hence stride is initially set to */    /* N. For every stride, 6*stride twiddle factors are accessed. The      */    /* "tw_offset" is the offset within the current twiddle factor sub-     */    /* table. This is set to zero, at the start of the code and is used to  */    /* obtain the appropriate sub-table twiddle pointer by offseting it     */    /* with the base pointer "ptr_w".                                       */    /*----------------------------------------------------------------------*/     stride = npoints;    tw_offset = 0;    fft_jmp = 6 * stride;    _nassert(stride > 4);    #pragma MUST_ITERATE(1,,1);    while (stride > radix) {        /*-----------------------------------------------------------------*/        /* At the start of every iteration of the outer loop, "j" is set   */        /* to zero, as "w" is pointing to the correct location within the  */        /* twiddle factor array. For every iteration of the inner loop     */        /* 6 * stride twiddle factors are accessed. For eg,                */        /*                                                                 */        /* #Iteration of outer loop  # twiddle factors    #times cycled    */        /*  1                          6 N/4               1               */        /*  2                          6 N/16              4               */        /*  ...                                                            */        /*-----------------------------------------------------------------*/          j =  0;        fft_jmp >>= 2;        /*-----------------------------------------------------------------*/        /* Set up offsets to access "N/4", "N/2", "3N/4" complex point or  */        /* "N/2", "N", "3N/2" half word                                    */        /*-----------------------------------------------------------------*/         h2 = stride >> 1;        l1 = stride;        l2 = stride + (stride >> 1);        /*-----------------------------------------------------------------*/        /*  Reset "x" to point to the start of the input data array.       */        /* "tw_offset" starts off at 0, and increments by "6 * stride"     */        /*  The stride quarters with every iteration of the outer loop     */        /*-----------------------------------------------------------------*/        x = ptr_x;        w = (int *)ptr_w + tw_offset;        tw_offset += fft_jmp;        stride >>= 2;        /*----------------------------------------------------------------*/        /* The following loop iterates through the different butterflies, */        /* within a given stage. Recall that there are logN to base 4     */        /* stages. Certain butterflies share the twiddle factors. These   */        /* are grouped together. On the very first stage there are no     */        /* butterflies that share the twiddle factor, all N/4 butter-     */        /* flies have different factors. On the next stage two sets of    */        /* N/8 butterflies share the same twiddle factor. Hence after     */        /* half the butterflies are performed, j the index into the       */        /* factor array resets to 0, and the twiddle factors are reused.  */        /* When this happens, the data pointer 'x' is incremented by the  */        /* fft_jmp amount.                                                */        /*----------------------------------------------------------------*/        _nassert((int)(w) % 8 == 0);        _nassert((int)(x) % 8 == 0);        _nassert(h2 % 8 == 0);        _nassert(l1 % 8 == 0);        _nassert(l2 % 8 == 0);        _nassert(npoints >= 16);        #pragma MUST_ITERATE(16,,16);        for (i = 0; i < (npoints >> 3); i++) {            /*------------------------------------------------------------*/            /*  Read the first six twiddle factor values. This loop       */            /*  computes two radix 4 butterflies at a time.               */            /* si10 = w[0] co10 = w[1]  si20 = w[2]  co20 = w[3]          */            /* si30 = w[4] co30 = w[5]  si11 = w[6]  co11 = w[7]          */            /* si21 = w[8] co21 = w[9]  si31 = w[a]  co31 = w[b]          */            /*------------------------------------------------------------*/            co10_si10 = _amemd8_const(&w[j]);            co20_si20 = _amemd8_const(&w[j + 2]);            co30_si30 = _amemd8_const(&w[j + 4]);            co11_si11 = _amemd8_const(&w[j + 6]);            co21_si21 = _amemd8_const(&w[j + 8]);            co31_si31 = _amemd8_const(&w[j + 10]);            si10 = _lo(co10_si10);            co10 = _hi(co10_si10);            si20 = _lo(co20_si20);            co20 = _hi(co20_si20);            si30 = _lo(co30_si30);            co30 = _hi(co30_si30);            si11 = _lo(co11_si11);            co11 = _hi(co11_si11);            si21 = _lo(co21_si21);            co21 = _hi(co21_si21);            si31 = _lo(co31_si31);            co31 = _hi(co31_si31);            /*-------------------------------------------------------------*/            /* Read in the data elements for the eight inputs of two       */            /* radix4 butterflies.                                         */            /*  x[0]       x[1]       x[2]       x[3]                      */            /*  x[h2+0]    x[h2+1]    x[h2+2]    x[h2+3]                   */            /*  x[l1+0]    x[l1+1]    x[l1+2]    x[l1+3]                   */            /*  x[l2+0]    x[l2+1]    x[l2+2]    x[l2+3]                   */            /*-------------------------------------------------------------*/
            x_10    = _amemd8(&x[0]);            x_32    = _amemd8(&x[2]);            x_l1_10 = _amemd8(&x[l1]);            x_l1_32 = _amemd8(&x[l1 + 2]);            x_l2_10 = _amemd8(&x[l2]);            x_l2_32 = _amemd8(&x[l2 + 2]);            x_h2_10 = _amemd8(&x[h2]);            x_h2_32 = _amemd8(&x[h2 + 2]);
            /*-------------------------------------------------------------*/            /* xh0_0 = x[0] + x[l1];    xh1_0 = x[1] + x[l1+1]             */            /* xh0_1 = x[2] + x[l1+2];  xh1_1 = x[3] + x[l1+3]             */            /* xl0_0 = x[0] - x[l1];    xl1_0 = x[1] - x[l1+1]             */            /* xl0_1 = x[2] - x[l1+2];  xl1_1 = x[3] - x[l1+3]             */            /*-------------------------------------------------------------*/            xh0_0_xl0_0 = _addsub(_lo(x_10), _lo(x_l1_10));            xh1_0_xl1_0 = _addsub(_hi(x_10), _hi(x_l1_10));            xh0_1_xl0_1 = _addsub(_lo(x_32), _lo(x_l1_32));
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -