📄 dsp_fft32x32.c

📁 TI c64x的FFT程序
💻 C
📖 第 1 页 / 共 4 页
字号:
            xh1_1_xl1_1 = _addsub(_hi(x_32), _hi(x_l1_32));            /*------------------------------------------------------------*/            /* xh20_0 = x[h2  ] + x[l2  ]; xh21_0 = x[h2+1] + x[l2+1]     */            /* xh20_1 = x[h2+2] + x[l2+2]; xh21_1 = x[h2+3] + x[l2+3]     */            /* xl20_0 = x[h2  ] - x[l2  ]; xl21_0 = x[h2+1] - x[l2+1]     */            /* xl20_1 = x[h2+2] - x[l2+2]; xl21_1 = x[h2+3] - x[l2+3]     */            /*------------------------------------------------------------*/            xh20_0_xl20_0 = _addsub(_lo(x_h2_10), _lo(x_l2_10));            xh21_0_xl21_0 = _addsub(_hi(x_h2_10), _hi(x_l2_10));            xh20_1_xl20_1 = _addsub(_lo(x_h2_32), _lo(x_l2_32));            xh21_1_xl21_1 = _addsub(_hi(x_h2_32), _hi(x_l2_32));            /*-------------------------------------------------------------*/            /*    x_0o  = xh0_0 + xh20_0;    x_1o  = xh1_0 + xh21_0;       */            /*    x_2o  = xh0_1 + xh20_1;    x_3o  = xh1_1 + xh21_1;       */            /*    xt0_0 = xh0_0 - xh20_0;    yt0_0 = xh1_0 - xh21_0;       */            /*    xt0_1 = xh0_1 - xh20_1;    yt0_1 = xh1_1 - xh21_1;       */            /*-------------------------------------------------------------*/            x_0o_xt0_0 = _addsub(_hill(xh0_0_xl0_0), _hill(xh20_0_xl20_0));            x_1o_yt0_0 = _addsub(_hill(xh1_0_xl1_0), _hill(xh21_0_xl21_0));            x_2o_xt0_1 = _addsub(_hill(xh0_1_xl0_1), _hill(xh20_1_xl20_1));            x_3o_yt0_1 = _addsub(_hill(xh1_1_xl1_1), _hill(xh21_1_xl21_1));            /*-------------------------------------------------------------*/            /*    xt1_0 = xl0_0 + xl21_0;    yt2_0 = xl1_0 + xl20_0;       */            /*    xt1_1 = xl0_1 + xl21_1;    yt2_1 = xl1_1 + xl20_1;       */            /*    xt2_0 = xl0_0 - xl21_0;    yt1_0 = xl1_0 - xl20_0;       */            /*    xt2_1 = xl0_1 - xl21_1;    yt1_1 = xl1_1 - xl20_1;       */            /*-------------------------------------------------------------*/     
       xt1_0_xt2_0 = _addsub(_loll(xh0_0_xl0_0), _loll(xh21_0_xl21_0));            yt2_0_yt1_0 = _addsub(_loll(xh1_0_xl1_0), _loll(xh20_0_xl20_0));            xt1_1_xt2_1 = _addsub(_loll(xh0_1_xl0_1), _loll(xh21_1_xl21_1));            yt2_1_yt1_1 = _addsub(_loll(xh1_1_xl1_1), _loll(xh20_1_xl20_1));            xt0_0 = _loll(x_0o_xt0_0);            xt0_1 = _loll(x_2o_xt0_1);            yt0_0 = _loll(x_1o_yt0_0);            yt0_1 = _loll(x_3o_yt0_1);            xt1_0 = _hill(xt1_0_xt2_0);            xt1_1 = _hill(xt1_1_xt2_1);            yt1_0 = _loll(yt2_0_yt1_0);            yt1_1 = _loll(yt2_1_yt1_1);            xt2_0 = _loll(xt1_0_xt2_0);            xt2_1 = _loll(xt1_1_xt2_1);            yt2_0 = _hill(yt2_0_yt1_0);            yt2_1 = _hill(yt2_1_yt1_1);            /*-------------------------------------------------------------*/            /*   x2[h2  ] = (si10 * yt1_0 + co10 * xt1_0) >> 15            */            /*   x2[h2+1] = (co10 * yt1_0 - si10 * xt1_0) >> 15            */            /*   x2[h2+2] = (si11 * yt1_1 + co11 * xt1_1) >> 15            */            /*   x2[h2+3] = (co11 * yt1_1 - si11 * xt1_1) >> 15            */            /*-------------------------------------------------------------*/
            p0 = _smpy32(si10, yt1_0);            p1 = _smpy32(co10, xt1_0);            xh2_0o = p0 + p1;            p2 = _smpy32(co10, yt1_0);            p3 = _smpy32(si10, xt1_0);            xh2_1o = p2 - p3;            p4 = _smpy32(si11, yt1_1);            p5 = _smpy32(co11, xt1_1);            xh2_2o = p4 + p5;            p6 = _smpy32(co11, yt1_1);            p7 = _smpy32(si11, xt1_1);            xh2_3o = p6 - p7;            /*-------------------------------------------------------------*/            /*   x2[l1  ] = (si20 * yt0_0 + co20 * xt0_0) >> 15            */            /*   x2[l1+1] = (co20 * yt0_0 - si20 * xt0_0) >> 15            */            /*   x2[l1+2] = (si21 * yt0_1 + co21 * xt0_1) >> 15            */            /*   x2[l1+3] = (co21 * yt0_1 - si21 * xt0_1) >> 15            */            /*-------------------------------------------------------------*/            p8 = _smpy32(si20, yt0_0);            p9 = _smpy32(co20, xt0_0);            xl1_0o = p8 + p9;            pA = _smpy32(co20, yt0_0);            pB = _smpy32(si20, xt0_0);            xl1_1o = pA - pB;            pC = _smpy32(si21, yt0_1);            pD = _smpy32(co21, xt0_1);            xl1_2o = pC + pD;            pE = _smpy32(co21, yt0_1);            pF = _smpy32(si21, xt0_1);            xl1_3o = pE - pF;            /*-------------------------------------------------------------*/            /*   x2[l2  ] = (si30 * yt2_0 + co30 * xt2_0) >> 15            */            /*   x2[l2+1] = (co30 * yt2_0 - si30 * xt2_0) >> 15            */            /*   x2[l2+2] = (si31 * yt2_1 + co31 * xt2_1) >> 15            */            /*   x2[l2+3] = (co31 * yt2_1 - si31 * xt2_1) >> 15            */            /*-------------------------------------------------------------*/            p10 = _smpy32(si30, yt2_0);            p11 = _smpy32(co30, xt2_0);            xl2_0o = p10 + p11;            p12 = _smpy32(co30, yt2_0);            p13 = _smpy32(si30, xt2_0);            xl2_1o = p12 - p13;            p14 = 
_smpy32(si31, yt2_1);            p15 = _smpy32(co31, xt2_1);            xl2_2o = p14 + p15;            p16 = _smpy32(co31, yt2_1);            p17 = _smpy32(si31, xt2_1);            xl2_3o = p16 - p17;            /*-------------------------------------------------------------*/            /*  Combine outputs in double words                            */            /*-------------------------------------------------------------*/            ydword0 = _itod(_hill(x_1o_yt0_0), _hill(x_0o_xt0_0));            ydword1 = _itod(_hill(x_3o_yt0_1), _hill(x_2o_xt0_1));            yh2dword0 = _itod(xh2_1o, xh2_0o);            yh2dword1 = _itod(xh2_3o, xh2_2o);            yl1dword0 = _itod(xl1_1o, xl1_0o);            yl1dword1 = _itod(xl1_3o, xl1_2o);            /*-----------------------------------------------------------*/            /* Derive output pointers using the input pointer "x"        */            /*-----------------------------------------------------------*/            x0 = x;            x2 = x0;            /*-------------------------------------------------------------*/            /*  Store eight outputs - four legs of each butterfly          */            /*-------------------------------------------------------------*/            _amemd8(&x2[0])    = ydword0;            _amemd8(&x2[2])    = ydword1;            _amemd8(&x2[h2])   = yh2dword0;            _amemd8(&x2[h2+2]) = yh2dword1;            _amemd8(&x2[l1])   = yl1dword0;            _amemd8(&x2[l1+2]) = yl1dword1;            _amemd8(&x2[l2])   = _itod(xl2_1o, xl2_0o);            _amemd8(&x2[l2+2]) = _itod(xl2_3o, xl2_2o);            /*-----------------------------------------------------------*/            /* When the twiddle factors are not to be re-used, j is      */            /* incremented by 12, to reflect the fact that 6 words       */            /* are consumed in every iteration. The input data pointer   */            /* increments by 4. 
Note that within a stage, the stride     */            /* does not change, and hence neither do the offsets of the  */            /* four legs: 0, h2, l1, l2.                                 */            /*-----------------------------------------------------------*/
            j += 12;            x += 4;            predj = (j - fft_jmp);            if (!predj) x += fft_jmp;            if (!predj) j = 0;
        }    }    if (radix == 2)        radix_2(ptr_x, ptr_y, npoints);    else if (radix == 4)        radix_4(ptr_x, ptr_y, npoints);    return;}void radix_2 (    int * restrict ptr_x,    int * restrict ptr_y,    int npoints){    int n0, j0;    int * restrict x2, * restrict x0;    int * restrict y0, * restrict y1, * restrict y2, * restrict y3;    int i, j, l1, h2;    double n0_dword0, n0_dword1, n0_dword2, n0_dword3;    double n1_dword0, n1_dword1, n1_dword2, n1_dword3;    double x_10, x_32, x_54, x_76, x_98, x_BA, x_DC, x_FE;    long long n00_n20, n01_n21, n10_n30, n11_n31;    long long n02_n22, n03_n23, n12_n32, n13_n33;    /*-----------------------------------------------------------------*/    /* The following code performs a standard radix2 pass.             */    /* Two pointers are used to access the input data.                 */    /* The input data is read "N/4" complex samples apart or "N/2"     */    /* words apart using pointers "x0" and "x2". This produces out-    */    /* puts that are 0, N/8, N/2, 3N/8 for radix 2.                    */    /*-----------------------------------------------------------------*/    y0 = ptr_y;    y2 = ptr_y + (int)npoints;    x0 = ptr_x;    x2 = ptr_x + (int)(npoints >> 1);    /*------------------------------------------------------------*/    /* The pointers are set at the following locations which are  */    /* half the offsets of a radix4 FFT.                          */    /*------------------------------------------------------------*/    y1  = y0 + (int)(npoints >> 2);    y3  = y2 + (int)(npoints >> 2);    l1  = _norm(npoints) + 1;    j0  = 8;    n0  = npoints >> 1;    /*-----------------------------------------------------------------*/    /* The following code reads data indentically for either a radix 4 */    /* or a radix 2 style decomposition. It writes out at different    */    /* locations though. 
It checks if either half the points, or a     */    /* quarter of the complex points have been exhausted to jump to    */    /* pervent double reversal.                                        */    /*-----------------------------------------------------------------*/    j = 0;    _nassert((int)(n0) % 4 == 0);    _nassert((int)(ptr_x) % 8 == 0);    _nassert((int)(ptr_y) % 8 == 0);    _nassert((int)(x0) % 8 == 0);    _nassert((int)(x2) % 8 == 0);    _nassert((int)(y0) % 8 == 0);    #pragma MUST_ITERATE(2,,2);    for (i = 0; i < npoints; i += 8) {        /*-------------------------------------------------------------*/        /* Digit reverse the index starting from 0. The increment to   */        /* "j" is either by 4, or 8.                                   */        /*-------------------------------------------------------------*/        DIG_REV(j, l1, h2);        /*-------------------------------------------------------------*/        /* Read in the input data, from the first eight locations.     */        /* These are transformed as a radix2.                          */        /*-------------------------------------------------------------*/        x_10 = _amemd8(&x0[0]);        x_32 = _amemd8(&x0[2]);        x_54 = _amemd8(&x0[4]);        x_76 = _amemd8(&x0[6]);        x0 += 8;        /*--------------------------------------------------------*/        /* Perform radix2 style decomposition.                    */        /*    n00 = x_0 + x_2;    n01 = x_1 + x_3;                */        /*    n20 = x_0 - x_2;    n21 = x_1 - x_3;                */        /*    n10 = x_4 + x_6;    n11 = x_5 + x_7;                */        /*    n30 = x_4 - x_6;    n31 = x_5 - x_7;                */        /*--------------------------------------------------------*/        n00_n20 = _addsub(_lo(x_10), _lo(x_32));        n01_n21 = _addsub(_hi(x_10), _hi(x_32));
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -