dsp_fft16x16r.h

来自「dm642函数库」· C头文件 代码 · 共 702 行 · 第 1/4 页

H
702
字号
/*                 xh20 = x[2 * i1    ] + x[2 * i3    ];                     */
/*                 xh21 = x[2 * i1 + 1] + x[2 * i3 + 1];                     */
/*                 xl20 = x[2 * i1    ] - x[2 * i3    ];                     */
/*                 xl21 = x[2 * i1 + 1] - x[2 * i3 + 1];                     */
/*                                                                           */
/*                 x[2 * i0    ] = (xh0 + xh20 + 1)>>1;                      */
/*                 x[2 * i0 + 1] = (xh1 + xh21 + 1)>>1;                      */
/*                                                                           */
/*                 xt0  = xh0 - xh20;                                        */
/*                 yt0  = xh1 - xh21;                                        */
/*                 xt1  = xl0 + xl21;                                        */
/*                 yt2  = xl1 + xl20;                                        */
/*                 xt2  = xl0 - xl21;                                        */
/*                 yt1  = xl1 - xl20;                                        */
/*                                                                           */
/*                 x[2 * i2    ]= (xt1 * co1 + yt1 * si1 + 0x00008000)>> 16; */
/*                 x[2 * i2 + 1]= (yt1 * co1 - xt1 * si1 + 0x00008000)>> 16; */
/*                 x[2 * i1    ]= (xt0 * co2 + yt0 * si2 + 0x00008000)>> 16; */
/*                 x[2 * i1 + 1]= (yt0 * co2 - xt0 * si2 + 0x00008000)>> 16; */
/*                 x[2 * i3    ]= (xt2 * co3 + yt2 * si3 + 0x00008000)>> 16; */
/*                 x[2 * i3 + 1]= (yt2 * co3 - xt2 * si3 + 0x00008000)>> 16; */
/*             }                                                             */
/*         }                                                                 */
/*                                                                           */
/*         ie <<= 2;                                                         */
/*    }                                                                      */
/*                                                                           */
/*    j = 0;                                                                 */
/*    ptr_x0 = x;                                                            */
/*    y0 = y;                                                                */
/*    l0 = _norm(n) - 17;                                                    */
/*                                                                           */
/*    if(radix == 2 || radix  == 4) for (i = 0; i < n; i += 4)               */
/*    {                                                                      */
/*                                                                           */
/*                                                                           */
/*            j0 = (j     ) & 0x3F;                                          */
/*            j1 = (j >> 6) & 0x3F;                                          */
/*            k0 = brev[j0];                                                 */
/*            k1 = brev[j1];                                                 */
/*            k = (k0 << 6) |  k1;                                           */
/*            if (l0 < 0) k = k << -l0;                                      */
/*            else        k = k >> l0;                                       */
/*            j++;                                                           */
/*                                                                           */
/*            x0   = ptr_x0[0];  x1 = ptr_x0[1];                             */
/*            x2   = ptr_x0[2];  x3 = ptr_x0[3];                             */
/*            x4   = ptr_x0[4];  x5 = ptr_x0[5];                             */
/*            x6   = ptr_x0[6];  x7 = ptr_x0[7];                             */
/*            ptr_x0 += 8;                                                   */
/*                                                                           */
/*            xh0_0  = x0 + x4;                                              */
/*            xh1_0  = x1 + x5;                                              */
/*            xh0_1  = x2 + x6;                                              */
/*            xh1_1  = x3 + x7;                                              */
/*                                                                           */
/*            if (radix == 2)                                                */
/*            {                                                              */
/*                xh0_0 = x0;                                                */
/*                xh1_0 = x1;                                                */
/*                xh0_1 = x2;                                                */
/*                xh1_1 = x3;                                                */
/*            }                                                              */
/*                                                                           */
/*            yt0  = xh0_0 + xh0_1;                                          */
/*            yt1  = xh1_0 + xh1_1;                                          */
/*            yt4  = xh0_0 - xh0_1;                                          */
/*            yt5  = xh1_0 - xh1_1;                                          */
/*                                                                           */
/*            xl0_0  = x0 - x4;                                              */
/*            xl1_0  = x1 - x5;                                              */
/*            xl0_1  = x2 - x6;                                              */
/*            xl1_1  = x3 - x7;                                              */
/*                                                                           */
/*            if (radix == 2)                                                */
/*            {                                                              */
/*                  xl0_0 = x4;                                              */
/*                  xl1_0 = x5;                                              */
/*                  xl1_1 = x6;                                              */
/*                  xl0_1 = x7;                                              */
/*            }                                                              */
/*                                                                           */
/*            yt2  = xl0_0 + xl1_1;                                          */
/*            yt3  = xl1_0 - xl0_1;                                          */
/*                                                                           */
/*            yt6  = xl0_0 - xl1_1;                                          */
/*            yt7  = xl1_0 + xl0_1;                                          */
/*                                                                           */
/*            if (radix == 2)                                                */
/*            {                                                              */
/*                 yt7  = xl1_0 - xl0_1;                                     */
/*                 yt3  = xl1_0 + xl0_1;                                     */
/*            }                                                              */
/*                                                                           */
/*            y0[k] = yt0; y0[k+1] = yt1;                                    */
/*            k += n>>1                                                      */
/*            y0[k] = yt2; y0[k+1] = yt3;                                    */
/*            k += n>>1;                                                     */
/*            y0[k] = yt4; y0[k+1] = yt5;                                    */
/*            k += n>>1;                                                     */
/*            y0[k] = yt6; y0[k+1] = yt7;                                    */
/*       }                                                                   */
/*   }                                                                       */
/*                                                                           */
/*       Although code shown above is the simplest equivalent way of writing */
/*       this code, it already carries several optimization ideas. It has    */
/*       a special last stage to avoid multiplication by 1. In addition it   */
/*       was shown by Panos Papamichalis that if the two middle legs of a    */
/*       radix 4 butterfly are reversed, the outputs for a radix4 transform  */
/*       end up in the bit reversed fashion. The code also carries a linear  */
/*       time look up table for bit reversal. This can be used as shown in   */
/*       the code to construct a bit reversed index. The last stage perfo-   */
/*       rms either a radix4 or radix2 as the case may be.                   */
/*                                                                           */
/*       The code shown below performs loop coalescing as it is realized     */
/*       that while the "i" and "j" loop individually iterate for variable   */
/*       number of times, together they always iterate for N/4 times. The    */
/*       natural C code and the code shown below use a modified twiddle      */
/*       factor array to allow for vectorization of the loop. In addition    */
/*       bit-reversal is performed by a macro BIT_REV. This makes the bit-   */
/*       reversal table redundant.                                           */
/*                                                                           */
/*       This is the C equivalent of the assembly code without restrictions: */
/*       Note that the assembly code is hand optimized and restrictions may  */
/*       apply.                                                              */
/*                                                                           */
/*                                                                           */
/*      void DSP_fft16x16r(int n, short ptr_x[], short ptr_w[], short ptr_y[] */
/*                   unsigned char brev[], int n_min, int offset, int n_max) */
/*      {                                                                    */
/*         int  i, j, k, l1, l2, h2, predj;                                  */
/*         int  tw_offset, stride, fft_jmp;                                  */
/*                                                                           */
/*         short x0, x1, x2, x3,x4,x5,x6,x7;                                 */
/*         short xt0, yt0, xt1, yt1, xt2, yt2, yt3;                          */
/*         short yt4, yt5, yt6, yt7;                                         */
/*         short si1,si2,si3,co1,co2,co3;                                    */
/*         short xh0,xh1,xh20,xh21,xl0,xl1,xl20,xl21;                        */
/*         short x_0, x_1, x_l1, x_l1p1, x_h2 , x_h2p1, x_l2, x_l2p1;        */
/*         short xl0_0, xl1_0, xl0_1, xl1_1;                                 */
/*         short xh0_0, xh1_0, xh0_1, xh1_1;                                 */
/*         short *x,*w;                                                      */
/*         int   k0, k1, j0, j1, l0, radix;                                  */
/*         short * y0, * ptr_x0, * ptr_x2;                                   */
/*                                                                           */
/*         radix = n_min;                                                    */
/*                                                                           */
/*         stride = n; -* n is the number of complex samples *-              */
/*         tw_offset = 0;                                                    */
/*         while (stride > radix)                                            */
/*         {                                                                 */
/*             j = 0;                                                        */
/*             fft_jmp = stride + (stride>>1);                               */
/*             h2 = stride>>1;                                               */
/*             l1 = stride;                                                  */
/*             l2 = stride + (stride>>1);                                    */
/*             x = ptr_x;                                                    */
/*             w = ptr_w + tw_offset;                                        */
/*                                                                           */
/*             for (i = 0; i < n; i += 4)                                    */
/*             {                                                             */
/*                 co1 = w[j];                                               */
/*                 si1 = w[j+1];                                             */
/*                 co2 = w[j+2];                                             */
/*                 si2 = w[j+3];                                             */
/*                 co3 = w[j+4];                                             */
/*                 si3 = w[j+5];                                             */
/*                                                                           */
/*                 x_0    = x[0];                                            */
/*                 x_1    = x[1];                                            */
/*                 x_h2   = x[h2];                                           */
/*                 x_h2p1 = x[h2+1];                                         */
/*                 x_l1   = x[l1];                                           */
/*                 x_l1p1 = x[l1+1];                                         */
/*                 x_l2   = x[l2];                                           */
/*                 x_l2p1 = x[l2+1];                                         */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?