dsp_fft16x16r.h

来自「dm642函数库」· C头文件 代码 · 共 702 行 · 第 1/4 页

H
702
字号
/*       DSP_fft16x16r (128, &x_asm[2*256],&w[2*384],y_asm,brev,2, 256,512); */
/*       DSP_fft16x16r (128, &x_asm[2*384],&w[2*384],y_asm,brev,2, 384,512); */
/*                                                                           */
/*       The twiddle factor array is composed of log4(N) sets of twiddle     */
/*       factors, (3/4)*N, (3/16)*N, (3/64)*N, etc.  The index into this     */
/*       array for each stage of the fft is calculated by summing these      */
/*       indices up appropriately.                                           */
/*       For multiple ffts they can share the same table by calling the smal */
/*       ffts from further down in the twiddle factor array. In the same way */
/*       as the decomposition works for more data reuse.                     */
/*                                                                           */
/*       Thus, the above decomposition can be summarized for a general N ,   */
/*       radix "rad" as follows:                                             */
/*                                                                           */
/*  DSP_fft16x16r(N,  &x[0],         &w[0],        brev, y_cn, N/4, 0,    N) */
/*  DSP_fft16x16r(N/4,&x[0],         &w[2*(3*N/4)],brev, y_cn, rad, 0,    N) */
/*  DSP_fft16x16r(N/4,&x[2*(N/4)],   &w[2*(3*N/4)],brev, y_cn, rad, N/4,  N) */
/*  DSP_fft16x16r(N/4,&x[2*(N/2)],   &w[2*(3*N/4)],brev, y_cn, rad, N/2,  N) */
/*  DSP_fft16x16r(N/4,&x[2*(3*N/4)], &w[2*3*N/4)], brev, y_cn, rad, 3*N/4, N) */
/*                                                                           */
/*       As discussed previously, N can be either a power of 4 or 2.  If  N  */
/*       N is a power  of 4, rad = 4, and if N is a power of 2, and not  a   */
/*       power of 4, then rad = 2. "rad" is used to control how many stages  */
/*       of decomposition are performed. It is also used to dtermine whether */
/*       a radix4 or radix2 decomposition should be performed at the last    */
/*       stage. Hence when "rad" is set to "N/4" the first stage of the      */
/*       transform alone is performed and the code exits. To complete the    */
/*       FFT four other calls are required to perform N/4 size FFT's. In     */
/*       fact the ordering of these 4 FFT's amonst themselves does not       */
/*       matter and hence from a cahe perspective it helps to go through     */
/*       the remaining 4 FFT's in exactly the opposite order to the first.   */
/*                                                                           */
/*       This is illustrated as follows:                                     */
/*                                                                           */
/*  DSP_fft16x16r(N,  &x[0],         &w[0],        brev, y_cn, N/4, 0,    N) */
/*  DSP_fft16x16r(N/4,&x[2*(3*N/4)], &w[2*3*N/4)], brev, y_cn, rad, 3*N/4, N) */
/*  DSP_fft16x16r(N/4,&x[2*(N/2)],   &w[2*(3*N/4)],brev, y_cn, rad, N/2,  N) */
/*  DSP_fft16x16r(N/4,&x[2*(N/4)],   &w[2*(3*N/4)],brev, y_cn, rad, N/4,  N) */
/*  DSP_fft16x16r(N/4,&x[0],         &w[2*(3*N/4)],brev, y_cn, rad, 0,    N) */
/*                                                                           */
/*       In addition this function can be used to minimize call overhead, by */
/*       completing the FFT with one function call invocation as shown below */
/*                                                                           */
/*       DSP_fft16x16r(N, &x_cn[0], &w[0], y_cn, brev, rad, 0, N)            */
/*                                                                           */
/*   ASSUMPTIONS:                                                            */
/*       n must be a power of 2 and n >= 8  n <= 16384 points.               */
/*       Complex time data x and twiddle facotrs w are aligned on double     */
/*       word boundares. Real values are stored in even word positions and   */
/*       imaginary values in odd positions.                                  */
/*                                                                           */
/*       All data is in short precision integer fixed point form. The        */
/*       complex frequency data will be returned in linear order.            */
/*                                                                           */
/*       If Interupts are required the decomposition can be used to allow    */
/*       interupts to occur in between function calls. In this way interupts */
/*       Can occur roughly every 20%  of the time through the function.      */
/*                                                                           */
/*   MEMORY NOTE:                                                            */
/*       Configuration is LITTLE ENDIAN the code will not function if the -m */
/*       flag is enabled but it can be modified for BIG ENDIAN usage.        */
/*                                                                           */
/*   TECHNIQUES                                                              */
/*       A special sequence of coeffs. used as generated above               */
/*       produces the fft. This collapses the inner 2 loops in the           */
/*       taditional Burrus and Parks implementation Fortran Code.            */
/*                                                                           */
/*       The revised FFT uses a redundant sequence of twiddle factors to     */
/*       allow a linear access through the data. This linear access enables  */
/*       data and instruction level parallelism.                             */
/*       The data produced by the DSP_fft16x16r fft is in normal form, the   */
/*       whole data array is written into a new output buffer.               */
/*                                                                           */
/*       The DSP_fft16x16r butterfly is bit reversed, i.e. the inner 2 points of */
/*       the butterfly are corssed over, this has the effect of making the   */
/*       data come out in bit reversed rather than in radix 4 digit reversed */
/*       order. This simplifies the last pass of the loop. A simple table    */
/*       is used to do the bit reversal out of place.                        */
/*                                                                           */
/*           unsigned char brev[64] = {                                      */
/*                 0x0, 0x20, 0x10, 0x30, 0x8, 0x28, 0x18, 0x38,             */
/*                 0x4, 0x24, 0x14, 0x34, 0xc, 0x2c, 0x1c, 0x3c,             */
/*                 0x2, 0x22, 0x12, 0x32, 0xa, 0x2a, 0x1a, 0x3a,             */
/*                 0x6, 0x26, 0x16, 0x36, 0xe, 0x2e, 0x1e, 0x3e,             */
/*                 0x1, 0x21, 0x11, 0x31, 0x9, 0x29, 0x19, 0x39,             */
/*                 0x5, 0x25, 0x15, 0x35, 0xd, 0x2d, 0x1d, 0x3d,             */
/*                 0x3, 0x23, 0x13, 0x33, 0xb, 0x2b, 0x1b, 0x3b,             */
/*                 0x7, 0x27, 0x17, 0x37, 0xf, 0x2f, 0x1f, 0x3f              */
/*           };                                                              */
/*                                                                           */
/*        This function no longer uses the lookup table to perform bit       */
/*        reversal. It is performed using the _bitr instruction on C64x      */
/*        architecture. It is performed using a macro BIT_REV instead.       */
/*                                                                           */
/*   NOTES                                                                   */
/*       For more aggressive overflow control the shift in the DC term can b */
/*       adjusted to 2 and the twiddle factors shifted right by 1. This give */
/*       a divide by 4 at each stage. For better accuracy the data can be pr */
/*       asserted left by so many bits so that as it builds in magnitude the */
/*       divide by 2 prevents too much growth. An optimal point for example  */
/*       with an 8192pt fft with input data precision of 8 bits is to asert  */
/*       the input 4 bits left to make it 12 bits. This gives an SNR of 68dB */
/*       at the output. By trying combinations the optimal can be found.     */
/*       If scaling isnot required it is possible to replace the MPY by SMPY */
/*       this will give a shift left by 1 so a shift right by 16 gives a     */
/*       total 15 bit shift right. The DC term must be adjusted to give a    */
/*       zero shift.                                                         */
/*                                                                           */
/*   C CODE                                                                  */
/*       The following code is the traditional Burrus and Parks implemen-    */
/*       tation, which performs a mixed radix FFT capable of 2^M, 4^M.       */
/*       However it does not support multiple calls. It uses a traditional   */
/*       twiddle factor array wn, generated as follows:                      */
/*                                                                           */
/*       const double M = 32767.0;                                           */
/*       const double PI = 3  41592654;                                      */
/*                                                                           */
/*       for (i=0, k = 0; i < 3*(N>>2); i++)                                 */
/*       {                                                                   */
/*          theta1 = 2*PI*i/N;                                               */
/*          x_t = M*cos(theta1);                                             */
/*          y_t = M*sin(theta1);                                             */
/*          wn[k]   = (short) x_t;                                           */
/*          if (x_t >= M) wn[k  ] = 0x7fff;                                  */
/*          wn[k+1] = (short) y_t;                                           */
/*          if (y_t >= M) wn[k+1] = 0x7fff;                                  */
/*          k+=2;                                                            */
/*       }                                                                   */
/*                                                                           */
/*       The C code that implements the traditional mixed radix FFT is       */
/*       shown below. It has three nested loops, one for the stages,         */
/*       one for the groups of butterflies, one for the passes.              */
/*                                                                           */
/* void DSP_fft16x16r(int n, short x[], short wn[],                          */
/*       unsigned char brev[], short y[], int radix, int offset, int nmax)   */
/* {                                                                         */
/*     int    n1, n2, ie, ia1, ia2, ia3, i0, i1, i2, i3, i, l0;              */
/*     short  co1, co2, co3, si1, si2, si3;                                  */
/*     short  xt0, yt0, xt1, yt1, xt2, yt2;                                  */
/*     short  xh0,xh1,xh20,xh21,xl0,xl1,xl20,xl21;                           */
/*     short * ptr_x0, * y0;                                                 */
/*     unsigned int  j0, j1, k0, k1, k, j;                                   */
/*     short x0, x1, x2, x3, x4, x5, x6, x7;                                 */
/*     short xh0_0, xh1_0, xh0_1, xh1_1;                                     */
/*     short xl0_0, xl1_0, xl0_1, xl1_1;                                     */
/*     short yt3, yt4, yt5, yt6, yt7;                                        */
/*                                                                           */
/*     n2 = n;                                                               */
/*     ie = 1;                                                               */
/*     for (k = n; k > radix; k >>= 2)                                       */
/*     {                                                                     */
/*         n1 = n2;                                                          */
/*         n2 >>= 2;                                                         */
/*         ia1 = 0;                                                          */
/*         for (j = 0; j < n2; j++)                                          */
/*         {                                                                 */
/*             ia2 = ia1 + ia1;                                              */
/*             ia3 = ia2 + ia1;                                              */
/*             co1 = w[2 * ia1    ];                                         */
/*             si1 = w[2 * ia1 + 1];                                         */
/*             co2 = w[2 * ia2    ];                                         */
/*             si2 = w[2 * ia2 + 1];                                         */
/*             co3 = w[2 * ia3    ];                                         */
/*             si3 = w[2 * ia3 + 1];                                         */
/*             ia1 = ia1 + ie;                                               */
/*             for (i0 = j; i0 < n; i0 += n1)                                */
/*             {                                                             */
/*                 i1 = i0 + n2;                                             */
/*                 i2 = i1 + n2;                                             */
/*                 i3 = i2 + n2;                                             */
/*                                                                           */
/*                 xh0  = x[2 * i0    ] + x[2 * i2    ];                     */
/*                 xh1  = x[2 * i0 + 1] + x[2 * i2 + 1];                     */
/*                 xl0  = x[2 * i0    ] - x[2 * i2    ];                     */
/*                 xl1  = x[2 * i0 + 1] - x[2 * i2 + 1];                     */
/*                                                                           */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?