dsp_fft16x16r.h
来自「dm642函数库」· C头文件 代码 · 共 702 行 · 第 1/4 页
H
702 行
/* DSP_fft16x16r (128, &x_asm[2*256],&w[2*384],y_asm,brev,2, 256,512); */
/* DSP_fft16x16r (128, &x_asm[2*384],&w[2*384],y_asm,brev,2, 384,512); */
/* */
/* The twiddle factor array is composed of log4(N) sets of twiddle */
/* factors, (3/4)*N, (3/16)*N, (3/64)*N, etc. The index into this */
/* array for each stage of the fft is calculated by summing these */
/* indices up appropriately. */
/* For multiple ffts they can share the same table by calling the smal */
/* ffts from further down in the twiddle factor array. In the same way */
/* as the decomposition works for more data reuse. */
/* */
/* Thus, the above decomposition can be summarized for a general N , */
/* radix "rad" as follows: */
/* */
/* DSP_fft16x16r(N, &x[0], &w[0], brev, y_cn, N/4, 0, N) */
/* DSP_fft16x16r(N/4,&x[0], &w[2*(3*N/4)],brev, y_cn, rad, 0, N) */
/* DSP_fft16x16r(N/4,&x[2*(N/4)], &w[2*(3*N/4)],brev, y_cn, rad, N/4, N) */
/* DSP_fft16x16r(N/4,&x[2*(N/2)], &w[2*(3*N/4)],brev, y_cn, rad, N/2, N) */
/* DSP_fft16x16r(N/4,&x[2*(3*N/4)], &w[2*3*N/4)], brev, y_cn, rad, 3*N/4, N) */
/* */
/* As discussed previously, N can be either a power of 4 or 2. If N */
/* N is a power of 4, rad = 4, and if N is a power of 2, and not a */
/* power of 4, then rad = 2. "rad" is used to control how many stages */
/* of decomposition are performed. It is also used to dtermine whether */
/* a radix4 or radix2 decomposition should be performed at the last */
/* stage. Hence when "rad" is set to "N/4" the first stage of the */
/* transform alone is performed and the code exits. To complete the */
/* FFT four other calls are required to perform N/4 size FFT's. In */
/* fact the ordering of these 4 FFT's amonst themselves does not */
/* matter and hence from a cahe perspective it helps to go through */
/* the remaining 4 FFT's in exactly the opposite order to the first. */
/* */
/* This is illustrated as follows: */
/* */
/* DSP_fft16x16r(N, &x[0], &w[0], brev, y_cn, N/4, 0, N) */
/* DSP_fft16x16r(N/4,&x[2*(3*N/4)], &w[2*3*N/4)], brev, y_cn, rad, 3*N/4, N) */
/* DSP_fft16x16r(N/4,&x[2*(N/2)], &w[2*(3*N/4)],brev, y_cn, rad, N/2, N) */
/* DSP_fft16x16r(N/4,&x[2*(N/4)], &w[2*(3*N/4)],brev, y_cn, rad, N/4, N) */
/* DSP_fft16x16r(N/4,&x[0], &w[2*(3*N/4)],brev, y_cn, rad, 0, N) */
/* */
/* In addition this function can be used to minimize call overhead, by */
/* completing the FFT with one function call invocation as shown below */
/* */
/* DSP_fft16x16r(N, &x_cn[0], &w[0], y_cn, brev, rad, 0, N) */
/* */
/* ASSUMPTIONS: */
/* n must be a power of 2 and n >= 8 n <= 16384 points. */
/* Complex time data x and twiddle facotrs w are aligned on double */
/* word boundares. Real values are stored in even word positions and */
/* imaginary values in odd positions. */
/* */
/* All data is in short precision integer fixed point form. The */
/* complex frequency data will be returned in linear order. */
/* */
/* If Interupts are required the decomposition can be used to allow */
/* interupts to occur in between function calls. In this way interupts */
/* Can occur roughly every 20% of the time through the function. */
/* */
/* MEMORY NOTE: */
/* Configuration is LITTLE ENDIAN the code will not function if the -m */
/* flag is enabled but it can be modified for BIG ENDIAN usage. */
/* */
/* TECHNIQUES */
/* A special sequence of coeffs. used as generated above */
/* produces the fft. This collapses the inner 2 loops in the */
/* taditional Burrus and Parks implementation Fortran Code. */
/* */
/* The revised FFT uses a redundant sequence of twiddle factors to */
/* allow a linear access through the data. This linear access enables */
/* data and instruction level parallelism. */
/* The data produced by the DSP_fft16x16r fft is in normal form, the */
/* whole data array is written into a new output buffer. */
/* */
/* The DSP_fft16x16r butterfly is bit reversed, i.e. the inner 2 points of */
/* the butterfly are corssed over, this has the effect of making the */
/* data come out in bit reversed rather than in radix 4 digit reversed */
/* order. This simplifies the last pass of the loop. A simple table */
/* is used to do the bit reversal out of place. */
/* */
/* unsigned char brev[64] = { */
/* 0x0, 0x20, 0x10, 0x30, 0x8, 0x28, 0x18, 0x38, */
/* 0x4, 0x24, 0x14, 0x34, 0xc, 0x2c, 0x1c, 0x3c, */
/* 0x2, 0x22, 0x12, 0x32, 0xa, 0x2a, 0x1a, 0x3a, */
/* 0x6, 0x26, 0x16, 0x36, 0xe, 0x2e, 0x1e, 0x3e, */
/* 0x1, 0x21, 0x11, 0x31, 0x9, 0x29, 0x19, 0x39, */
/* 0x5, 0x25, 0x15, 0x35, 0xd, 0x2d, 0x1d, 0x3d, */
/* 0x3, 0x23, 0x13, 0x33, 0xb, 0x2b, 0x1b, 0x3b, */
/* 0x7, 0x27, 0x17, 0x37, 0xf, 0x2f, 0x1f, 0x3f */
/* }; */
/* */
/* This function no longer uses the lookup table to perform bit */
/* reversal. It is performed using the _bitr instruction on C64x */
/* architecture. It is performed using a macro BIT_REV instead. */
/* */
/* NOTES */
/* For more aggressive overflow control the shift in the DC term can b */
/* adjusted to 2 and the twiddle factors shifted right by 1. This give */
/* a divide by 4 at each stage. For better accuracy the data can be pr */
/* asserted left by so many bits so that as it builds in magnitude the */
/* divide by 2 prevents too much growth. An optimal point for example */
/* with an 8192pt fft with input data precision of 8 bits is to asert */
/* the input 4 bits left to make it 12 bits. This gives an SNR of 68dB */
/* at the output. By trying combinations the optimal can be found. */
/* If scaling isnot required it is possible to replace the MPY by SMPY */
/* this will give a shift left by 1 so a shift right by 16 gives a */
/* total 15 bit shift right. The DC term must be adjusted to give a */
/* zero shift. */
/* */
/* C CODE */
/* The following code is the traditional Burrus and Parks implemen- */
/* tation, which performs a mixed radix FFT capable of 2^M, 4^M. */
/* However it does not support multiple calls. It uses a traditional */
/* twiddle factor array wn, generated as follows: */
/* */
/* const double M = 32767.0; */
/* const double PI = 3 41592654; */
/* */
/* for (i=0, k = 0; i < 3*(N>>2); i++) */
/* { */
/* theta1 = 2*PI*i/N; */
/* x_t = M*cos(theta1); */
/* y_t = M*sin(theta1); */
/* wn[k] = (short) x_t; */
/* if (x_t >= M) wn[k ] = 0x7fff; */
/* wn[k+1] = (short) y_t; */
/* if (y_t >= M) wn[k+1] = 0x7fff; */
/* k+=2; */
/* } */
/* */
/* The C code that implements the traditional mixed radix FFT is */
/* shown below. It has three nested loops, one for the stages, */
/* one for the groups of butterflies, one for the passes. */
/* */
/* void DSP_fft16x16r(int n, short x[], short wn[], */
/* unsigned char brev[], short y[], int radix, int offset, int nmax) */
/* { */
/* int n1, n2, ie, ia1, ia2, ia3, i0, i1, i2, i3, i, l0; */
/* short co1, co2, co3, si1, si2, si3; */
/* short xt0, yt0, xt1, yt1, xt2, yt2; */
/* short xh0,xh1,xh20,xh21,xl0,xl1,xl20,xl21; */
/* short * ptr_x0, * y0; */
/* unsigned int j0, j1, k0, k1, k, j; */
/* short x0, x1, x2, x3, x4, x5, x6, x7; */
/* short xh0_0, xh1_0, xh0_1, xh1_1; */
/* short xl0_0, xl1_0, xl0_1, xl1_1; */
/* short yt3, yt4, yt5, yt6, yt7; */
/* */
/* n2 = n; */
/* ie = 1; */
/* for (k = n; k > radix; k >>= 2) */
/* { */
/* n1 = n2; */
/* n2 >>= 2; */
/* ia1 = 0; */
/* for (j = 0; j < n2; j++) */
/* { */
/* ia2 = ia1 + ia1; */
/* ia3 = ia2 + ia1; */
/* co1 = w[2 * ia1 ]; */
/* si1 = w[2 * ia1 + 1]; */
/* co2 = w[2 * ia2 ]; */
/* si2 = w[2 * ia2 + 1]; */
/* co3 = w[2 * ia3 ]; */
/* si3 = w[2 * ia3 + 1]; */
/* ia1 = ia1 + ie; */
/* for (i0 = j; i0 < n; i0 += n1) */
/* { */
/* i1 = i0 + n2; */
/* i2 = i1 + n2; */
/* i3 = i2 + n2; */
/* */
/* xh0 = x[2 * i0 ] + x[2 * i2 ]; */
/* xh1 = x[2 * i0 + 1] + x[2 * i2 + 1]; */
/* xl0 = x[2 * i0 ] - x[2 * i2 ]; */
/* xl1 = x[2 * i0 + 1] - x[2 * i2 + 1]; */
/* */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?