📄 dsp_fft32x32_cn.c
字号:
/* incremented by 6, to reflect the fact that 6 half words */ /* are consumed in every iteration. The input data pointer */ /* increments by 2. Note that within a stage, the stride */ /* does not change and hence the offsets for the other three */ /* legs, 0, h2, l1, l2. */ /*-----------------------------------------------------------*/ j += 6; x += 2; predj = (j - fft_jmp); if (!predj) x += fft_jmp; if (!predj) j = 0; /*----------------------------------------------------------*/ /* These four partial results can be re-written to show */ /* the underlying DIF structure similar to radix2 as */ /* follows: */ /* */ /* X(4k) = (x(n)+x(n + N/2)) + (x(n+N/4)+ x(n + 3N/4)) */ /* X(4k+1)= (x(n)-x(n + N/2)) -j(x(n+N/4) - x(n + 3N/4)) */ /* x(4k+2)= (x(n)+x(n + N/2)) - (x(n+N/4)+ x(n + 3N/4)) */ /* X(4k+3)= (x(n)-x(n + N/2)) +j(x(n+N/4) - x(n + 3N/4)) */ /* */ /* which leads to the real and imaginary values as foll: */ /* */ /* y0r = x0r + x2r + x1r + x3r = xh0 + xh20 */ /* y0i = x0i + x2i + x1i + x3i = xh1 + xh21 */ /* y1r = x0r - x2r + (x1i - x3i) = xl0 + xl21 */ /* y1i = x0i - x2i - (x1r - x3r) = xl1 - xl20 */ /* y2r = x0r + x2r - (x1r + x3r) = xh0 - xh20 */ /* y2i = x0i + x2i - (x1i + x3i = xh1 - xh21 */ /* y3r = x0r - x2r - (x1i - x3i) = xl0 - xl21 */ /* y3i = x0i - x2i + (x1r - x3r) = xl1 + xl20 */ /* ---------------------------------------------------------*/ x0[0] = xh0_0 + xh20_0; x0[1] = xh1_0 + xh21_0; xt0_0 = xh0_0 - xh20_0; yt0_0 = xh1_0 - xh21_0; xt1_0 = xl0_0 + xl21_0; yt2_0 = xl1_0 + xl20_0; xt2_0 = xl0_0 - xl21_0; yt1_0 = xl1_0 - xl20_0; /*-----------------------------------------------------------*/ /* The following multiplies are close to a true 32 by 32 */ /* multiply instruction. */ /* Perform twiddle factor multiplies of three terms,top */ /* term does not have any multiplies. Note the twiddle */ /* factors for a normal FFT are C + j (-S). Since the */ /* factors that are stored are C + j S, this is */ /* corrected for in the multiplies. */ /* */ /* Y1 = (xt1 + jyt1) (c + js) = (xc + ys) + (yc -xs) */ /* There is a loss of 1.5 bits of accuracy. This arises be- */ /* cause the result of the low sixteen by sixteen bits is */ /* not computed and the multiply of the low by high is */ /* shifted by 16, and results in a loss of 1 bit. */ /* Multiplies are performed for all the three legs of the */ /* radix4 butterfly are performed. In addition note that */ /* themiddle two legs are swapped. This is based on work */ /* done by Panos Papamichalis, on obtaining bit reversed */ /* order from a radix 4 butterfly. This allows the last */ /* stage to be performed either as a radix4 or a radix2 */ /* pass for this stage. */ /*-----------------------------------------------------------*/ x2[h2 ] = SMPY_32(si10,yt1_0) + SMPY_32(co10, xt1_0); x2[h2+1] = SMPY_32(co10,yt1_0) - SMPY_32(si10, xt1_0); x2[l1] = SMPY_32(si20,yt0_0) + SMPY_32(co20, xt0_0); x2[l1+1] = SMPY_32(co20,yt0_0) - SMPY_32(si20, xt0_0); x2[l2] = SMPY_32(si30,yt2_0) + SMPY_32(co30, xt2_0); x2[l2+1] = SMPY_32(co30,yt2_0) - SMPY_32(si30, xt2_0); } } /*-----------------------------------------------------------------*/ /* The following code performs either a standard radix4 pass or a */ /* radix2 pass. Two pointers are used to access the input data. */ /* The input data is read "N/4" complex samples apart or "N/2" */ /* words apart using pointers "x0" and "x2". This produces out- */ /* puts that are 0, N/4, N/2, 3N/4 for a radix4 FFT, and 0, N/8 */ /* N/2, 3N/8 for radix 2. */ /*-----------------------------------------------------------------*/ y0 = ptr_y; y2 = ptr_y + (int) npoints; x0 = ptr_x; x2 = ptr_x + (int) (npoints>>1); if (radix == 2) { /*------------------------------------------------------------*/ /* The pointers are set at the following locations which are */ /* half the offsets of a radix4 FFT. */ /*------------------------------------------------------------*/ y1 = y0 + (int) (npoints >> 2); y3 = y2 + (int) (npoints >> 2); l1 = norm + 1; j0 = 8; n0 = npoints >> 1; } else { y1 = y0 + (int) (npoints >> 1); y3 = y2 + (int) (npoints >> 1); l1 = norm + 2; j0 = 4; n0 = npoints >> 2; } /*-----------------------------------------------------------------*/ /* The following code reads data indentically for either a radix 4 */ /* or a radix 2 style decomposition. It writes out at different */ /* locations though. It checks if either half the points, or a */ /* quarter of the complex points have been exhausted to jump to */ /* pervent double reversal. */ /*-----------------------------------------------------------------*/ j = 0; #ifndef NOASSUME _nassert((int)(n0)%4 == 0); _nassert((int)(x0)%8 == 0); _nassert((int)(x2)%8 == 0); _nassert((int)(y0)%8 == 0); #pragma MUST_ITERATE(2,,2); #endif for (i = 0; i < npoints; i += 8) { /*-------------------------------------------------------------*/ /* Digit reverse the index starting from 0. The increment to */ /* "j" is either by 4, or 8. */ /*-------------------------------------------------------------*/ DIG_REV(j, l1, h2); /*-------------------------------------------------------------*/ /* Read in the input data, from the first eight locations. */ /* These are transformed either as a radix4 or as a radix 2. */ /*-------------------------------------------------------------*/ x_0 = x0[0]; x_1 = x0[1]; x_2 = x0[2]; x_3 = x0[3]; x_4 = x0[4]; x_5 = x0[5]; x_6 = x0[6]; x_7 = x0[7]; x0 += 8; xh0_0 = x_0 + x_4; xh1_0 = x_1 + x_5; xl0_0 = x_0 - x_4; xl1_0 = x_1 - x_5; xh0_1 = x_2 + x_6; xh1_1 = x_3 + x_7; xl0_1 = x_2 - x_6; xl1_1 = x_3 - x_7; n00 = xh0_0 + xh0_1; n01 = xh1_0 + xh1_1; n10 = xl0_0 + xl1_1; n11 = xl1_0 - xl0_1; n20 = xh0_0 - xh0_1; n21 = xh1_0 - xh1_1; n30 = xl0_0 - xl1_1; n31 = xl1_0 + xl0_1; if (radix == 2) { /*--------------------------------------------------------*/ /* Perform radix2 style decomposition. */ /*--------------------------------------------------------*/ n00 = x_0 + x_2; n01 = x_1 + x_3; n20 = x_0 - x_2; n21 = x_1 - x_3; n10 = x_4 + x_6; n11 = x_5 + x_7; n30 = x_4 - x_6; n31 = x_5 - x_7; } /*-------------------------------------------------------------*/ /* Store out the four outputs of 1 radix4 butterfly or 2 */ /* radix2 butterflies. */ /*-------------------------------------------------------------*/ y0[2*h2] = n00; y0[2*h2 + 1] = n01; y1[2*h2] = n10; y1[2*h2 + 1] = n11; y2[2*h2] = n20; y2[2*h2 + 1] = n21; y3[2*h2] = n30; y3[2*h2 + 1] = n31; /*-------------------------------------------------------------*/ /* Read in the next set of inputs from pointer "x2". These will*/ /* produce outputs that are contiguous to the previous outputs.*/ /*-------------------------------------------------------------*/ x_8 = x2[0]; x_9 = x2[1]; x_a = x2[2]; x_b = x2[3]; x_c = x2[4]; x_d = x2[5]; x_e = x2[6]; x_f = x2[7]; x2 += 8; /*-------------------------------------------------------------*/ /* Perform radix4 style decompositions and overwrite results */ /* if it is dtermined that the radix to be used is radix 2. */ /*-------------------------------------------------------------*/ xh0_2 = x_8 + x_c; xh1_2 = x_9 + x_d; xl0_2 = x_8 - x_c; xl1_2 = x_9 - x_d; xh0_3 = x_a + x_e; xh1_3 = x_b + x_f; xl0_3 = x_a - x_e; xl1_3 = x_b - x_f; n02 = xh0_2 + xh0_3; n03 = xh1_2 + xh1_3; n12 = xl0_2 + xl1_3; n13 = xl1_2 - xl0_3; n22 = xh0_2 - xh0_3; n23 = xh1_2 - xh1_3; n32 = xl0_2 - xl1_3; n33 = xl1_2 + xl0_3; if (radix == 2) { n02 = x_8 + x_a; n03 = x_9 + x_b; n22 = x_8 - x_a; n23 = x_9 - x_b; n12 = x_c + x_e; n13 = x_d + x_f; n32 = x_c - x_e; n33 = x_d - x_f; } /*----------------------------------------------------------------*/ /* Points that are read from succesive locations map to y, y[N/4] */ /* y[N/2], y[3N/4] in a radix4 scheme, y, y[N/8], y[N/2],y[5N/8] */ /*----------------------------------------------------------------*/ y0[2*h2+2] = n02; y0[2*h2+3] = n03; y1[2*h2+2] = n12; y1[2*h2+3] = n13; y2[2*h2+2] = n22; y2[2*h2+3] = n23; y3[2*h2+2] = n32; y3[2*h2+3] = n33; /*---------------------------------------------------------------*/ /* Increment j by "j0", if j is equal to n0, increment j by n0, */ /* that double reversal is avoided. */ /*---------------------------------------------------------------*/ j += j0; if (j == n0) { j += n0; x0 += (int)npoints >> 1; x2 += (int)npoints >> 1; } }}/* ======================================================================== *//* End of file: DSP_fft32x32_cn.c *//* ------------------------------------------------------------------------ *//* Copyright (C) 2007 Texas Instruments, Incorporated. *//* All Rights Reserved. *//* ======================================================================== */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -