📄 dsp_fft32x32.c
字号:
xh1_1_xl1_1 = _addsub(_hi(x_32), _hi(x_l1_32)); /*------------------------------------------------------------*/ /* xh20_0 = x[h2 ] + x[l2 ]; xh21_0 = x[h2+1] + x[l2+1] */ /* xh20_1 = x[h2+2] + x[l2+2]; xh21_1 = x[h2+3] + x[l2+3] */ /* xl20_0 = x[h2 ] - x[l2 ]; xl21_0 = x[h2+1] - x[l2+1] */ /* xl20_1 = x[h2+2] - x[l2+2]; xl21_1 = x[h2+3] - x[l2+3] */ /*------------------------------------------------------------*/ xh20_0_xl20_0 = _addsub(_lo(x_h2_10), _lo(x_l2_10)); xh21_0_xl21_0 = _addsub(_hi(x_h2_10), _hi(x_l2_10)); xh20_1_xl20_1 = _addsub(_lo(x_h2_32), _lo(x_l2_32)); xh21_1_xl21_1 = _addsub(_hi(x_h2_32), _hi(x_l2_32)); /*-------------------------------------------------------------*/ /* x_0o = xh0_0 + xh20_0; x_1o = xh1_0 + xh21_0; */ /* x_2o = xh0_1 + xh20_1; x_3o = xh1_1 + xh21_1; */ /* xt0_0 = xh0_0 - xh20_0; yt0_0 = xh1_0 - xh21_0; */ /* xt0_1 = xh0_1 - xh20_1; yt0_1 = xh1_1 - xh21_1; */ /*-------------------------------------------------------------*/ x_0o_xt0_0 = _addsub(_hill(xh0_0_xl0_0), _hill(xh20_0_xl20_0)); x_1o_yt0_0 = _addsub(_hill(xh1_0_xl1_0), _hill(xh21_0_xl21_0)); x_2o_xt0_1 = _addsub(_hill(xh0_1_xl0_1), _hill(xh20_1_xl20_1)); x_3o_yt0_1 = _addsub(_hill(xh1_1_xl1_1), _hill(xh21_1_xl21_1)); /*-------------------------------------------------------------*/ /* xt1_0 = xl0_0 + xl21_0; yt2_0 = xl1_0 + xl20_0; */ /* xt1_1 = xl0_1 + xl21_1; yt2_1 = xl1_1 + xl20_1; */ /* xt2_0 = xl0_0 - xl21_0; yt1_0 = xl1_0 - xl20_0; */ /* xt2_1 = xl0_1 - xl21_1; yt1_1 = xl1_1 - xl20_1; */ /*-------------------------------------------------------------*/ xt1_0_xt2_0 = _addsub(_loll(xh0_0_xl0_0), _loll(xh21_0_xl21_0)); yt2_0_yt1_0 = _addsub(_loll(xh1_0_xl1_0), _loll(xh20_0_xl20_0)); xt1_1_xt2_1 = _addsub(_loll(xh0_1_xl0_1), _loll(xh21_1_xl21_1)); yt2_1_yt1_1 = _addsub(_loll(xh1_1_xl1_1), _loll(xh20_1_xl20_1)); xt0_0 = _loll(x_0o_xt0_0); xt0_1 = _loll(x_2o_xt0_1); yt0_0 = _loll(x_1o_yt0_0); yt0_1 = _loll(x_3o_yt0_1); xt1_0 = _hill(xt1_0_xt2_0); xt1_1 = 
_hill(xt1_1_xt2_1); yt1_0 = _loll(yt2_0_yt1_0); yt1_1 = _loll(yt2_1_yt1_1); xt2_0 = _loll(xt1_0_xt2_0); xt2_1 = _loll(xt1_1_xt2_1); yt2_0 = _hill(yt2_0_yt1_0); yt2_1 = _hill(yt2_1_yt1_1); /*-------------------------------------------------------------*/ /* x2[h2 ] = (si10 * yt1_0 + co10 * xt1_0) >> 15 */ /* x2[h2+1] = (co10 * yt1_0 - si10 * xt1_0) >> 15 */ /* x2[h2+2] = (si11 * yt1_1 + co11 * xt1_1) >> 15 */ /* x2[h2+3] = (co11 * yt1_1 - si11 * xt1_1) >> 15 */ /*-------------------------------------------------------------*/
p0 = _smpy32(si10, yt1_0); p1 = _smpy32(co10, xt1_0); xh2_0o = p0 + p1; p2 = _smpy32(co10, yt1_0); p3 = _smpy32(si10, xt1_0); xh2_1o = p2 - p3; p4 = _smpy32(si11, yt1_1); p5 = _smpy32(co11, xt1_1); xh2_2o = p4 + p5; p6 = _smpy32(co11, yt1_1); p7 = _smpy32(si11, xt1_1); xh2_3o = p6 - p7; /*-------------------------------------------------------------*/ /* x2[l1 ] = (si20 * yt0_0 + co20 * xt0_0) >> 15 */ /* x2[l1+1] = (co20 * yt0_0 - si20 * xt0_0) >> 15 */ /* x2[l1+2] = (si21 * yt0_1 + co21 * xt0_1) >> 15 */ /* x2[l1+3] = (co21 * yt0_1 - si21 * xt0_1) >> 15 */ /*-------------------------------------------------------------*/ p8 = _smpy32(si20, yt0_0); p9 = _smpy32(co20, xt0_0); xl1_0o = p8 + p9; pA = _smpy32(co20, yt0_0); pB = _smpy32(si20, xt0_0); xl1_1o = pA - pB; pC = _smpy32(si21, yt0_1); pD = _smpy32(co21, xt0_1); xl1_2o = pC + pD; pE = _smpy32(co21, yt0_1); pF = _smpy32(si21, xt0_1); xl1_3o = pE - pF; /*-------------------------------------------------------------*/ /* x2[l2 ] = (si30 * yt2_0 + co30 * xt2_0) >> 15 */ /* x2[l2+1] = (co30 * yt2_0 - si30 * xt2_0) >> 15 */ /* x2[l2+2] = (si31 * yt2_1 + co31 * xt2_1) >> 15 */ /* x2[l2+3] = (co31 * yt2_1 - si31 * xt2_1) >> 15 */ /*-------------------------------------------------------------*/ p10 = _smpy32(si30, yt2_0); p11 = _smpy32(co30, xt2_0); xl2_0o = p10 + p11; p12 = _smpy32(co30, yt2_0); p13 = _smpy32(si30, xt2_0); xl2_1o = p12 - p13; p14 = _smpy32(si31, yt2_1); p15 = _smpy32(co31, xt2_1); xl2_2o = p14 + p15; p16 = _smpy32(co31, yt2_1); p17 = _smpy32(si31, xt2_1); xl2_3o = p16 - p17; /*-------------------------------------------------------------*/ /* Combine outputs in double words */ /*-------------------------------------------------------------*/ ydword0 = _itod(_hill(x_1o_yt0_0), _hill(x_0o_xt0_0)); ydword1 = _itod(_hill(x_3o_yt0_1), _hill(x_2o_xt0_1)); yh2dword0 = _itod(xh2_1o, xh2_0o); yh2dword1 = _itod(xh2_3o, xh2_2o); yl1dword0 = _itod(xl1_1o, xl1_0o); yl1dword1 = _itod(xl1_3o, xl1_2o); 
/*-----------------------------------------------------------*/ /* Derive output pointers using the input pointer "x" */ /*-----------------------------------------------------------*/ x0 = x; x2 = x0; /*-------------------------------------------------------------*/ /* Store eight outputs - four legs of each butterfly */ /*-------------------------------------------------------------*/ _amemd8(&x2[0]) = ydword0; _amemd8(&x2[2]) = ydword1; _amemd8(&x2[h2]) = yh2dword0; _amemd8(&x2[h2+2]) = yh2dword1; _amemd8(&x2[l1]) = yl1dword0; _amemd8(&x2[l1+2]) = yl1dword1; _amemd8(&x2[l2]) = _itod(xl2_1o, xl2_0o); _amemd8(&x2[l2+2]) = _itod(xl2_3o, xl2_2o); /*-----------------------------------------------------------*/ /* When the twiddle factors are not to be re-used, j is */ /* incremented by 12, to reflect the fact that 6 words */ /* are consumed in every iteration. The input data pointer */ /* increments by 4. Note that within a stage, the stride */ /* does not change, and hence neither do the offsets of the */ /* four legs: 0, h2, l1, l2. */ /*-----------------------------------------------------------*/
j += 12; x += 4; predj = (j - fft_jmp); if (!predj) x += fft_jmp; if (!predj) j = 0;
} } if (radix == 2) radix_2(ptr_x, ptr_y, npoints); else if (radix == 4) radix_4(ptr_x, ptr_y, npoints); return;}void radix_2 ( int * restrict ptr_x, int * restrict ptr_y, int npoints){ int n0, j0; int * restrict x2, * restrict x0; int * restrict y0, * restrict y1, * restrict y2, * restrict y3; int i, j, l1, h2; double n0_dword0, n0_dword1, n0_dword2, n0_dword3; double n1_dword0, n1_dword1, n1_dword2, n1_dword3; double x_10, x_32, x_54, x_76, x_98, x_BA, x_DC, x_FE; long long n00_n20, n01_n21, n10_n30, n11_n31; long long n02_n22, n03_n23, n12_n32, n13_n33; /*-----------------------------------------------------------------*/ /* The following code performs a standard radix2 pass. */ /* Two pointers are used to access the input data. */ /* The input data is read "N/4" complex samples apart or "N/2" */ /* words apart using pointers "x0" and "x2". This produces outputs */ /* that are 0, N/8, N/2, 3N/8 for radix 2. */ /*-----------------------------------------------------------------*/ y0 = ptr_y; y2 = ptr_y + (int)npoints; x0 = ptr_x; x2 = ptr_x + (int)(npoints >> 1); /*------------------------------------------------------------*/ /* The pointers are set at the following locations which are */ /* half the offsets of a radix4 FFT. */ /*------------------------------------------------------------*/ y1 = y0 + (int)(npoints >> 2); y3 = y2 + (int)(npoints >> 2); l1 = _norm(npoints) + 1; j0 = 8; n0 = npoints >> 1; /*-----------------------------------------------------------------*/ /* The following code reads data identically for either a radix 4 */ /* or a radix 2 style decomposition. It writes out at different */ /* locations though. It checks if either half the points, or a */ /* quarter of the complex points have been exhausted to jump to */ /* prevent double reversal. 
*/ /*-----------------------------------------------------------------*/ j = 0; _nassert((int)(n0) % 4 == 0); _nassert((int)(ptr_x) % 8 == 0); _nassert((int)(ptr_y) % 8 == 0); _nassert((int)(x0) % 8 == 0); _nassert((int)(x2) % 8 == 0); _nassert((int)(y0) % 8 == 0); #pragma MUST_ITERATE(2,,2); for (i = 0; i < npoints; i += 8) { /*-------------------------------------------------------------*/ /* Digit reverse the index starting from 0. The increment to */ /* "j" is either by 4, or 8. */ /*-------------------------------------------------------------*/ DIG_REV(j, l1, h2); /*-------------------------------------------------------------*/ /* Read in the input data, from the first eight locations. */ /* These are transformed as a radix2. */ /*-------------------------------------------------------------*/ x_10 = _amemd8(&x0[0]); x_32 = _amemd8(&x0[2]); x_54 = _amemd8(&x0[4]); x_76 = _amemd8(&x0[6]); x0 += 8; /*--------------------------------------------------------*/ /* Perform radix2 style decomposition. */ /* n00 = x_0 + x_2; n01 = x_1 + x_3; */ /* n20 = x_0 - x_2; n21 = x_1 - x_3; */ /* n10 = x_4 + x_6; n11 = x_5 + x_7; */ /* n30 = x_4 - x_6; n31 = x_5 - x_7; */ /*--------------------------------------------------------*/ n00_n20 = _addsub(_lo(x_10), _lo(x_32)); n01_n21 = _addsub(_hi(x_10), _hi(x_32));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -