📄 dsp_fft32x32_cn.c
字号:
/* needs to be comprehended. The real part of the complex number is *//* in the lower half, and the imaginary part is in the upper half. *//* The flow breaks in case of "xl0" and "xl1" because in this case *//* the real part needs to be combined with the imaginary part because *//* of the multiplication by "j". This requires a packed quantity like *//* "xl21xl20" to be rotated as "xl20xl21" so that it can be combined *//* using add2's and sub2's. Hence the natural version of C code *//* shown below is transformed using packed data processing as shown: *//* *//* xl0 = x[2 * i0 ] - x[2 * i2 ]; *//* xl1 = x[2 * i0 + 1] - x[2 * i2 + 1]; *//* xl20 = x[2 * i1 ] - x[2 * i3 ]; *//* xl21 = x[2 * i1 + 1] - x[2 * i3 + 1]; *//* *//* xt1 = xl0 + xl21; *//* yt2 = xl1 + xl20; *//* xt2 = xl0 - xl21; *//* yt1 = xl1 - xl20; *//* *//* xl1_xl0 = _sub2(x21_x20, x21_x20) *//* xl21_xl20 = _sub2(x32_x22, x23_x22) *//* xl20_xl21 = _rotl(xl21_xl20, 16) *//* *//* yt2_xt1 = _add2(xl1_xl0, xl20_xl21) *//* yt1_xt2 = _sub2(xl1_xl0, xl20_xl21) *//* *//* Also notice that xt1, yt1 endup on seperate words, these need to *//* be packed together to take advantage of the packed twiddle fact *//* ors that have been loaded. In order for this to be achieved they *//* are re-aligned as follows: *//* *//* yt1_xt1 = _packhl2(yt1_xt2, yt2_xt1) *//* yt2_xt2 = _packhl2(yt2_xt1, yt1_xt2) *//* *//* The packed words "yt1_xt1" allows the loaded"sc" twiddle factor *//* to be used for the complex multiplies. The real part os the *//* complex multiply is implemented using _dotp2. The imaginary *//* part of the complex multiply is implemented using _dotpn2 *//* after the twiddle factors are swizzled within the half word. *//* *//* (X + jY) ( C + j S) = (XC + YS) + j (YC - XS). *//* *//* The actual twiddle factors for the FFT are cosine, - sine. The *//* twiddle factors stored in the table are csine and sine, hence *//* the sign of the "sine" term is comprehended during multipli- *//* cation as shown above. *//* *//* *//* ASSUMPTIONS *//* *//* The size of the FFT, n, must be a power of 4 and greater than *//* or equal to 16 and less than 32768. *//* *//* The arrays 'x[]', 'y[]', and 'w[]' all must be aligned on a *//* double-word boundary for the "optimized" implementations. *//* *//* The input and output data are complex, with the real/imaginary *//* components stored in adjacent locations in the array. The real *//* components are stored at even array indices, and the imaginary *//* components are stored at odd array indices. *//* *//* ------------------------------------------------------------------------ *//* Copyright (c) 2007 Texas Instruments, Incorporated. *//* All Rights Reserved. *//* ======================================================================== */#pragma CODE_SECTION(DSP_fft32x32_cn, ".text:ansi");#include "DSP_fft32x32_cn.h"# define DIG_REV(i, m, j) \ do { \ unsigned _ = (i); \ _ = ((_ & 0x33333333) << 2) | ((_ & ~0x33333333) >> 2); \ _ = ((_ & 0x0F0F0F0F) << 4) | ((_ & ~0x0F0F0F0F) >> 4); \ _ = ((_ & 0x00FF00FF) << 8) | ((_ & ~0x00FF00FF) >> 8); \ _ = ((_ & 0x0000FFFF) << 16) | ((_ & ~0x0000FFFF) >> 16); \ (j) = _ >> (m); \ } while (0)#define SMPY_32(x,y) \ (int)(((long long)(x)*(long long)(y))>>31)void DSP_fft32x32_cn ( int * ptr_w, int npoints, int * ptr_x, int * ptr_y){ const int *w; int *x, *x2, *x0; int *y0, *y1, *y2, *y3; int i, j, l1, l2, h2, predj, tw_offset, stride, fft_jmp; int xt0_0, yt0_0, xt1_0, yt1_0, xt2_0, yt2_0; int xh0_0, xh1_0, xh20_0, xh21_0, xl0_0, xl1_0, xl20_0, xl21_0; int xh0_1, xh1_1, xl0_1, xl1_1; int x_0, x_1, x_2, x_3, x_l1_0, x_l1_1, x_l2_0, x_l2_1; int xh0_2, xh1_2, xl0_2, xl1_2, xh0_3, xh1_3, xl0_3, xl1_3; int x_4, x_5, x_6, x_7, x_h2_0, x_h2_1; int x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f; int si10, si20, si30, co10, co20, co30; int n00, n10, n20, n30, n01, n11, n21, n31; int n02, n12, n22, n32, n03, n13, n23, n33; int n0, j0, radix, norm, m; /*---------------------------------------------------------------------*/ /* Determine the magnitude od the number of points to be transformed. */ /* Check whether we can use a radix4 decomposition or a mixed radix */ /* transformation, by determining modulo 2. */ /*---------------------------------------------------------------------*/ for (i = 31, m = 1; (npoints & (1 << i)) == 0; i--, m++) ; radix = m & 1 ? 2 : 4; norm = m - 2; /*----------------------------------------------------------------------*/ /* The stride is quartered with every iteration of the outer loop. It */ /* denotes the seperation between any two adjacent inputs to the butter */ /* -fly. This should start out at N/4, hence stride is initially set to */ /* N. For every stride, 6*stride twiddle factors are accessed. The */ /* "tw_offset" is the offset within the current twiddle factor sub- */ /* table. This is set to zero, at the start of the code and is used to */ /* obtain the appropriate sub-table twiddle pointer by offseting it */ /* with the base pointer "ptr_w". */ /*----------------------------------------------------------------------*/ stride = npoints; tw_offset = 0; fft_jmp = 6 * stride; #ifndef NOASSUME _nassert(stride > 4); #pragma MUST_ITERATE(1,,1); #endif while (stride > radix) { /*-----------------------------------------------------------------*/ /* At the start of every iteration of the outer loop, "j" is set */ /* to zero, as "w" is pointing to the correct location within the */ /* twiddle factor array. For every iteration of the inner loop */ /* 6 * stride twiddle factors are accessed. For eg, */ /* */ /* #Iteration of outer loop # twiddle factors #times cycled */ /* 1 6 N/4 1 */ /* 2 6 N/16 4 */ /* ... */ /*-----------------------------------------------------------------*/ j = 0; fft_jmp >>= 2; /*-----------------------------------------------------------------*/ /* Set up offsets to access "N/4", "N/2", "3N/4" complex point or */ /* "N/2", "N", "3N/2" half word */ /*-----------------------------------------------------------------*/ h2 = stride >> 1; l1 = stride; l2 = stride + (stride >> 1); /*-----------------------------------------------------------------*/ /* Reset "x" to point to the start of the input data array. */ /* "tw_offset" starts off at 0, and increments by "6 * stride" */ /* The stride quarters with every iteration of the outer loop */ /*-----------------------------------------------------------------*/ x = ptr_x; w = ptr_w + tw_offset; tw_offset += fft_jmp; stride >>= 2; /*----------------------------------------------------------------*/ /* The following loop iterates through the different butterflies, */ /* within a given stage. Recall that there are logN to base 4 */ /* stages. Certain butterflies share the twiddle factors. These */ /* are grouped together. On the very first stage there are no */ /* butterflies that share the twiddle factor, all N/4 butter- */ /* flies have different factors. On the next stage two sets of */ /* N/8 butterflies share the same twiddle factor. Hence after */ /* half the butterflies are performed, j the index into the */ /* factor array resets to 0, and the twiddle factors are reused. */ /* When this happens, the data pointer 'x' is incremented by the */ /* fft_jmp amount. */ /*----------------------------------------------------------------*/ #ifndef NOASSUME _nassert((int)(w) % 8 == 0); _nassert((int)(x) % 8 == 0); _nassert(h2 % 8 == 0); _nassert(l1 % 8 == 0); _nassert(l2 % 8 == 0); _nassert(npoints >= 16); #pragma MUST_ITERATE(4,,1); #endif for (i = 0; i < npoints; i += 4) { /*------------------------------------------------------------*/ /* Read the first three twiddle factor values. This loop co- */ /* mputes one radix 4 butterfly at a time. */ /*------------------------------------------------------------*/ co10 = w[j+1]; si10 = w[j+0]; co20 = w[j+3]; si20 = w[j+2]; co30 = w[j+5]; si30 = w[j+4]; /*-----------------------------------------------------------*/ /* Read in the data elements for the four inputs of radix4 */ /* butterfly. */ /*-----------------------------------------------------------*/ x_0 = x[0]; x_1 = x[1]; x_l1_0 = x[l1]; x_l1_1 = x[l1+1]; x_l2_0 = x[l2]; x_l2_1 = x[l2+1]; x_h2_0 = x[h2]; x_h2_1 = x[h2+1]; /*-----------------------------------------------------------*/ /* Perform radix2 style DIF butterflies, for initial radix4 */ /*-----------------------------------------------------------*/ xh0_0 = x_0 + x_l1_0; xh1_0 = x_1 + x_l1_1; xl0_0 = x_0 - x_l1_0; xl1_0 = x_1 - x_l1_1; xh20_0 = x_h2_0 + x_l2_0; xh21_0 = x_h2_1 + x_l2_1; xl20_0 = x_h2_0 - x_l2_0; xl21_0 = x_h2_1 - x_l2_1; /*-----------------------------------------------------------*/ /* Derive output pointers using the input pointer "x" */ /*-----------------------------------------------------------*/ x0 = x; x2 = x0; /*-----------------------------------------------------------*/ /* When the twiddle factors are not to be re-used, j is */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -