📄 c_fft6.c
字号:
/* *********************************************************************** This program is part of the OpenMP Source Code Repository http://www.pcg.ull.es/ompscr/ e-mail: ompscr@etsii.ull.es This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License (LICENSE file) along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA FILE: c_fft6.c VERSION: 1.0 DATE: May 2004 AUTHOR: F. de Sande COMMENTS TO: sande@csi.ull.es DESCRIPTION: Bailey's 6-step 1D Fast Fourier Transform Computes the 1D FFT of an input signal (complex array) COMMENTS: The code performs a number (ITERS) of iterations of the Bailey's 6-step FFT algorithm (following the ideas in the CMU Task parallel suite). 1.- Generates an input signal vector (dgen) with size n=n1xn2 stored in row major order In this code the size of the input signal is NN=NxN (n=NN, n1=n2=N) 2.- Transpose (tpose) A to have it stored in column major order 3.- Perform independent FFTs on the rows (cffts) 4.- Scale each element of the resulting array by a factor of w[n]**(p*q) 5.- Transpose (tpose) to prepair it for the next step 6.- Perform independent FFTs on the rows (cffts) 7.- Transpose the resulting matrix The code requires nested Parallelism. REFERENCES: David H. Bailey FFTs in External or Hierarchical Memory, The Journal of Supercomputing, vol. 4, no. 1, pg. 23-35. Mar 1990, vol. 4, no. 7, pg. 321. Jul 1961 http://www-2.cs.cmu.edu/~fx/tpsuite.html http://en.wikipedia.org/wiki/Cooley-Tukey_FFT_algorithm BASIC PRAGMAS: parallel for USAGE: ./c_fft6.par 1024 10 INPUT: The size of the input signal. The number of iterations to perform OUTPUT: The code tests the correctness of the output signal FILE FORMATS: - RESTRICTIONS: The size of the input signal MUST BE a power of 2 REVISION HISTORY: **************************************************************************/#include "OmpSCR.h"#include <math.h>#define PI (3.141592653589793f)#define NUM_ARGS 2#define NUM_TIMERS 4typedef double real_type;/* Input values (CONSTANTS) */unsigned NN; /* input vector is NN complex values */ unsigned LOGNN; /* log base 2 of NN */unsigned N; /* square root of NN */unsigned LOGN; /* log base 2 of N */int ITERS; /* number of input vectors */typedef struct { real_type re, im; } complex;/* ----------------------------------------------------------------------- PROTOTYPES * ----------------------------------------------------------------------- */int prperf(double tpose_time, double scale_time, double ffts_time, int nn, int lognn, int iters);int gen_bit_reverse_table(int *brt, int n);int gen_w_table(complex *w, int n, int logn, int ndv2);int gen_v_table(complex *v, int n);int dgen(complex *xin, int n);int tpose(complex *a, complex *b, int n);int tpose_seq(complex *a, complex *b, const int n);int cffts(complex *a, int *brt, complex *w, int n, int logn, int ndv2);int cffts_seq(complex *a, int *brt, complex *w, const int n, int logn, int ndv2);int fft(complex *a, int *brt, complex *w, int n, int logn, int ndv2);int scale(complex *a, complex *v, int n);int scale_seq(complex *a, complex *v, const int n);int chkmat(complex *a, int n);complex complex_pow(complex z, int n);int log2_int(int n);int print_mat(complex *a, int n);int print_vec(complex *a, int n);/* ----------------------------------------------------------------- IMPLEMENTATION * ----------------------------------------------------------------- *//* ----------------------------------------------------------------- gen_bit_reverse_table - initialize bit reverse table Postcondition: br(i) = bit-reverse(i-1) + 1 ----------------------------------------------------------------- */int gen_bit_reverse_table(int *brt, int n) { register int i, j, k, nv2; nv2 = n / 2; j = 1; brt[0] = j; for (i = 1; i < n; ++i) { k = nv2; while (k < j) { j -= k; k /= 2; } j += k; brt[i] = j; } return 0;} /* ----------------------------------------------------------------------- gen_w_table: generate powers of w. postcondition: w(i) = w**(i-1) * ----------------------------------------------------------------------- */int gen_w_table(complex *w, int n, int logn, int ndv2) { register int i; complex ww, pt; ww.re = cos(PI / ndv2); ww.im = -sin(PI / ndv2); w[0].re = 1.f; w[0].im = 0.f; pt.re = 1.f; pt.im = 0.f; for (i = 1; i < ndv2; ++i) { w[i].re = pt.re * ww.re - pt.im * ww.im; w[i].im = pt.re * ww.im + pt.im * ww.re; pt = w[i]; } return 0;} /* ----------------------------------------------------------------------- gen_v_table - gen 2d twiddle factors * ----------------------------------------------------------------------- */int gen_v_table(complex *v, int n) { register int j, k; complex wn; wn.re = cos(PI * 2.f / (n * n)); wn.im = -sin(PI * 2.f / (n * n)); for (j = 0; j < n; ++j) { for (k = 0; k < n; ++k) { v[j * n + k] = complex_pow(wn, j * k); } } return 0;} /* ----------------------------------------------------------------------- z^n = modulo^n (cos(n * alfa) + i sin(n * alfa) * ----------------------------------------------------------------------- */complex complex_pow(complex z, int n) { complex temp; real_type pot, nangulo, modulo, angulo; modulo = sqrt(z.re * z.re + z.im * z.im); angulo = atan(z.im / z.re); pot = pow(modulo, n); nangulo = n * angulo; temp.re = pot * cos(nangulo); temp.im = pot * sin(nangulo); return(temp);}/* ----------------------------------------------------------------------- A: Para una se馻l de entrada de tama駉 N y de forma (1,0), (1,0), ..., (1,0) la salida debe ser de la forma (N,0), (0,0), ..., (0,0) B: Para una se馻l de entrada de tama駉 N y de forma (0,0), (0,0), ..., (N*N, 0), ..., (0,0) la salida debe ser de la forma (N*N, 0), (0,0), ..., (0,0) Propiedad 鷗il: C: FFT(s1 + s2) = FFT(s1) + FFT(s2) * ----------------------------------------------------------------------- */int dgen(complex *xin, int n) { register int i; int nn = n * n; /* Se馻l de forma B */ for (i = 0; i < nn; ++i) { xin[i].re = 0.f; xin[i].im = 0.f; } xin[nn / 2].re = (real_type)nn; /* Se馻l de forma A */ /* for (i = 0; i < nn; ++i) { xin[i].re = 1.0; xin[i].im = 0.f; } */ return 0;} /* ----------------------------------------------------------------- */int tpose(complex *a, complex *b, const int n) { register int i, j; #pragma omp parallel for private(i, j) for (i = 0; i < n; ++i) { for (j = i; j < n; ++j) { b[i * n + j] = a[j * n + i]; b[j * n + i] = a[i * n + j]; } } return 0;} /* ----------------------------------------------------------------- */int tpose_seq(complex *a, complex *b, const int n) { register int i, j; for (i = 0; i < n; ++i) { for (j = i; j < n; ++j) { b[i * n + j] = a[j * n + i]; b[j * n + i] = a[i * n + j]; } } return 0;} /* ----------------------------------------------------------------- */int cffts(complex *a, int *brt, complex *w, const int n, int logn, int ndv2) { register int i;#pragma omp parallel for private(i) for (i = 0; i < n; ++i) fft(&a[i * n], brt, w, n, logn, ndv2); /* fft(a + i * n, brt, w, n, logn, ndv2); */ return 0;}/* ----------------------------------------------------------------- */int cffts_seq(complex *a, int *brt, complex *w, const int n, int logn, int ndv2) { register int i; for (i = 0; i < n; ++i) fft(&a[i * n], brt, w, n, logn, ndv2); return 0;}/* ----------------------------------------------------------------------- Fast Fourier Transform 1D in-place complex-complex decimation-in-time Cooley-Tukey * ----------------------------------------------------------------------- */int fft(complex *a, int *brt, complex *w, int n, int logn, int ndv2) { register int i, j; int powerOfW, stage, first, spowerOfW; int ijDiff; int stride; complex ii, jj, temp, pw; /* bit reverse step */ for (i = 0; i < n; ++i) { j = brt[i]; if (i < (j-1)) { temp = a[j - 1]; a[j - 1] = a[i]; a[i] = temp; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -