📄 c_fft6.c
字号:
} /* butterfly computations */ ijDiff = 1; stride = 2; spowerOfW = ndv2; for (stage = 0; stage < logn; ++stage) { /* Invariant: stride = 2 ** stage Invariant: ijDiff = 2 ** (stage - 1) */ first = 0; for (powerOfW = 0; powerOfW < ndv2; powerOfW += spowerOfW) { pw = w[powerOfW]; /* Invariant: pwr + sqrt(-1)*pwi = W**(powerOfW - 1) */ for (i = first; i < n; i += stride) { j = i + ijDiff; jj = a[j]; ii = a[i]; temp.re = jj.re * pw.re - jj.im * pw.im; temp.im = jj.re * pw.im + jj.im * pw.re; a[j].re = ii.re - temp.re; a[j].im = ii.im - temp.im; a[i].re = ii.re + temp.re; a[i].im = ii.im + temp.im; } ++first; } ijDiff = stride; stride <<= 1; spowerOfW /= 2; } return 0;} /* fft *//* ----------------------------------------------------------------- */int scale(complex *a, complex *v, const int n) { register int i, j, index; complex aa, vv;#pragma omp parallel for private(i, j, index, aa, vv) for (i = 0; i < n; ++i) { for (j = 0; j < n; ++j) { index = i * n + j; aa = a[index]; vv = v[index]; a[index].re = aa.re * vv.re - aa.im * vv.im; a[index].im = aa.re * vv.im + aa.im * vv.re; } } return 0;} /* ----------------------------------------------------------------- */int scale_seq(complex *a, complex *v, const int n) { register int i, j, index; complex aa, vv; for (i = 0; i < n; ++i) { for (j = 0; j < n; ++j) { index = i * n + j; aa = a[index]; vv = v[index]; a[index].re = aa.re * vv.re - aa.im * vv.im; a[index].im = aa.re * vv.im + aa.im * vv.re; } } return 0;} /* ----------------------------------------------------------------------- chkmat - check the output matrix for correctness * ----------------------------------------------------------------------- */int chkmat(complex *a, int n) { int sign, i, j, nn, errors; real_type EPSILON = 1e-4f; errors = 0; nn = n * n; for (i = 0; i < n; ++i) { sign = 1; for (j = 0; j < n; ++j) { if (a[i * n + j].re > nn * sign + EPSILON) ++errors; if (a[i * n + j].re < nn * sign - EPSILON) ++errors; sign = -sign; } } if (errors > 0) { printf("Errors = %d\n", errors); exit(-1); } return 0;} /* ----------------------------------------------------------------- */int prperf(double tpose_time, double scale_time, double ffts_time, int nn, int lognn, int iters) { double secs; double fpercent, spercent, tpercent; double flops, mflops; tpose_time /= iters; scale_time /= iters; ffts_time /= iters; secs = tpose_time + scale_time + ffts_time; tpercent = tpose_time / secs * 100; spercent = scale_time / secs * 100; fpercent = ffts_time / secs * 100; flops = (real_type) (nn * 5 * lognn); mflops = flops / 1e6f / secs; printf("***********************\n"); printf("1D FFT %d points\n" ,nn); printf("***********************\n"); printf("Time per input vector:\n"); printf("tpose : %lf %lf percent\n", tpose_time, tpercent); printf("scale : %lf %lf percent\n", scale_time, spercent); printf("ffts : %lf %lf percent\n", ffts_time, fpercent); printf("total(s) : %lf\n", secs); printf("mflop/s : %lf\n", mflops); return 0;} /* prperf *//* ----------------------------------------------------------------------- Base 2 logarithm * ----------------------------------------------------------------------- */int log2_int(int n) { register int i, aux; aux = 1; for (i = 0; i <= 128; i++) { if (aux > n) return (i - 1); aux <<= 1; } return -1;}/* ----------------------------------------------------------------- */int print_mat(complex *a, int n) { register int i, j; for (i = 0; i < n; ++i) { for (j = 0; j < n; ++j) { printf(" (%.0f, %.0f)", a[i * n + j].re, a[i * n + j].im); } printf("\n"); } printf("\n"); return 0;} /* ----------------------------------------------------------------- */int print_vec(complex *a, int n) { register int i; for (i = 0; i < n*n; ++i) printf(" (%.0f, %.0f)", a[i].re, a[i].im); printf("\n\n"); return 0;} /* ----------------------------------------------------------------- */int main(int argc, char *argv[]) { complex **xin; /* input vector */ complex **aux; /* intermediate array (same size) */ /* precomputed vectors */ int *brt; /* bit reverse table for 1d fft's */ complex *w; /* twiddles for 1d fft's */ complex *v; /* twiddles for 2d fft */ double total_time; /* timing */ int k; int NUMTHREADS, MAX_THREADS; #define ID omp_get_thread_num() float MEMORY; /* number of bytes required */ char *PARAM_NAMES[NUM_ARGS] = {"Size (a power of 2).", "No. of iterations"}; char *TIMERS_NAMES[NUM_TIMERS] = {"Total_time", "Tpose time", "Scale time", "Column FFTs time" }; char *DEFAULT_VALUES[NUM_ARGS] = {"64", "10"}; NUMTHREADS = omp_get_max_threads(); OSCR_init (NUMTHREADS, "Bailey's '6-step' Fast Fourier Transform", "Use 'fft6' <size (in K)> <iters>", NUM_ARGS, PARAM_NAMES, DEFAULT_VALUES , NUM_TIMERS, NUM_TIMERS, TIMERS_NAMES, argc, argv); /* Command line arguments processing */ N = OSCR_getarg_int(1); ITERS = OSCR_getarg_int(2); /* if(argc == 3) { N = atoi(argv[1]); ITERS = atoi(argv[2]); } else { printf("Usage: %s N ITERS\n", argv[0]); printf("N is the size of the input signal. Should be a power of two.\n"); N = 64; ITERS = 10; } */ MAX_THREADS = omp_get_max_threads(); NN = N * N; LOGN = log2_int(N); LOGNN = log2_int(NN); MEMORY = N * sizeof(int) + 2 * (NN) * sizeof(complex) * MAX_THREADS + (NN) * sizeof(complex) + (N / 2) * sizeof(complex); /* Memory allocation */ xin = OSCR_calloc(MAX_THREADS, sizeof(*xin)); aux = OSCR_calloc(MAX_THREADS, sizeof(*aux)); brt = (int *)OSCR_calloc(N, sizeof(int)); v = (complex *)OSCR_calloc(N * N, sizeof(complex)); /* twiddles for 2d fft */ w = (complex *)OSCR_calloc((N / 2), sizeof(complex)); /* twiddles for 1d fft's */ printf("Input values:\n"); printf("=============\n"); printf("N : %u\n", N); printf("ITERS : %u\n", ITERS); printf("NN : %u\n", NN); printf("LOGN : %u\n", LOGN); printf("LOGNN : %u\n", LOGNN); printf("Memory : %.2f Kbytes\n", MEMORY / 1024); /* precompute the input array and fft constants */ gen_bit_reverse_table(brt, N); gen_w_table(w, N, LOGN, N / 2); gen_v_table(v, N); for (k = 0; k < MAX_THREADS; k++) { xin[k] = (complex *)OSCR_calloc(N * N, sizeof(complex)); aux[k] = (complex *)OSCR_calloc(N * N, sizeof(complex)); }#define TOTAL 0OSCR_timer_start(TOTAL);#pragma omp parallel for private(k) for(k = 0; k < ITERS; k++) { dgen(xin[ID], N); tpose(xin[ID], aux[ID], N); cffts(aux[ID], brt, w, N, LOGN, N / 2); scale(aux[ID], v, N); tpose(aux[ID], xin[ID], N); cffts(xin[ID], brt, w, N, LOGN, N / 2); tpose(xin[ID], aux[ID], N); chkmat(aux[ID], N); } OSCR_timer_stop(TOTAL); total_time = OSCR_timer_read(TOTAL); /* Display results and time */ OSCR_report(1, TIMERS_NAMES); printf("============================"); printf("\n# THREADS : %d\n", NUMTHREADS); printf("N : %d\n", N); printf("ITERS : %d\n", ITERS); printf("total TIME: %.6lf secs.\n", total_time); #undef ID#undef MAX_THREADS return 0;}/* ----------------------------------------------------------------- *//* * vim:ts=2:sw=2:set nonu: */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -