📄 rfftw_threads_test.c
字号:
/* * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */#include <stdlib.h>#include <stdio.h>#include <string.h>#include <math.h>#include <time.h>#include "fftw_threads-int.h"#include "rfftw_threads.h"#include "test_main.h"char fftw_prefix[] = "rfftw_threads";int nthreads = 1;/************************************************* * Speed tests *************************************************/void zero_arr(int n, fftw_real * a){ int i; for (i = 0; i < n; ++i) a[i] = 0.0;}void test_speed_aux(int n, fftw_direction dir, int flags, int specific){ fftw_real *in, *out; fftw_plan plan; double t, t0; fftw_time begin, end; in = (fftw_real *) fftw_malloc(n * howmany_fields * sizeof(fftw_real)); out = (fftw_real *) fftw_malloc(n * howmany_fields * sizeof(fftw_real)); if (specific) { begin = fftw_get_time(); plan = rfftw_create_plan_specific(n, dir, speed_flag | flags | wisdom_flag | no_vector_flag, in, howmany_fields, out, howmany_fields); end = fftw_get_time(); } else { begin = fftw_get_time(); plan = rfftw_create_plan(n, dir, speed_flag | flags | wisdom_flag | no_vector_flag); end = fftw_get_time(); } CHECK(plan != NULL, "can't create plan"); t = fftw_time_to_sec(fftw_time_diff(end, begin)); WHEN_VERBOSE(2, printf("time for planner: %f s\n", t)); WHEN_VERBOSE(2, rfftw_print_plan(plan)); FFTW_TIME_FFT(rfftw(plan, howmany_fields, in, howmany_fields, 1, out, howmany_fields, 1), in, n * howmany_fields, t0); FFTW_TIME_FFT(rfftw_threads(nthreads,plan, howmany_fields, in, howmany_fields, 1, out, howmany_fields, 1), in, n * howmany_fields, t); rfftw_destroy_plan(plan); WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n", smart_sprint_time(t0))); WHEN_VERBOSE(1, printf("time for one fft (%d threads): %s", nthreads, smart_sprint_time(t))); WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n))); WHEN_VERBOSE(1, printf("\"mflops\" = 5/2 (n log2 n) / (t in microseconds)" " = %f\n", 0.5 * howmany_fields * mflops(t, n))); WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t)); fftw_free(in); fftw_free(out); WHEN_VERBOSE(1, printf("\n"));}void test_speed_nd_aux(struct size sz, fftw_direction dir, int flags, int specific){ fftw_real *in; fftwnd_plan plan; double t, t0; fftw_time begin, end; int i, N; /* only bench in-place multi-dim transforms */ flags |= FFTW_IN_PLACE; N = 1; for (i = 0; i < sz.rank - 1; ++i) N *= sz.narray[i]; N *= (sz.narray[i] + 2); in = (fftw_real *) fftw_malloc(N * howmany_fields * sizeof(fftw_real)); if (specific) { begin = fftw_get_time(); plan = rfftwnd_create_plan_specific(sz.rank, sz.narray, dir, speed_flag | flags | wisdom_flag | no_vector_flag, in, howmany_fields, 0, 1); } else { begin = fftw_get_time(); plan = rfftwnd_create_plan(sz.rank, sz.narray, dir, speed_flag | flags | wisdom_flag | no_vector_flag); } end = fftw_get_time(); CHECK(plan != NULL, "can't create plan"); t = fftw_time_to_sec(fftw_time_diff(end, begin)); WHEN_VERBOSE(2, printf("time for planner: %f s\n", t)); WHEN_VERBOSE(2, printf("\n")); WHEN_VERBOSE(2, (rfftwnd_print_plan(plan))); WHEN_VERBOSE(2, printf("\n")); if (dir == FFTW_REAL_TO_COMPLEX) { FFTW_TIME_FFT(rfftwnd_real_to_complex(plan, howmany_fields, in, howmany_fields, 1, 0, 0, 0), in, N * howmany_fields, t0); FFTW_TIME_FFT(rfftwnd_threads_real_to_complex(nthreads, plan, howmany_fields, in, howmany_fields, 1, 0, 0, 0), in, N * howmany_fields, t); } else { FFTW_TIME_FFT(rfftwnd_complex_to_real(plan, howmany_fields, (fftw_complex *) in, howmany_fields, 1, 0, 0, 0), in, N * howmany_fields, t0); FFTW_TIME_FFT(rfftwnd_threads_complex_to_real(nthreads, plan, howmany_fields, (fftw_complex *) in, howmany_fields, 1, 0, 0, 0), in, N * howmany_fields, t); } rfftwnd_destroy_plan(plan); WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n", smart_sprint_time(t0))); WHEN_VERBOSE(1, printf("time for one fft (%d threads): %s", nthreads, smart_sprint_time(t))); WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N))); WHEN_VERBOSE(1, printf("\"mflops\" = 5/2 (N log2 N) / (t in microseconds)" " = %f\n", 0.5 * howmany_fields * mflops(t, N))); WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t)); fftw_free(in); WHEN_VERBOSE(1, printf("\n"));}/************************************************* * correctness tests *************************************************/void fill_random(fftw_real * a, int n, int stride){ int i; /* generate random inputs */ for (i = 0; i < n; ++i) a[i * stride] = DRAND();}double compute_error(fftw_real * A, int astride, fftw_real * B, int bstride, int n){ /* compute the relative error */ double error = 0.0; int i; for (i = 0; i < n; ++i) { double a; double mag; a = fabs(A[i * astride] - B[i * bstride]); mag = 0.5 * (fabs(A[i * astride]) + fabs(B[i * bstride])) + TOLERANCE; a /= mag; if (a > error) error = a;#ifdef HAVE_ISNAN CHECK(!isnan(a), "NaN in answer");#endif } return error;}void array_compare(fftw_real * A, fftw_real * B, int n){ CHECK(compute_error(A, 1, B, 1, n) < TOLERANCE, "failure in RFFTW verification");}void test_out_of_place(int n, int istride, int ostride, int howmany, fftw_direction dir, fftw_plan validated_plan, int specific){ fftw_complex *in2, *out2; fftw_real *in1, *out1, *out3; fftw_plan plan; int i, j; int flags = measure_flag | wisdom_flag; if (coinflip()) flags |= FFTW_THREADSAFE; in1 = (fftw_real *) fftw_malloc(istride * n * sizeof(fftw_real) * howmany); in2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); out1 = (fftw_real *) fftw_malloc(ostride * n * sizeof(fftw_real) * howmany); out2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); out3 = (fftw_real *) fftw_malloc(n * sizeof(fftw_real)); if (!specific) plan = rfftw_create_plan(n, dir, flags); else plan = rfftw_create_plan_specific(n, dir, flags, in1, istride, out1, ostride); CHECK(plan != NULL, "can't create plan"); /* generate random inputs */ fill_random(in1, n, istride); for (j = 1; j < howmany; ++j) for (i = 0; i < n; ++i) in1[(j * n + i) * istride] = in1[i * istride]; /* copy random inputs to complex array for comparison with fftw: */ if (dir == FFTW_REAL_TO_COMPLEX) for (i = 0; i < n; ++i) { c_re(in2[i]) = in1[i * istride]; c_im(in2[i]) = 0.0; } else { int n2 = (n + 1) / 2; c_re(in2[0]) = in1[0]; c_im(in2[0]) = 0.0; for (i = 1; i < n2; ++i) { c_re(in2[i]) = in1[i * istride]; c_im(in2[i]) = in1[(n - i) * istride]; } if (n2 * 2 == n) { c_re(in2[n2]) = in1[n2 * istride]; c_im(in2[n2]) = 0.0; ++i; } for (; i < n; ++i) { c_re(in2[i]) = c_re(in2[n - i]); c_im(in2[i]) = -c_im(in2[n - i]); } } /* * fill in other positions of the array, to make sure that * rfftw doesn't overwrite them */ for (j = 1; j < istride; ++j) for (i = 0; i < n * howmany; ++i) in1[i * istride + j] = i * istride + j; for (j = 1; j < ostride; ++j) for (i = 0; i < n * howmany; ++i) out1[i * ostride + j] = -i * ostride + j; WHEN_VERBOSE(2, rfftw_print_plan(plan)); /* fft-ize */ if (howmany != 1 || istride != 1 || ostride != 1 || coinflip()) rfftw_threads(nthreads, plan, howmany, in1, istride, n * istride, out1, ostride, n * ostride); else rfftw_threads_one(nthreads, plan, in1, out1); rfftw_destroy_plan(plan); /* check for overwriting */ for (j = 1; j < istride; ++j) for (i = 0; i < n * howmany; ++i) CHECK(in1[i * istride + j] == i * istride + j, "input has been overwritten"); for (j = 1; j < ostride; ++j) for (i = 0; i < n * howmany; ++i) CHECK(out1[i * ostride + j] == -i * ostride + j, "output has been overwritten"); fftw(validated_plan, 1, in2, 1, n, out2, 1, n); if (dir == FFTW_REAL_TO_COMPLEX) { int n2 = (n + 1) / 2; out3[0] = c_re(out2[0]); for (i = 1; i < n2; ++i) { out3[i] = c_re(out2[i]); out3[n - i] = c_im(out2[i]); } if (n2 * 2 == n) out3[n2] = c_re(out2[n2]); } else { for (i = 0; i < n; ++i) out3[i] = c_re(out2[i]); } for (j = 0; j < howmany; ++j) CHECK(compute_error(out1 + j * n * ostride, ostride, out3, 1, n) < TOLERANCE, "test_out_of_place: wrong answer"); WHEN_VERBOSE(2, printf("OK\n")); fftw_free(in1); fftw_free(in2); fftw_free(out1); fftw_free(out2); fftw_free(out3);}void test_in_place(int n, int istride, int howmany, fftw_direction dir, fftw_plan validated_plan, int specific){ fftw_complex *in2, *out2; fftw_real *in1, *out1, *out3; fftw_plan plan; int i, j; int ostride = istride; int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE; if (coinflip()) flags |= FFTW_THREADSAFE; in1 = (fftw_real *) fftw_malloc(istride * n * sizeof(fftw_real) * howmany); in2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); out1 = in1; out2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); out3 = (fftw_real *) fftw_malloc(n * sizeof(fftw_real)); if (!specific) plan = rfftw_create_plan(n, dir, flags); else plan = rfftw_create_plan_specific(n, dir, flags, in1, istride, out1, ostride); CHECK(plan != NULL, "can't create plan"); /* generate random inputs */ fill_random(in1, n, istride); for (j = 1; j < howmany; ++j) for (i = 0; i < n; ++i) in1[(j * n + i) * istride] = in1[i * istride]; /* copy random inputs to complex array for comparison with fftw: */ if (dir == FFTW_REAL_TO_COMPLEX) for (i = 0; i < n; ++i) { c_re(in2[i]) = in1[i * istride]; c_im(in2[i]) = 0.0; } else { int n2 = (n + 1) / 2; c_re(in2[0]) = in1[0]; c_im(in2[0]) = 0.0; for (i = 1; i < n2; ++i) { c_re(in2[i]) = in1[i * istride]; c_im(in2[i]) = in1[(n - i) * istride]; } if (n2 * 2 == n) { c_re(in2[n2]) = in1[n2 * istride]; c_im(in2[n2]) = 0.0; ++i; } for (; i < n; ++i) { c_re(in2[i]) = c_re(in2[n - i]); c_im(in2[i]) = -c_im(in2[n - i]); } } /* * fill in other positions of the array, to make sure that * rfftw doesn't overwrite them */ for (j = 1; j < istride; ++j) for (i = 0; i < n * howmany; ++i) in1[i * istride + j] = i * istride + j; WHEN_VERBOSE(2, rfftw_print_plan(plan)); /* fft-ize */ if (howmany != 1 || istride != 1 || coinflip()) rfftw_threads(nthreads, plan, howmany, in1, istride, n * istride, 0, 0, 0); else rfftw_threads_one(nthreads, plan, in1, NULL); rfftw_destroy_plan(plan); /* check for overwriting */ for (j = 1; j < ostride; ++j) for (i = 0; i < n * howmany; ++i) CHECK(out1[i * ostride + j] == i * ostride + j, "output has been overwritten");
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -