⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rfftw_mpi_test.c

📁 FFTW, a collection of fast C routines to compute the Discrete Fourier Transform in one or more dimensions
💻 C
📖 第 1 页 / 共 2 页
字号:
/* * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * */#include <stdlib.h>#include <stdio.h>#include <string.h>#include <math.h>#include <time.h>#include "fftw-int.h"#include "rfftw_mpi.h"#include "test_main.h"#define my_printf if (io_okay) printf#define my_fprintf if (io_okay) fprintf#define my_fflush if (io_okay) fflushint ncpus = 1;int my_cpu = 0;char fftw_prefix[] = "rfftw_mpi";/************************************************* * Speed tests *************************************************/#define MPI_TIME_FFT(fft,a,n,t) \{ \     double ts,te; \     double total_t; \     int iters = 1,iter; \     zero_arr((n), (a)); \     do { \          MPI_Barrier(MPI_COMM_WORLD); \          ts = MPI_Wtime(); \          for (iter = 0; iter < iters; ++iter) fft; \          te = MPI_Wtime(); \          t = (total_t = (te - ts)) / iters; \          iters *= 2; \     } while (total_t < 2.0); \}void zero_arr(int n, fftw_real * a){     int i;     for (i = 0; i < n; ++i)	  a[i] = 0.0;}void test_speed_aux(int n, fftw_direction dir, int flags, int specific){     fftw_real *in, *out;     fftw_plan plan;     double t;     fftw_time begin, end;     return; /* one-dim transforms not supported yet in MPI */     in = (fftw_real *) fftw_malloc(n * howmany_fields				    * 
sizeof(fftw_real));     out = (fftw_real *) fftw_malloc(n * howmany_fields				     * sizeof(fftw_real));     if (specific) {	  begin = fftw_get_time();	  plan = rfftw_create_plan_specific(n, dir,speed_flag | flags					    | wisdom_flag | no_vector_flag,					    in, howmany_fields,					    out, howmany_fields);	  end = fftw_get_time();     } else {	  begin = fftw_get_time();	  plan = rfftw_create_plan(n, dir, speed_flag | flags				   | wisdom_flag | no_vector_flag);	  end = fftw_get_time();     }     CHECK(plan != NULL, "can't create plan");     t = fftw_time_to_sec(fftw_time_diff(end, begin));     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));     WHEN_VERBOSE(2, rfftw_print_plan(plan));     FFTW_TIME_FFT(rfftw(plan, howmany_fields,			 in, howmany_fields, 1, out, howmany_fields, 1),		   in, n * howmany_fields, t);     rfftw_destroy_plan(plan);     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));     WHEN_VERBOSE(1, printf("\"mflops\" = 5/2 (n log2 n) / (t in microseconds)"			" = %f\n", 0.5 * howmany_fields * mflops(t, n)));     fftw_free(in);     fftw_free(out);     WHEN_VERBOSE(1, printf("\n"));}void test_speed_nd_aux(struct size sz,		       fftw_direction dir, int flags, int specific){     int local_nx, local_x_start, local_ny_after_transpose,          local_y_start_after_transpose, total_local_size;     fftw_real *in, *work;     rfftwnd_plan plan = 0;     rfftwnd_mpi_plan mpi_plan;     double t, t0 = 0.0;     int i, N;     if (sz.rank < 2)          return;     /* only bench in-place multi-dim transforms */     flags |= FFTW_IN_PLACE;	     N = 1;     for (i = 0; i < sz.rank - 1; ++i)	  N *= sz.narray[i];     N *= (sz.narray[i] + 2);     if (specific) {	  return;     } else {          if (io_okay)               plan = rfftwnd_create_plan(sz.rank, sz.narray,					  dir, speed_flag | flags					  | wisdom_flag | no_vector_flag);          mpi_plan = 
rfftwnd_mpi_create_plan(MPI_COMM_WORLD, sz.rank,sz.narray,					     dir, speed_flag | flags					     | wisdom_flag | no_vector_flag);     }     CHECK(mpi_plan != NULL, "can't create plan");     rfftwnd_mpi_local_sizes(mpi_plan, &local_nx, &local_x_start,			     &local_ny_after_transpose,			     &local_y_start_after_transpose,			     &total_local_size);     if (io_okay)          in = (fftw_real *) fftw_malloc(N * howmany_fields *					 sizeof(fftw_real));     else          in = (fftw_real *) fftw_malloc(total_local_size * howmany_fields *					 sizeof(fftw_real));     work = (fftw_real *) fftw_malloc(total_local_size * howmany_fields *                                         sizeof(fftw_real));     if (io_okay) {	  if (dir == FFTW_REAL_TO_COMPLEX) {	       FFTW_TIME_FFT(rfftwnd_real_to_complex(plan, howmany_fields,						     in, howmany_fields, 1,						     0, 0, 0),			     in, N * howmany_fields, t0);	  }	  else {	       FFTW_TIME_FFT(rfftwnd_complex_to_real(plan, howmany_fields,						     (fftw_complex *) in,						     howmany_fields, 1,						     0, 0, 0),			     in, N * howmany_fields, t0);	  }     }     rfftwnd_destroy_plan(plan);     WHEN_VERBOSE(1, my_printf("time for one fft (uniprocessor): %s\n",                               smart_sprint_time(t0)));     MPI_TIME_FFT(rfftwnd_mpi(mpi_plan, howmany_fields,                             in, NULL, FFTW_NORMAL_ORDER),                   in, total_local_size * howmany_fields, t);     if (io_okay) {          WHEN_VERBOSE(1, printf("NORMAL: time for one fft (%d cpus): %s",                                 ncpus, smart_sprint_time(t)));          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));          WHEN_VERBOSE(1, printf("NORMAL: \"mflops\" = 5/2 (N log2 N) / "                                 "(t in microseconds)"                                 " = %f\n", 0.5*howmany_fields*mflops(t, N)));          WHEN_VERBOSE(1, printf("NORMAL: parallel speedup: %f\n", t0 / t));     }     
MPI_TIME_FFT(rfftwnd_mpi(mpi_plan, howmany_fields,                             in, NULL, FFTW_TRANSPOSED_ORDER),                   in, total_local_size * howmany_fields, t);     if (io_okay) {          WHEN_VERBOSE(1, printf("TRANSP.: time for one fft (%d cpus): %s",                                 ncpus, smart_sprint_time(t)));          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));          WHEN_VERBOSE(1, printf("TRANSP.: \"mflops\" = 5/2 (N log2 N) / "                                 "(t in microseconds)"                                 " = %f\n", 0.5*howmany_fields*mflops(t, N)));          WHEN_VERBOSE(1, printf("TRANSP.: parallel speedup: %f\n", t0 / t));     }     MPI_TIME_FFT(rfftwnd_mpi(mpi_plan, howmany_fields,                             in, work, FFTW_NORMAL_ORDER),                   in, total_local_size * howmany_fields, t);     if (io_okay) {          WHEN_VERBOSE(1, printf("NORMAL,w/WORK: time for one fft (%d cpus): %s",                                 ncpus, smart_sprint_time(t)));          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));          WHEN_VERBOSE(1, printf("NORMAL,w/WORK: \"mflops\" = 5/2 (N log2 N) / "                                 "(t in microseconds)"                                 " = %f\n", 0.5*howmany_fields*mflops(t, N)));          WHEN_VERBOSE(1, printf("NORMAL,w/WORK: parallel speedup: %f\n",				 t0 / t));     }     MPI_TIME_FFT(rfftwnd_mpi(mpi_plan, howmany_fields,                             in, work, FFTW_TRANSPOSED_ORDER),                   in, total_local_size * howmany_fields, t);     if (io_okay) {          WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: time for one fft (%d cpus): %s",                                 ncpus, smart_sprint_time(t)));          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));          WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: \"mflops\" = 5/2 (N log2 N) / "                                 "(t in microseconds)"                               
  " = %f\n", 0.5*howmany_fields*mflops(t, N)));          WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: parallel speedup: %f\n",				 t0 / t));     }     rfftwnd_mpi_destroy_plan(mpi_plan);     fftw_free(in);     fftw_free(work);     WHEN_VERBOSE(1, my_printf("\n"));}/************************************************* * correctness tests *************************************************/double compute_error(fftw_real * A, int astride,                     fftw_real * B, int bstride, int n){     /* compute the relative error */     double error = 0.0;     int i;     for (i = 0; i < n; ++i) {          double a;          double mag;          a = fabs(A[i * astride] - B[i * bstride]);          mag = 0.5 * (fabs(A[i * astride]) + fabs(B[i * bstride]))+TOLERANCE;          a /= mag;          if (a > error)               error = a;#ifdef HAVE_ISNAN          CHECK(!isnan(a), "NaN in answer");#endif     }     return error;}void test_out_of_place(int n, int istride, int ostride,		       int howmany, fftw_direction dir,		       fftw_plan validated_plan, int specific){     /* one-dim. out-of-place transforms will never be supported in MPI */     WHEN_VERBOSE(2, my_printf("N/A\n"));}void test_in_place(int n, int istride,		   int howmany, fftw_direction dir,		   fftw_plan validated_plan, int specific){     /* one-dim. transforms are not supported yet in MPI */     WHEN_VERBOSE(2, my_printf("N/A\n"));}void test_out_of_place_both(int n, int istride, int ostride,			    int howmany,			    fftw_plan validated_plan_forward,			    fftw_plan validated_plan_backward){}void test_in_place_both(int n, int istride, int howmany,			fftw_plan validated_plan_forward,			fftw_plan validated_plan_backward)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -