📄 fftw_mpi_test.c

📁 FFTW, a collection of fast C routines to compute the Discrete Fourier Transform in one or more dimensions
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * */#include <stdlib.h>#include <stdio.h>#include <string.h>#include <math.h>#include <time.h>#include "fftw-int.h"#include "fftw_mpi.h"#include "test_main.h"#define my_printf if (io_okay) printf#define my_fprintf if (io_okay) fprintf#define my_fflush if (io_okay) fflushint ncpus = 1;int my_cpu = 0;int only_parallel = 0;char fftw_prefix[] = "fftw_mpi";/************************************************* * Speed tests *************************************************/#define MPI_TIME_FFT(fft,a,n,t) \{ \     double ts,te; \     double total_t; \     int iters = 1,iter; \     zero_arr((n), (a)); \     do { \          MPI_Barrier(MPI_COMM_WORLD); \          ts = MPI_Wtime(); \          for (iter = 0; iter < iters; ++iter) fft; \          te = MPI_Wtime(); \          t = (total_t = (te - ts)) / iters; \          iters *= 2; \     } while (total_t < 2.0); \}void zero_arr(int n, fftw_complex * a){     int i;     for (i = 0; i < n; ++i)	  c_re(a[i]) = c_im(a[i]) = 0.0;}void test_speed_aux(int n, fftw_direction dir, int flags, int specific){     int local_n, local_start, local_n_after_transform,	  local_start_after_transform, total_local_size, nalloc;     fftw_complex *in, *work;     fftw_plan plan = 0;     
fftw_mpi_plan mpi_plan;     double t, t0 = 0.0;     if (specific || !(flags & FFTW_IN_PLACE))	  return;     if (io_okay && !only_parallel)	  plan = fftw_create_plan(n, dir, speed_flag | flags				  | wisdom_flag | no_vector_flag);     mpi_plan = fftw_mpi_create_plan(MPI_COMM_WORLD, n, dir,				     speed_flag | flags				     | wisdom_flag | no_vector_flag);     CHECK(mpi_plan, "failed to create plan!");     fftw_mpi_local_sizes(mpi_plan, &local_n, &local_start,			  &local_n_after_transform,			  &local_start_after_transform,			  &total_local_size);     if (io_okay && !only_parallel)	  nalloc = n;     else	  nalloc = total_local_size;     in = (fftw_complex *) fftw_malloc(nalloc * howmany_fields				       * sizeof(fftw_complex));     work = (fftw_complex *) fftw_malloc(nalloc * howmany_fields					 * sizeof(fftw_complex));     if (io_okay) {	  WHEN_VERBOSE(2, fftw_mpi_print_plan(mpi_plan));     }     if (io_okay && !only_parallel) {	  FFTW_TIME_FFT(fftw(plan, howmany_fields,			     in, howmany_fields, 1, work, 1, 0),			in, n * howmany_fields, t0);	  fftw_destroy_plan(plan);	  WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n", smart_sprint_time(t0)));     }          MPI_TIME_FFT(fftw_mpi(mpi_plan, howmany_fields, in, NULL),		  in, total_local_size * howmany_fields, t);     if (io_okay) {	  WHEN_VERBOSE(1, printf("time for one fft (%d cpus): %s", ncpus, smart_sprint_time(t)));	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));	  WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"				 " = %f\n", howmany_fields * mflops(t, n)));	  if (!only_parallel)	       WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t));     }     MPI_TIME_FFT(fftw_mpi(mpi_plan, howmany_fields, in, work),		  in, total_local_size * howmany_fields, t);     if (io_okay) {	  WHEN_VERBOSE(1, printf("w/WORK: time for one fft (%d cpus): %s", ncpus, smart_sprint_time(t)));	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));	  
WHEN_VERBOSE(1, printf("w/WORK: \"mflops\" = 5 (n log2 n) / (t in microseconds)"				 " = %f\n", howmany_fields * mflops(t, n)));	  if (!only_parallel)	     WHEN_VERBOSE(1, printf("w/WORK: parallel speedup: %f\n", t0 / t));     }     fftw_free(in);     fftw_free(work);     fftw_mpi_destroy_plan(mpi_plan);     WHEN_VERBOSE(1, my_printf("\n"));}void test_speed_nd_aux(struct size sz,		       fftw_direction dir, int flags, int specific){     int local_nx, local_x_start, local_ny_after_transpose,	  local_y_start_after_transpose, total_local_size;     fftw_complex *in, *work;     fftwnd_plan plan = 0;     fftwnd_mpi_plan mpi_plan;     double t, t0 = 0.0;     int i, N;          if (sz.rank < 2)	  return;     /* only bench in-place multi-dim transforms */     flags |= FFTW_IN_PLACE;	     N = 1;     for (i = 0; i < sz.rank; ++i)	  N *= (sz.narray[i]);     if (specific) {	  return;     } else {	  if (io_okay && !only_parallel)	       plan = fftwnd_create_plan(sz.rank, sz.narray,					 dir, speed_flag | flags					 | wisdom_flag | no_vector_flag);	  mpi_plan = fftwnd_mpi_create_plan(MPI_COMM_WORLD, sz.rank, sz.narray,					    dir, speed_flag | flags					    | wisdom_flag | no_vector_flag);     }     CHECK(mpi_plan != NULL, "can't create plan");     fftwnd_mpi_local_sizes(mpi_plan, &local_nx, &local_x_start,			    &local_ny_after_transpose,			    &local_y_start_after_transpose,			    &total_local_size);     if (io_okay && !only_parallel)	  in = (fftw_complex *) fftw_malloc(N * howmany_fields *					    sizeof(fftw_complex));     else	  in = (fftw_complex *) fftw_malloc(total_local_size * howmany_fields *					    sizeof(fftw_complex));     work = (fftw_complex *) fftw_malloc(total_local_size * howmany_fields *					 sizeof(fftw_complex));          if (io_okay && !only_parallel) {	  FFTW_TIME_FFT(fftwnd(plan, howmany_fields,			      in, howmany_fields, 1, 0, 0, 0),		       in, N * howmany_fields, t0);	  fftwnd_destroy_plan(plan);	  	  WHEN_VERBOSE(1, printf("time for one fft 
(uniprocessor): %s\n",				 smart_sprint_time(t0)));     }     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,			     in, NULL, FFTW_NORMAL_ORDER),		   in, total_local_size * howmany_fields, t);     if (io_okay) {	  WHEN_VERBOSE(1, printf("NORMAL: time for one fft (%d cpus): %s",				 ncpus, smart_sprint_time(t)));	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));	  WHEN_VERBOSE(1, printf("NORMAL: \"mflops\" = 5 (N log2 N) / "				 "(t in microseconds)"				 " = %f\n", howmany_fields * mflops(t, N)));	  if (!only_parallel)	     WHEN_VERBOSE(1, printf("NORMAL: parallel speedup: %f\n", t0 / t));     }     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,			     in, NULL, FFTW_TRANSPOSED_ORDER),		   in, total_local_size * howmany_fields, t);     if (io_okay) {	  WHEN_VERBOSE(1, printf("TRANSP.: time for one fft (%d cpus): %s",				 ncpus, smart_sprint_time(t)));	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));	  WHEN_VERBOSE(1, printf("TRANSP.: \"mflops\" = 5 (N log2 N) / "				 "(t in microseconds)"				 " = %f\n", howmany_fields * mflops(t, N)));	  if (!only_parallel)	    WHEN_VERBOSE(1, printf("TRANSP.: parallel speedup: %f\n", t0 / t));     }     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,			     in, work, FFTW_NORMAL_ORDER),		   in, total_local_size * howmany_fields, t);     if (io_okay) {	  WHEN_VERBOSE(1, printf("NORMAL,w/WORK: time for one fft (%d cpus): %s",				 ncpus, smart_sprint_time(t)));	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));	  WHEN_VERBOSE(1, printf("NORMAL,w/WORK: \"mflops\" = 5 (N log2 N) / "				 "(t in microseconds)"				 " = %f\n", howmany_fields * mflops(t, N)));	  if (!only_parallel)	       WHEN_VERBOSE(1, printf("NORMAL,w/WORK: parallel speedup: %f\n", t0 / t));     }     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,			     in, work, FFTW_TRANSPOSED_ORDER),		   in, total_local_size * howmany_fields, t);     if (io_okay) {	  WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: time for 
one fft (%d cpus): %s",				 ncpus, smart_sprint_time(t)));	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));	  WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: \"mflops\" = 5 (N log2 N) / "				 "(t in microseconds)"				 " = %f\n", howmany_fields * mflops(t, N)));	  if (!only_parallel)	       WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: parallel speedup: %f\n", t0 / t));     }     fftwnd_mpi_destroy_plan(mpi_plan);     fftw_free(in);     fftw_free(work);     WHEN_VERBOSE(1, my_printf("\n"));}/************************************************* * correctness tests *************************************************/void test_out_of_place(int n, int istride, int ostride,		       int howmany, fftw_direction dir,		       fftw_plan validated_plan,		       int specific){     /* one-dim. out-of-place transforms will never be supported in MPI */     WHEN_VERBOSE(2, my_printf("N/A\n"));}void test_in_place(int n, int istride, int howmany, fftw_direction dir,		   fftw_plan validated_plan, int specific){     int local_n, local_start, local_n_after_transform,	  local_start_after_transform, total_local_size;     fftw_complex *in1, *work = NULL, *in2, *out2;     fftw_mpi_plan plan;     int i;     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;     if (specific) {	  WHEN_VERBOSE(2, my_printf("N/A\n"));	  return;     }     if (coinflip())	  flags |= FFTW_THREADSAFE;     plan = fftw_mpi_create_plan(MPI_COMM_WORLD, n, dir, flags);     fftw_mpi_local_sizes(plan, &local_n, &local_start,			  &local_n_after_transform,			  &local_start_after_transform,			  &total_local_size);     in1 = (fftw_complex *) fftw_malloc(total_local_size 					* sizeof(fftw_complex) * howmany);     if (coinflip()) {	  WHEN_VERBOSE(2, my_printf("w/work..."));	  work = (fftw_complex *) fftw_malloc(total_local_size                                        * sizeof(fftw_complex) * howmany);     }     in2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);     out2 = (fftw_complex *) 
fftw_malloc(n * sizeof(fftw_complex) * howmany);     /* generate random inputs */     for (i = 0; i < n * howmany; ++i) {	  c_re(in2[i]) = DRAND();	  c_im(in2[i]) = DRAND();     }     for (i = 0; i < local_n * howmany; ++i) {	  c_re(in1[i]) = c_re(in2[i + local_start*howmany]);	  c_im(in1[i]) = c_im(in2[i + local_start*howmany]);     }	       /* fft-ize */     fftw_mpi(plan, howmany, in1, work);     fftw_mpi_destroy_plan(plan);
12 下一页
💿 文件大小 1511 K
👤 上传用户 koalalee
📂 所属分类数学计算
🏷️ 相关标签

#collection #Transform #Discrete #routines
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -