⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rexec_threads.c

📁 FFTW, a collection of fast C routines to compute the Discrete Fourier Transform in one or more dimensions
💻 C
📖 第 1 页 / 共 2 页
字号:
/* * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * *//* * rexec_threads.c -- execute the fft in parallel */#include <stdio.h>#include <stdlib.h>#include "fftw_threads-int.h"#include "rfftw_threads.h"extern void rfftw_strided_copy(int n, fftw_real *in, int ostride,			       fftw_real *out);static void rexec_simple_threads(int n, fftw_real *in,				 fftw_real *out,				 fftw_plan_node *p,				 int istride,				 int ostride,				 int nthreads);typedef struct {     int m,r;     fftw_real *in;     fftw_real *out;     fftw_plan_node *p;     int istride, ostride;     int nthreads;} rexec_simple_data;static void *rexec_simple_thread_r2c(fftw_loop_data *ldata){     int min = ldata->min, max = ldata->max;     rexec_simple_data *d = (rexec_simple_data *) ldata->data;     int m = d->m, r = d->r;     fftw_real *in = d->in;     fftw_real *out = d->out;     fftw_plan_node *p = d->p;     int istride = d->istride, ostride = d->ostride;     int nthreads = d->nthreads;          for (; min < max; ++min)	  rexec_simple_threads(m, in + min * istride,			       out + min * (m * ostride),			       p,			       istride * r, ostride,			       nthreads);     return 0;}static void *rexec_simple_thread_c2r(fftw_loop_data *ldata){     int min = ldata->min, max = ldata->max;     
rexec_simple_data *d = (rexec_simple_data *) ldata->data;     int m = d->m, r = d->r;     fftw_real *in = d->in;     fftw_real *out = d->out;     fftw_plan_node *p = d->p;     int istride = d->istride, ostride = d->ostride;     int nthreads = d->nthreads;          for (; min < max; ++min)	  rexec_simple_threads(m, in + min * (m * istride),			       out + min * ostride,			       p,			       istride, ostride * r,			       nthreads);     return 0;}static void spawn_h2hc_recurse_threads(int m, int r,				       fftw_real *in,				       fftw_real *out,				       fftw_plan_node *p,				       int istride,				       int ostride,				       int nthreads){     rexec_simple_data d;          d.m = m; d.r = r;     d.in = in; d.out = out;     d.p = p->nodeu.hc2hc.recurse;     d.istride = istride;     d.ostride = ostride;     d.nthreads = nthreads / r;          switch (p->nodeu.hc2hc.dir) {	 case FFTW_REAL_TO_COMPLEX:	      fftw_thread_spawn_loop(r, nthreads,				     rexec_simple_thread_r2c, &d);	      break;	 case FFTW_COMPLEX_TO_REAL:	      fftw_thread_spawn_loop(r, nthreads,				     rexec_simple_thread_c2r, &d);	      break;     }}static void rexec_simple_threads(int n, fftw_real *in, fftw_real *out,				 fftw_plan_node *p,				 int istride,				 int ostride,				 int nthreads){     switch (p->type) {	 case FFTW_REAL2HC:	      HACK_ALIGN_STACK_ODD;	      (p->nodeu.real2hc.codelet) (in, out, out + n * ostride,					  istride, ostride, -ostride);	      break;	 case FFTW_HC2REAL:	      HACK_ALIGN_STACK_ODD;	      (p->nodeu.hc2real.codelet) (in, in + n * istride, out,					  istride, -istride, ostride);	      break;	 case FFTW_HC2HC:	      {		   int r = p->nodeu.hc2hc.size;		   int m = n / r;		   int i;		   fftw_hc2hc_codelet *codelet;		   fftw_complex *W;		   if (nthreads <= 1) { 			switch (p->nodeu.hc2hc.dir) {			    case FFTW_REAL_TO_COMPLEX:				 for (i = 0; i < r; ++i)				      rfftw_executor_simple(m,						    in + i * istride,						    out + i * (m * ostride),						    
p->nodeu.hc2hc.recurse,						    istride * r, ostride,       						    FFTW_NORMAL_RECURSE);				 W = p->nodeu.hc2hc.tw->twarray;				 codelet = p->nodeu.hc2hc.codelet;				 HACK_ALIGN_STACK_EVEN;				 codelet(out, W, m * ostride, m, ostride);				 break;			    case FFTW_COMPLEX_TO_REAL:				 W = p->nodeu.hc2hc.tw->twarray;				 codelet = p->nodeu.hc2hc.codelet;				 HACK_ALIGN_STACK_EVEN;				 codelet(in, W, m * istride, m, istride);				 				 for (i = 0; i < r; ++i)				      rfftw_executor_simple(m,						    in + i * (m * istride),						    out + i * ostride,						    p->nodeu.hc2hc.recurse,						    istride, ostride * r,						    FFTW_NORMAL_RECURSE);				 break;			    default:				 goto bug;			}		   }		   else			switch (p->nodeu.hc2hc.dir) {			    case FFTW_REAL_TO_COMPLEX:				 spawn_h2hc_recurse_threads(m, r, in, out, p,							    istride, ostride,							    nthreads);				 W = p->nodeu.hc2hc.tw->twarray;				 codelet = p->nodeu.hc2hc.codelet;				 HACK_ALIGN_STACK_EVEN;				 codelet(out, W, m * ostride, m, ostride);				 break;			    case FFTW_COMPLEX_TO_REAL:				 W = p->nodeu.hc2hc.tw->twarray;				 codelet = p->nodeu.hc2hc.codelet;				 HACK_ALIGN_STACK_EVEN;				 codelet(in, W, m * istride, m, istride);				 				 spawn_h2hc_recurse_threads(m, r, in, out, p,							    istride, ostride,							    nthreads);				 break;			}		   		   break;	      }	 case FFTW_RGENERIC:	      {		   int r = p->nodeu.rgeneric.size;		   int m = n / r;		   int i;		   fftw_rgeneric_codelet *codelet = p->nodeu.rgeneric.codelet;		   fftw_complex *W = p->nodeu.rgeneric.tw->twarray;		   if (nthreads <= 1)			switch (p->nodeu.rgeneric.dir) {			    case FFTW_REAL_TO_COMPLEX:				 for (i = 0; i < r; ++i)				      rfftw_executor_simple(m,						    in + i * istride,						    out + i * (m * ostride),						    p->nodeu.rgeneric.recurse,						    istride * r, ostride,						    FFTW_NORMAL_RECURSE);				 				 codelet(out, W, m, r, n, ostride);				 break;			    case FFTW_COMPLEX_TO_REAL:				 codelet(in, W, m, r, n, 
istride);				 				 for (i = 0; i < r; ++i)				      rfftw_executor_simple(m,						    in + i * m * istride,						    out + i * ostride,						    p->nodeu.rgeneric.recurse,						    istride, ostride * r,						    FFTW_NORMAL_RECURSE);				 break;			    default:				 goto bug;			}		   else			switch (p->nodeu.hc2hc.dir) {			    case FFTW_REAL_TO_COMPLEX:				 spawn_h2hc_recurse_threads(m, r, in, out, p,							    istride, ostride,							    nthreads);				 codelet(out, W, m, r, n, ostride);				 break;			    case FFTW_COMPLEX_TO_REAL:				 codelet(in, W, m, r, n, istride);				 spawn_h2hc_recurse_threads(m, r, in, out, p,							    istride, ostride,							    nthreads);				 break;			}		   break;	      }	 default:	    bug:	      fftw_die("BUG in rexecutor: invalid plan\n");	      break;     }}static void rexecutor_simple_inplace_threads(int n, fftw_real *in,					     fftw_real *out,					     fftw_plan_node *p,					     int istride,					     int nthreads){     switch (p->type) {	 case FFTW_REAL2HC:	      HACK_ALIGN_STACK_ODD;	      (p->nodeu.real2hc.codelet) (in, in, in + n * istride,					  istride, istride, -istride);	      break;	 case FFTW_HC2REAL:	      HACK_ALIGN_STACK_ODD;	      (p->nodeu.hc2real.codelet) (in, in + n * istride, in,					  istride, -istride, istride);	      break;	 default:	      {		   fftw_real *tmp;		   if (out)			tmp = out;		   else			tmp = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));		   rexec_simple_threads(n, in, tmp, p, istride, 1, nthreads);		   rfftw_strided_copy(n, tmp, istride, in);		   if (!out)			fftw_free(tmp);	      }     }}typedef struct {     union {          fftw_real2hc_codelet *r2c_codelet;          fftw_hc2real_codelet *c2r_codelet;	  fftw_plan_node *p;     } u;     int n;     fftw_real *in;     fftw_real *out;     int idist, odist, istride, ostride;} rexec_many_data;static void *rexec_many_r2c_codelet_thread(fftw_loop_data *ldata){     int min = ldata->min, max = ldata->max;     rexec_many_data *d = (rexec_many_data *) 
ldata->data;     fftw_real2hc_codelet *r2c_codelet = d->u.r2c_codelet;     int n = d->n;     fftw_real *in = d->in;     fftw_real *out = d->out;     int idist = d->idist, odist = d->odist;     int istride = d->istride, ostride = d->ostride;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -