📄 rexec_threads.c
字号:
/* * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* * rexec_threads.c -- execute the fft in parallel */#include <stdio.h>#include <stdlib.h>#include "fftw_threads-int.h"#include "rfftw_threads.h"extern void rfftw_strided_copy(int n, fftw_real *in, int ostride, fftw_real *out);static void rexec_simple_threads(int n, fftw_real *in, fftw_real *out, fftw_plan_node *p, int istride, int ostride, int nthreads);typedef struct { int m,r; fftw_real *in; fftw_real *out; fftw_plan_node *p; int istride, ostride; int nthreads;} rexec_simple_data;static void *rexec_simple_thread_r2c(fftw_loop_data *ldata){ int min = ldata->min, max = ldata->max; rexec_simple_data *d = (rexec_simple_data *) ldata->data; int m = d->m, r = d->r; fftw_real *in = d->in; fftw_real *out = d->out; fftw_plan_node *p = d->p; int istride = d->istride, ostride = d->ostride; int nthreads = d->nthreads; for (; min < max; ++min) rexec_simple_threads(m, in + min * istride, out + min * (m * ostride), p, istride * r, ostride, nthreads); return 0;}static void *rexec_simple_thread_c2r(fftw_loop_data *ldata){ int min = ldata->min, max = ldata->max; rexec_simple_data *d = (rexec_simple_data *) ldata->data; int m = d->m, r = d->r; fftw_real *in = d->in; fftw_real *out = d->out; fftw_plan_node *p = d->p; int istride = d->istride, ostride = d->ostride; int nthreads = d->nthreads; for (; min < max; ++min) rexec_simple_threads(m, in + min * (m * istride), out + min * ostride, p, istride, ostride * r, nthreads); return 0;}static void spawn_h2hc_recurse_threads(int m, int r, fftw_real *in, fftw_real *out, fftw_plan_node *p, int istride, int ostride, int nthreads){ rexec_simple_data d; d.m = m; d.r = r; d.in = in; d.out = out; d.p = p->nodeu.hc2hc.recurse; d.istride = istride; d.ostride = ostride; d.nthreads = nthreads / r; switch (p->nodeu.hc2hc.dir) { case FFTW_REAL_TO_COMPLEX: fftw_thread_spawn_loop(r, nthreads, rexec_simple_thread_r2c, &d); break; case FFTW_COMPLEX_TO_REAL: fftw_thread_spawn_loop(r, nthreads, rexec_simple_thread_c2r, &d); break; }}static void rexec_simple_threads(int n, fftw_real *in, fftw_real *out, fftw_plan_node *p, int istride, int ostride, int nthreads){ switch (p->type) { case FFTW_REAL2HC: HACK_ALIGN_STACK_ODD; (p->nodeu.real2hc.codelet) (in, out, out + n * ostride, istride, ostride, -ostride); break; case FFTW_HC2REAL: HACK_ALIGN_STACK_ODD; (p->nodeu.hc2real.codelet) (in, in + n * istride, out, istride, -istride, ostride); break; case FFTW_HC2HC: { int r = p->nodeu.hc2hc.size; int m = n / r; int i; fftw_hc2hc_codelet *codelet; fftw_complex *W; if (nthreads <= 1) { switch (p->nodeu.hc2hc.dir) { case FFTW_REAL_TO_COMPLEX: for (i = 0; i < r; ++i) rfftw_executor_simple(m, in + i * istride, out + i * (m * ostride), p->nodeu.hc2hc.recurse, istride * r, ostride, FFTW_NORMAL_RECURSE); W = p->nodeu.hc2hc.tw->twarray; codelet = p->nodeu.hc2hc.codelet; HACK_ALIGN_STACK_EVEN; codelet(out, W, m * ostride, m, ostride); break; case FFTW_COMPLEX_TO_REAL: W = p->nodeu.hc2hc.tw->twarray; codelet = p->nodeu.hc2hc.codelet; HACK_ALIGN_STACK_EVEN; codelet(in, W, m * istride, m, istride); for (i = 0; i < r; ++i) rfftw_executor_simple(m, in + i * (m * istride), out + i * ostride, p->nodeu.hc2hc.recurse, istride, ostride * r, FFTW_NORMAL_RECURSE); break; default: goto bug; } } else switch (p->nodeu.hc2hc.dir) { case FFTW_REAL_TO_COMPLEX: spawn_h2hc_recurse_threads(m, r, in, out, p, istride, ostride, nthreads); W = p->nodeu.hc2hc.tw->twarray; codelet = p->nodeu.hc2hc.codelet; HACK_ALIGN_STACK_EVEN; codelet(out, W, m * ostride, m, ostride); break; case FFTW_COMPLEX_TO_REAL: W = p->nodeu.hc2hc.tw->twarray; codelet = p->nodeu.hc2hc.codelet; HACK_ALIGN_STACK_EVEN; codelet(in, W, m * istride, m, istride); spawn_h2hc_recurse_threads(m, r, in, out, p, istride, ostride, nthreads); break; } break; } case FFTW_RGENERIC: { int r = p->nodeu.rgeneric.size; int m = n / r; int i; fftw_rgeneric_codelet *codelet = p->nodeu.rgeneric.codelet; fftw_complex *W = p->nodeu.rgeneric.tw->twarray; if (nthreads <= 1) switch (p->nodeu.rgeneric.dir) { case FFTW_REAL_TO_COMPLEX: for (i = 0; i < r; ++i) rfftw_executor_simple(m, in + i * istride, out + i * (m * ostride), p->nodeu.rgeneric.recurse, istride * r, ostride, FFTW_NORMAL_RECURSE); codelet(out, W, m, r, n, ostride); break; case FFTW_COMPLEX_TO_REAL: codelet(in, W, m, r, n, istride); for (i = 0; i < r; ++i) rfftw_executor_simple(m, in + i * m * istride, out + i * ostride, p->nodeu.rgeneric.recurse, istride, ostride * r, FFTW_NORMAL_RECURSE); break; default: goto bug; } else switch (p->nodeu.hc2hc.dir) { case FFTW_REAL_TO_COMPLEX: spawn_h2hc_recurse_threads(m, r, in, out, p, istride, ostride, nthreads); codelet(out, W, m, r, n, ostride); break; case FFTW_COMPLEX_TO_REAL: codelet(in, W, m, r, n, istride); spawn_h2hc_recurse_threads(m, r, in, out, p, istride, ostride, nthreads); break; } break; } default: bug: fftw_die("BUG in rexecutor: invalid plan\n"); break; }}static void rexecutor_simple_inplace_threads(int n, fftw_real *in, fftw_real *out, fftw_plan_node *p, int istride, int nthreads){ switch (p->type) { case FFTW_REAL2HC: HACK_ALIGN_STACK_ODD; (p->nodeu.real2hc.codelet) (in, in, in + n * istride, istride, istride, -istride); break; case FFTW_HC2REAL: HACK_ALIGN_STACK_ODD; (p->nodeu.hc2real.codelet) (in, in + n * istride, in, istride, -istride, istride); break; default: { fftw_real *tmp; if (out) tmp = out; else tmp = (fftw_real *) fftw_malloc(n * sizeof(fftw_real)); rexec_simple_threads(n, in, tmp, p, istride, 1, nthreads); rfftw_strided_copy(n, tmp, istride, in); if (!out) fftw_free(tmp); } }}typedef struct { union { fftw_real2hc_codelet *r2c_codelet; fftw_hc2real_codelet *c2r_codelet; fftw_plan_node *p; } u; int n; fftw_real *in; fftw_real *out; int idist, odist, istride, ostride;} rexec_many_data;static void *rexec_many_r2c_codelet_thread(fftw_loop_data *ldata){ int min = ldata->min, max = ldata->max; rexec_many_data *d = (rexec_many_data *) ldata->data; fftw_real2hc_codelet *r2c_codelet = d->u.r2c_codelet; int n = d->n; fftw_real *in = d->in; fftw_real *out = d->out; int idist = d->idist, odist = d->odist; int istride = d->istride, ostride = d->ostride;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -