📄 rexec_threads.c
字号:
HACK_ALIGN_STACK_ODD; for (; min < max; ++min) r2c_codelet(in + min * idist, out + min * odist, out + n * ostride + min * odist, istride, ostride, -ostride); return 0;}static void *rexec_many_c2r_codelet_thread(fftw_loop_data *ldata){ int min = ldata->min, max = ldata->max; rexec_many_data *d = (rexec_many_data *) ldata->data; fftw_hc2real_codelet *c2r_codelet = d->u.c2r_codelet; int n = d->n; fftw_real *in = d->in; fftw_real *out = d->out; int idist = d->idist, odist = d->odist; int istride = d->istride, ostride = d->ostride; HACK_ALIGN_STACK_ODD; for (; min < max; ++min) c2r_codelet(in + min * idist, in + n * istride + min * idist, out + min * odist, istride, -istride, ostride); return 0;}static void *rexec_many_simple_thread(fftw_loop_data *ldata){ int min = ldata->min, max = ldata->max; rexec_many_data *d = (rexec_many_data *) ldata->data; fftw_plan_node *p = d->u.p; int n = d->n; fftw_real *in = d->in; fftw_real *out = d->out; int idist = d->idist, odist = d->odist; int istride = d->istride, ostride = d->ostride; for (; min < max; ++min) rfftw_executor_simple(n, in + min * idist, out + min * odist, p, istride, ostride, FFTW_NORMAL_RECURSE); return 0;}static void rexecutor_many_threads(int n, fftw_real *in, fftw_real *out, fftw_plan_node *p, int istride, int ostride, int howmany, int idist, int odist, int nthreads){ if (nthreads > howmany) nthreads = howmany; switch (p->type) { case FFTW_REAL2HC: { int s; fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet; if (nthreads <= 1) { HACK_ALIGN_STACK_ODD; for (s = 0; s < howmany; ++s) codelet(in + s * idist, out + s * odist, out + n * ostride + s * odist, istride, ostride, -ostride); } else { rexec_many_data d; d.n = n; d.in = in; d.out = out; d.u.r2c_codelet = codelet; d.istride = istride; d.ostride = ostride; d.idist = idist; d.odist = odist; fftw_thread_spawn_loop(howmany, nthreads, rexec_many_r2c_codelet_thread, &d); } break; } case FFTW_HC2REAL: { int s; fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet; if (nthreads <= 1) { HACK_ALIGN_STACK_ODD; for (s = 0; s < howmany; ++s) codelet(in + s * idist, in + n * istride + s * idist, out + s * odist, istride, -istride, ostride); } else { rexec_many_data d; d.n = n; d.in = in; d.out = out; d.u.c2r_codelet = codelet; d.istride = istride; d.ostride = ostride; d.idist = idist; d.odist = odist; fftw_thread_spawn_loop(howmany, nthreads, rexec_many_c2r_codelet_thread, &d); } break; } default: { int s; if (nthreads <= 1) for (s = 0; s < howmany; ++s) { rfftw_executor_simple(n, in + s * idist, out + s * odist, p, istride, ostride, FFTW_NORMAL_RECURSE); } else { rexec_many_data d; d.in = in; d.out = out; d.n = n; d.u.p = p; d.istride = istride; d.ostride = ostride; d.idist = idist; d.odist = odist; fftw_thread_spawn_loop(howmany, nthreads, rexec_many_simple_thread, &d); } } }}static void *rexec_many_simple_inplace_thread(fftw_loop_data *ldata){ int min = ldata->min, max = ldata->max; rexec_many_data *d = (rexec_many_data *) ldata->data; fftw_plan_node *p = d->u.p; int n = d->n; fftw_real *in = d->in; fftw_real *out = d->out + n * ldata->thread_num; int idist = d->idist; int istride = d->istride; for (; min < max; ++min) { rfftw_executor_simple(n, in + min * idist, out, p, istride, 1, FFTW_NORMAL_RECURSE); rfftw_strided_copy(n, out, istride, in + min * idist); } return 0;}static void rexecutor_many_inplace_threads(int n, fftw_real *in, fftw_real *out, fftw_plan_node *p, int istride, int howmany, int idist, int nthreads){ switch (p->type) { case FFTW_REAL2HC: { int s; fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet; if (nthreads <= 1) { HACK_ALIGN_STACK_ODD; for (s = 0; s < howmany; ++s) codelet(in + s * idist, in + s * idist, in + n * istride + s * idist, istride, istride, -istride); } else { rexec_many_data d; d.n = n; d.in = in; d.out = in; d.u.r2c_codelet = codelet; d.istride = istride; d.ostride = istride; d.idist = idist; d.odist = idist; fftw_thread_spawn_loop(howmany, nthreads, rexec_many_r2c_codelet_thread, &d); } break; } case FFTW_HC2REAL: { int s; fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet; if (nthreads <= 1) { HACK_ALIGN_STACK_ODD; for (s = 0; s < howmany; ++s) codelet(in + s * idist, in + n * istride + s * idist, in + s * idist, istride, -istride, istride); } else { rexec_many_data d; d.n = n; d.in = in; d.out = in; d.u.c2r_codelet = codelet; d.istride = istride; d.ostride = istride; d.idist = idist; d.odist = idist; fftw_thread_spawn_loop(howmany, nthreads, rexec_many_c2r_codelet_thread, &d); } break; } default: { int s; fftw_real *tmp; if (nthreads > howmany) nthreads = howmany; if (nthreads <= 1) { if (out) tmp = out; else tmp =(fftw_real *) fftw_malloc(n * sizeof(fftw_real)); for (s = 0; s < howmany; ++s) { rfftw_executor_simple(n, in + s * idist, tmp, p, istride, 1, FFTW_NORMAL_RECURSE); rfftw_strided_copy(n, tmp, istride, in + s * idist); } if (!out) fftw_free(tmp); } else { rexec_many_data d; tmp = (fftw_real *) fftw_malloc(nthreads * n * sizeof(fftw_real)); d.in = in; d.out = tmp; d.n = n; d.u.p = p; d.istride = istride; d.ostride = 1; d.idist = idist; d.odist = 0; fftw_thread_spawn_loop(howmany, nthreads, rexec_many_simple_inplace_thread,&d); fftw_free(tmp); } } }}/* user interface */void rfftw_threads(int nthreads, fftw_plan plan, int howmany, fftw_real *in, int istride, int idist, fftw_real *out, int ostride, int odist){ int n = plan->n; if (plan->flags & FFTW_IN_PLACE) { if (howmany == 1) { rexecutor_simple_inplace_threads(n, in, out, plan->root, istride, nthreads); } else { rexecutor_many_inplace_threads(n, in, out, plan->root, istride, howmany, idist, nthreads); } } else { if (howmany == 1) { rexec_simple_threads(n, in, out, plan->root, istride, ostride, nthreads); } else { rexecutor_many_threads(n, in, out, plan->root, istride, ostride, howmany, idist, odist, nthreads); } }}void rfftw_threads_one(int nthreads, fftw_plan plan, fftw_real *in, fftw_real *out){ int n = plan->n; if (plan->flags & FFTW_IN_PLACE) rexecutor_simple_inplace_threads(n, in, out, plan->root, 1, nthreads); else rexec_simple_threads(n, in, out, plan->root, 1, 1, nthreads);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -