📄 fftsg3d.c
字号:
double *t, int *ip, double *w){ void *cdft3db_th(void *p); fft3d_thread_t th[FFT3D_MAX_THREADS]; fft3d_arg_t ag[FFT3D_MAX_THREADS]; int nthread, nt, i; nthread = FFT3D_MAX_THREADS; if (nthread > n2) { nthread = n2; } nt = 8 * n1; if (n3 == 4) { nt >>= 1; } else if (n3 < 4) { nt >>= 2; } for (i = 0; i < nthread; i++) { ag[i].nthread = nthread; ag[i].n0 = i; ag[i].n1 = n1; ag[i].n2 = n2; ag[i].n3 = n3; ag[i].isgn = isgn; ag[i].a = a; ag[i].t = &t[nt * i]; ag[i].ip = ip; ag[i].w = w; fft3d_thread_create(&th[i], cdft3db_th, &ag[i]); } for (i = 0; i < nthread; i++) { fft3d_thread_wait(th[i]); }}void ddxt3da_subth(int n1, int n2, int n3, int ics, int isgn, double ***a, double *t, int *ip, double *w){ void *ddxt3da_th(void *p); fft3d_thread_t th[FFT3D_MAX_THREADS]; fft3d_arg_t ag[FFT3D_MAX_THREADS]; int nthread, nt, i; nthread = FFT3D_MAX_THREADS; if (nthread > n1) { nthread = n1; } nt = 4 * n2; if (n3 == 2) { nt >>= 1; } for (i = 0; i < nthread; i++) { ag[i].nthread = nthread; ag[i].n0 = i; ag[i].n1 = n1; ag[i].n2 = n2; ag[i].n3 = n3; ag[i].ic = ics; ag[i].isgn = isgn; ag[i].a = a; ag[i].t = &t[nt * i]; ag[i].ip = ip; ag[i].w = w; fft3d_thread_create(&th[i], ddxt3da_th, &ag[i]); } for (i = 0; i < nthread; i++) { fft3d_thread_wait(th[i]); }}void ddxt3db_subth(int n1, int n2, int n3, int ics, int isgn, double ***a, double *t, int *ip, double *w){ void *ddxt3db_th(void *p); fft3d_thread_t th[FFT3D_MAX_THREADS]; fft3d_arg_t ag[FFT3D_MAX_THREADS]; int nthread, nt, i; nthread = FFT3D_MAX_THREADS; if (nthread > n2) { nthread = n2; } nt = 4 * n1; if (n3 == 2) { nt >>= 1; } for (i = 0; i < nthread; i++) { ag[i].nthread = nthread; ag[i].n0 = i; ag[i].n1 = n1; ag[i].n2 = n2; ag[i].n3 = n3; ag[i].ic = ics; ag[i].isgn = isgn; ag[i].a = a; ag[i].t = &t[nt * i]; ag[i].ip = ip; ag[i].w = w; fft3d_thread_create(&th[i], ddxt3db_th, &ag[i]); } for (i = 0; i < nthread; i++) { fft3d_thread_wait(th[i]); }}void *xdft3da_th(void *p){ void cdft(int n, int isgn, double *a, int *ip, double *w); void rdft(int n, int isgn, double *a, int *ip, double *w); int nthread, n0, n1, n2, n3, icr, isgn, *ip, i, j, k; double ***a, *t, *w; nthread = ((fft3d_arg_t *) p)->nthread; n0 = ((fft3d_arg_t *) p)->n0; n1 = ((fft3d_arg_t *) p)->n1; n2 = ((fft3d_arg_t *) p)->n2; n3 = ((fft3d_arg_t *) p)->n3; icr = ((fft3d_arg_t *) p)->ic; isgn = ((fft3d_arg_t *) p)->isgn; a = ((fft3d_arg_t *) p)->a; t = ((fft3d_arg_t *) p)->t; ip = ((fft3d_arg_t *) p)->ip; w = ((fft3d_arg_t *) p)->w; for (i = n0; i < n1; i += nthread) { if (icr == 0) { for (j = 0; j < n2; j++) { cdft(n3, isgn, a[i][j], ip, w); } } else if (isgn >= 0) { for (j = 0; j < n2; j++) { rdft(n3, isgn, a[i][j], ip, w); } } if (n3 > 4) { for (k = 0; k < n3; k += 8) { for (j = 0; j < n2; j++) { t[2 * j] = a[i][j][k]; t[2 * j + 1] = a[i][j][k + 1]; t[2 * n2 + 2 * j] = a[i][j][k + 2]; t[2 * n2 + 2 * j + 1] = a[i][j][k + 3]; t[4 * n2 + 2 * j] = a[i][j][k + 4]; t[4 * n2 + 2 * j + 1] = a[i][j][k + 5]; t[6 * n2 + 2 * j] = a[i][j][k + 6]; t[6 * n2 + 2 * j + 1] = a[i][j][k + 7]; } cdft(2 * n2, isgn, t, ip, w); cdft(2 * n2, isgn, &t[2 * n2], ip, w); cdft(2 * n2, isgn, &t[4 * n2], ip, w); cdft(2 * n2, isgn, &t[6 * n2], ip, w); for (j = 0; j < n2; j++) { a[i][j][k] = t[2 * j]; a[i][j][k + 1] = t[2 * j + 1]; a[i][j][k + 2] = t[2 * n2 + 2 * j]; a[i][j][k + 3] = t[2 * n2 + 2 * j + 1]; a[i][j][k + 4] = t[4 * n2 + 2 * j]; a[i][j][k + 5] = t[4 * n2 + 2 * j + 1]; a[i][j][k + 6] = t[6 * n2 + 2 * j]; a[i][j][k + 7] = t[6 * n2 + 2 * j + 1]; } } } else if (n3 == 4) { for (j = 0; j < n2; j++) { t[2 * j] = a[i][j][0]; t[2 * j + 1] = a[i][j][1]; t[2 * n2 + 2 * j] = a[i][j][2]; t[2 * n2 + 2 * j + 1] = a[i][j][3]; } cdft(2 * n2, isgn, t, ip, w); cdft(2 * n2, isgn, &t[2 * n2], ip, w); for (j = 0; j < n2; j++) { a[i][j][0] = t[2 * j]; a[i][j][1] = t[2 * j + 1]; a[i][j][2] = t[2 * n2 + 2 * j]; a[i][j][3] = t[2 * n2 + 2 * j + 1]; } } else if (n3 == 2) { for (j = 0; j < n2; j++) { t[2 * j] = a[i][j][0]; t[2 * j + 1] = a[i][j][1]; } cdft(2 * n2, isgn, t, ip, w); for (j = 0; j < n2; j++) { a[i][j][0] = t[2 * j]; a[i][j][1] = t[2 * j + 1]; } } if (icr != 0 && isgn < 0) { for (j = 0; j < n2; j++) { rdft(n3, isgn, a[i][j], ip, w); } } } return (void *) 0;}void *cdft3db_th(void *p){ void cdft(int n, int isgn, double *a, int *ip, double *w); int nthread, n0, n1, n2, n3, isgn, *ip, i, j, k; double ***a, *t, *w; nthread = ((fft3d_arg_t *) p)->nthread; n0 = ((fft3d_arg_t *) p)->n0; n1 = ((fft3d_arg_t *) p)->n1; n2 = ((fft3d_arg_t *) p)->n2; n3 = ((fft3d_arg_t *) p)->n3; isgn = ((fft3d_arg_t *) p)->isgn; a = ((fft3d_arg_t *) p)->a; t = ((fft3d_arg_t *) p)->t; ip = ((fft3d_arg_t *) p)->ip; w = ((fft3d_arg_t *) p)->w; if (n3 > 4) { for (j = n0; j < n2; j += nthread) { for (k = 0; k < n3; k += 8) { for (i = 0; i < n1; i++) { t[2 * i] = a[i][j][k]; t[2 * i + 1] = a[i][j][k + 1]; t[2 * n1 + 2 * i] = a[i][j][k + 2]; t[2 * n1 + 2 * i + 1] = a[i][j][k + 3]; t[4 * n1 + 2 * i] = a[i][j][k + 4]; t[4 * n1 + 2 * i + 1] = a[i][j][k + 5]; t[6 * n1 + 2 * i] = a[i][j][k + 6]; t[6 * n1 + 2 * i + 1] = a[i][j][k + 7]; } cdft(2 * n1, isgn, t, ip, w); cdft(2 * n1, isgn, &t[2 * n1], ip, w); cdft(2 * n1, isgn, &t[4 * n1], ip, w); cdft(2 * n1, isgn, &t[6 * n1], ip, w); for (i = 0; i < n1; i++) { a[i][j][k] = t[2 * i]; a[i][j][k + 1] = t[2 * i + 1]; a[i][j][k + 2] = t[2 * n1 + 2 * i]; a[i][j][k + 3] = t[2 * n1 + 2 * i + 1]; a[i][j][k + 4] = t[4 * n1 + 2 * i]; a[i][j][k + 5] = t[4 * n1 + 2 * i + 1]; a[i][j][k + 6] = t[6 * n1 + 2 * i]; a[i][j][k + 7] = t[6 * n1 + 2 * i + 1]; } } } } else if (n3 == 4) { for (j = n0; j < n2; j += nthread) { for (i = 0; i < n1; i++) { t[2 * i] = a[i][j][0]; t[2 * i + 1] = a[i][j][1]; t[2 * n1 + 2 * i] = a[i][j][2]; t[2 * n1 + 2 * i + 1] = a[i][j][3]; } cdft(2 * n1, isgn, t, ip, w); cdft(2 * n1, isgn, &t[2 * n1], ip, w); for (i = 0; i < n1; i++) { a[i][j][0] = t[2 * i]; a[i][j][1] = t[2 * i + 1]; a[i][j][2] = t[2 * n1 + 2 * i]; a[i][j][3] = t[2 * n1 + 2 * i + 1]; } } } else if (n3 == 2) { for (j = n0; j < n2; j += nthread) { for (i = 0; i < n1; i++) { t[2 * i] = a[i][j][0]; t[2 * i + 1] = a[i][j][1]; } cdft(2 * n1, isgn, t, ip, w); for (i = 0; i < n1; i++) { a[i][j][0] = t[2 * i]; a[i][j][1] = t[2 * i + 1]; } } } return (void *) 0;}void *ddxt3da_th(void *p){ void ddct(int n, int isgn, double *a, int *ip, double *w); void ddst(int n, int isgn, double *a, int *ip, double *w); int nthread, n0, n1, n2, n3, ics, isgn, *ip, i, j, k; double ***a, *t, *w; nthread = ((fft3d_arg_t *) p)->nthread; n0 = ((fft3d_arg_t *) p)->n0; n1 = ((fft3d_arg_t *) p)->n1; n2 = ((fft3d_arg_t *) p)->n2; n3 = ((fft3d_arg_t *) p)->n3; ics = ((fft3d_arg_t *) p)->ic; isgn = ((fft3d_arg_t *) p)->isgn; a = ((fft3d_arg_t *) p)->a; t = ((fft3d_arg_t *) p)->t; ip = ((fft3d_arg_t *) p)->ip; w = ((fft3d_arg_t *) p)->w; for (i = n0; i < n1; i += nthread) { if (ics == 0) { for (j = 0; j < n2; j++) { ddct(n3, isgn, a[i][j], ip, w); } } else { for (j = 0; j < n2; j++) { ddst(n3, isgn, a[i][j], ip, w); } } if (n3 > 2) { for (k = 0; k < n3; k += 4) { for (j = 0; j < n2; j++) { t[j] = a[i][j][k]; t[n2 + j] = a[i][j][k + 1]; t[2 * n2 + j] = a[i][j][k + 2]; t[3 * n2 + j] = a[i][j][k + 3]; } if (ics == 0) { ddct(n2, isgn, t, ip, w); ddct(n2, isgn, &t[n2], ip, w); ddct(n2, isgn, &t[2 * n2], ip, w); ddct(n2, isgn, &t[3 * n2], ip, w); } else { ddst(n2, isgn, t, ip, w); ddst(n2, isgn, &t[n2], ip, w); ddst(n2, isgn, &t[2 * n2], ip, w); ddst(n2, isgn, &t[3 * n2], ip, w); } for (j = 0; j < n2; j++) { a[i][j][k] = t[j]; a[i][j][k + 1] = t[n2 + j]; a[i][j][k + 2] = t[2 * n2 + j]; a[i][j][k + 3] = t[3 * n2 + j]; } } } else if (n3 == 2) { for (j = 0; j < n2; j++) { t[j] = a[i][j][0]; t[n2 + j] = a[i][j][1]; } if (ics == 0) { ddct(n2, isgn, t, ip, w); ddct(n2, isgn, &t[n2], ip, w); } else { ddst(n2, isgn, t, ip, w); ddst(n2, isgn, &t[n2], ip, w); } for (j = 0; j < n2; j++) { a[i][j][0] = t[j]; a[i][j][1] = t[n2 + j]; } } } return (void *) 0;}void *ddxt3db_th(void *p){ void ddct(int n, int isgn, double *a, int *ip, double *w); void ddst(int n, int isgn, double *a, int *ip, double *w); int nthread, n0, n1, n2, n3, ics, isgn, *ip, i, j, k; double ***a, *t, *w; nthread = ((fft3d_arg_t *) p)->nthread; n0 = ((fft3d_arg_t *) p)->n0; n1 = ((fft3d_arg_t *) p)->n1; n2 = ((fft3d_arg_t *) p)->n2; n3 = ((fft3d_arg_t *) p)->n3; ics = ((fft3d_arg_t *) p)->ic; isgn = ((fft3d_arg_t *) p)->isgn; a = ((fft3d_arg_t *) p)->a; t = ((fft3d_arg_t *) p)->t; ip = ((fft3d_arg_t *) p)->ip; w = ((fft3d_arg_t *) p)->w; if (n3 > 2) { for (j = n0; j < n2; j += nthread) { for (k = 0; k < n3; k += 4) { for (i = 0; i < n1; i++) { t[i] = a[i][j][k]; t[n1 + i] = a[i][j][k + 1]; t[2 * n1 + i] = a[i][j][k + 2]; t[3 * n1 + i] = a[i][j][k + 3]; } if (ics == 0) { ddct(n1, isgn, t, ip, w); ddct(n1, isgn, &t[n1], ip, w); ddct(n1, isgn, &t[2 * n1], ip, w); ddct(n1, isgn, &t[3 * n1], ip, w); } else { ddst(n1, isgn, t, ip, w); ddst(n1, isgn, &t[n1], ip, w); ddst(n1, isgn, &t[2 * n1], ip, w); ddst(n1, isgn, &t[3 * n1], ip, w); } for (i = 0; i < n1; i++) { a[i][j][k] = t[i]; a[i][j][k + 1] = t[n1 + i]; a[i][j][k + 2] = t[2 * n1 + i]; a[i][j][k + 3] = t[3 * n1 + i]; } } } } else if (n3 == 2) { for (j = n0; j < n2; j += nthread) { for (i = 0; i < n1; i++) { t[i] = a[i][j][0]; t[n1 + i] = a[i][j][1]; } if (ics == 0) { ddct(n1, isgn, t, ip, w); ddct(n1, isgn, &t[n1], ip, w); } else { ddst(n1, isgn, t, ip, w); ddst(n1, isgn, &t[n1], ip, w); } for (i = 0; i < n1; i++) { a[i][j][0] = t[i]; a[i][j][1] = t[n1 + i]; } } } return (void *) 0;}#endif /* USE_FFT3D_THREADS */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -