gpfa2f.c
来自「InsightToolkit-1.4.0(有大量的优化算法程序)」· C语言 代码 · 共 1,107 行 · 第 1/3 页
C
1,107 行
#include "f2c.h"
#include "netlib.h"
extern double sqrt(double); /* #include <math.h> */
/* Table of constant values */
static integer c__2 = 2;
/* fortran version of *gpfa2* - */
/* radix-2 section of self-sorting, in-place, generalized pfa */
/* central radix-2 and radix-8 passes included */
/* so that transform length can be any power of 2 */
/* */
/* ------------------------------------------------------------------- */
/* Subroutine */ void gpfa2f_(real *a, real *b, const real *trigs, const integer *inc,
const integer *jump, const integer *n, const integer *mm, const integer *lot, const integer *isign)
{
/* Initialized data */
static integer lvr = 1024;
/* System generated locals */
integer i__1;
/* Local variables */
static integer ninc, left, nvex, j, k, l, m;
static real s;
static integer ipass, nblox;
static real c1;
static integer jstep;
static real c2, c3;
static integer m2, n2;
static real t0;
static integer m8;
static real t2, t1, t3, u0, u2, u1, u3;
static integer ja, jb, jc, jd, je, jf, jg, jh, ji, jj, jk, jl, jm, jn, jo, jp, la, nb, mh, kk, ll, mu, nu, laincl;
static real ss;
static integer jstepl;
static real co1, co2, co3;
static integer istart;
static real co4, co5, co6, co7;
static integer jstepx;
static real si1, si2, si3, si4, si5, si6, si7, aja, ajb, ajc, ajd, bja,
bjc, bjb, bjd, aje, ajg, ajf, ajh, bje, bjg, bjf, bjh, aji;
static integer jjj;
static real bjm, ajj;
static integer ink;
static real bjj, ajk, ajl, bji, bjk;
static integer inq;
static real ajo, bjl, bjo, ajm, ajn, ajp, bjn, bjp;
/* *************************************************************** */
/* * * */
/* * N.B. LVR = LENGTH OF VECTOR REGISTERS, SET TO 128 FOR C90. * */
/* * RESET TO 64 FOR OTHER CRAY MACHINES, OR TO ANY LARGE VALUE * */
/* * (GREATER THAN OR EQUAL TO LOT) FOR A SCALAR COMPUTER. * */
/* * * */
/* *************************************************************** */
n2 = pow_ii(&c__2, mm);
inq = *n / n2;
jstepx = (n2 - *n) * *inc;
ninc = *n * *inc;
ink = *inc * inq;
m2 = 0;
m8 = 0;
if (*mm % 2 == 0) {
m = *mm / 2;
} else if (*mm % 4 == 1) {
m = (*mm - 1) / 2;
m2 = 1;
} else if (*mm % 4 == 3) {
m = (*mm - 3) / 2;
m8 = 1;
}
mh = (m + 1) / 2;
nblox = (*lot - 1) / lvr + 1;
left = *lot;
s = (real) (*isign);
istart = 0;
/* loop on blocks of lvr transforms */
for (nb = 0; nb < nblox; ++nb) {
if (left <= lvr) {
nvex = left;
} else if (left < lvr << 1) {
nvex = left / 2;
nvex += nvex % 2;
} else {
nvex = lvr;
}
left -= nvex;
la = 1;
/* loop on type I radix-4 passes */
mu = inq % 4;
if (*isign == -1) {
mu = 4 - mu;
}
ss = 1.f;
if (mu == 3) {
ss = -1.f;
}
if (mh == 0) {
goto L200;
}
for (ipass = 0; ipass < mh; ++ipass) {
jstep = *n * *inc / (la << 2);
jstepl = jstep - ninc;
/* k = 0 loop (no twiddle factors) */
i__1 = jstep << 2;
for (jjj = 0; i__1 < 0 ? jjj >= (*n - 1) * *inc : jjj <= (*n - 1) * *inc; jjj += i__1) {
ja = istart + jjj;
/* "transverse" loop */
for (nu = 0; nu < inq; ++nu) {
jb = ja + jstepl;
if (jb < istart) {
jb += ninc;
}
jc = jb + jstepl;
if (jc < istart) {
jc += ninc;
}
jd = jc + jstepl;
if (jd < istart) {
jd += ninc;
}
j = 0;
/* loop across transforms */
/* dir$ ivdep, shortloop */
for (l = 0; l < nvex; ++l) {
aja = a[ja + j];
ajc = a[jc + j];
t0 = aja + ajc;
t2 = aja - ajc;
ajb = a[jb + j];
ajd = a[jd + j];
t1 = ajb + ajd;
t3 = ss * (ajb - ajd);
bja = b[ja + j];
bjc = b[jc + j];
u0 = bja + bjc;
u2 = bja - bjc;
bjb = b[jb + j];
bjd = b[jd + j];
u1 = bjb + bjd;
u3 = ss * (bjb - bjd);
a[ja + j] = t0 + t1;
a[jc + j] = t0 - t1;
b[ja + j] = u0 + u1;
b[jc + j] = u0 - u1;
a[jb + j] = t2 - u3;
a[jd + j] = t2 + u3;
b[jb + j] = u2 + t3;
b[jd + j] = u2 - t3;
j += *jump;
}
ja += jstepx;
if (ja < istart) {
ja += ninc;
}
}
}
/* finished if n2 = 4 */
if (n2 == 4) {
goto L490;
}
kk = la << 1;
/* loop on nonzero k */
for (k = ink; ink < 0 ? k >= jstep - ink : k <= jstep - ink; k += ink) {
co1 = trigs[kk]; si1 = s * trigs[kk + 1];
co2 = trigs[kk * 2]; si2 = s * trigs[kk * 2 + 1];
co3 = trigs[kk * 3]; si3 = s * trigs[kk * 3 + 1];
/* loop along transform */
i__1 = jstep << 2;
for (jjj = k; i__1 < 0 ? jjj >= (*n - 1) * *inc : jjj <= (*n - 1) * *inc; jjj += i__1) {
ja = istart + jjj;
/* "transverse" loop */
for (nu = 0; nu < inq; ++nu) {
jb = ja + jstepl;
if (jb < istart) {
jb += ninc;
}
jc = jb + jstepl;
if (jc < istart) {
jc += ninc;
}
jd = jc + jstepl;
if (jd < istart) {
jd += ninc;
}
j = 0;
/* loop across transforms */
/* dir$ ivdep,shortloop */
for (l = 0; l < nvex; ++l) {
aja = a[ja + j];
ajc = a[jc + j];
t0 = aja + ajc;
t2 = aja - ajc;
ajb = a[jb + j];
ajd = a[jd + j];
t1 = ajb + ajd;
t3 = ss * (ajb - ajd);
bja = b[ja + j];
bjc = b[jc + j];
u0 = bja + bjc;
u2 = bja - bjc;
bjb = b[jb + j];
bjd = b[jd + j];
u1 = bjb + bjd;
u3 = ss * (bjb - bjd);
a[ja + j] = t0 + t1;
b[ja + j] = u0 + u1;
a[jb + j] = co1 * (t2 - u3) - si1 * (u2 + t3);
b[jb + j] = si1 * (t2 - u3) + co1 * (u2 + t3);
a[jc + j] = co2 * (t0 - t1) - si2 * (u0 - u1);
b[jc + j] = si2 * (t0 - t1) + co2 * (u0 - u1);
a[jd + j] = co3 * (t2 + u3) - si3 * (u2 - t3);
b[jd + j] = si3 * (t2 + u3) + co3 * (u2 - t3);
j += *jump;
}
ja += jstepx;
if (ja < istart) {
ja += ninc;
}
}
}
kk += la << 1;
}
la <<= 2;
}
/* central radix-2 pass */
L200:
if (m2 == 0) {
goto L300;
}
jstep = *n * *inc / (la << 1);
jstepl = jstep - ninc;
/* k=0 loop (no twiddle factors) */
for (jjj = 0; jjj <= (*n - 1) * *inc; jjj += (jstep<<1)) {
ja = istart + jjj;
/* "transverse" loop */
for (nu = 0; nu < inq; ++nu) {
jb = ja + jstepl;
if (jb < istart) {
jb += ninc;
}
j = 0;
/* loop across transforms */
/* dir$ ivdep, shortloop */
for (l = 0; l < nvex; ++l) {
aja = a[ja + j];
ajb = a[jb + j];
t0 = aja - ajb;
a[ja + j] = aja + ajb;
a[jb + j] = t0;
bja = b[ja + j];
bjb = b[jb + j];
u0 = bja - bjb;
b[ja + j] = bja + bjb;
b[jb + j] = u0;
j += *jump;
}
ja += jstepx;
if (ja < istart) {
ja += ninc;
}
}
}
/* finished if n2=2 */
if (n2 == 2) {
goto L490;
}
kk = la << 1;
/* loop on nonzero k */
for (k = ink; ink < 0 ? k >= jstep - ink : k <= jstep - ink; k += ink) {
co1 = trigs[kk];
si1 = s * trigs[kk + 1];
/* loop along transforms */
i__1 = jstep << 1;
for (jjj = k; i__1 < 0 ? jjj >= (*n - 1) * *inc : jjj <= (*n - 1) * *inc; jjj += i__1) {
ja = istart + jjj;
/* "transverse" loop */
for (nu = 0; nu < inq; ++nu) {
jb = ja + jstepl;
if (jb < istart) {
jb += ninc;
}
j = 0;
/* loop across transforms */
if (kk == n2 / 2) {
/* dir$ ivdep, shortloop */
for (l = 0; l < nvex; ++l) {
aja = a[ja + j];
ajb = a[jb + j];
t0 = ss * (aja - ajb);
a[ja + j] = aja + ajb;
bjb = b[jb + j];
bja = b[ja + j];
a[jb + j] = ss * (bjb - bja);
b[ja + j] = bja + bjb;
b[jb + j] = t0;
j += *jump;
}
} else {
/* dir$ ivdep, shortloop */
for (l = 0; l < nvex; ++l) {
aja = a[ja + j];
ajb = a[jb + j];
t0 = aja - ajb;
a[ja + j] = aja + ajb;
bja = b[ja + j];
bjb = b[jb + j];
u0 = bja - bjb;
b[ja + j] = bja + bjb;
a[jb + j] = co1 * t0 - si1 * u0;
b[jb + j] = si1 * t0 + co1 * u0;
j += *jump;
}
}
ja += jstepx;
if (ja < istart) {
ja += ninc;
}
}
}
kk += la << 1;
}
la <<= 1;
goto L400;
/* central radix-8 pass */
L300:
if (m8 == 0) {
goto L400;
}
jstep = *n * *inc / (la << 3);
jstepl = jstep - ninc;
mu = inq % 8;
if (*isign == -1) {
mu = 8 - mu;
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?