gpfa2f.c

来自「InsightToolkit-1.4.0(有大量的优化算法程序)」· C语言 代码 · 共 1,107 行 · 第 1/3 页

C
1,107
字号
#include "f2c.h"
#include "netlib.h"
extern double sqrt(double); /* #include <math.h> */

/* Table of constant values */
static integer c__2 = 2;

/*     fortran version of *gpfa2* -                                    */
/*     radix-2 section of self-sorting, in-place, generalized pfa      */
/*     central radix-2 and radix-8 passes included                     */
/*      so that transform length can be any power of 2                 */
/*                                                                     */
/* ------------------------------------------------------------------- */

/* Subroutine */ void gpfa2f_(real *a, real *b, const real *trigs, const integer *inc,
        const integer *jump, const integer *n, const integer *mm, const integer *lot, const integer *isign)
{
    /* Initialized data */
    static integer lvr = 1024;

    /* System generated locals */
    integer i__1;

    /* Local variables */
    static integer ninc, left, nvex, j, k, l, m;
    static real s;
    static integer ipass, nblox;
    static real c1;
    static integer jstep;
    static real c2, c3;
    static integer m2, n2;
    static real t0;
    static integer m8;
    static real t2, t1, t3, u0, u2, u1, u3;
    static integer ja, jb, jc, jd, je, jf, jg, jh, ji, jj, jk, jl, jm, jn, jo, jp, la, nb, mh, kk, ll, mu, nu, laincl;
    static real ss;
    static integer jstepl;
    static real co1, co2, co3;
    static integer istart;
    static real co4, co5, co6, co7;
    static integer jstepx;
    static real si1, si2, si3, si4, si5, si6, si7, aja, ajb, ajc, ajd, bja,
            bjc, bjb, bjd, aje, ajg, ajf, ajh, bje, bjg, bjf, bjh, aji;
    static integer jjj;
    static real bjm, ajj;
    static integer ink;
    static real bjj, ajk, ajl, bji, bjk;
    static integer inq;
    static real ajo, bjl, bjo, ajm, ajn, ajp, bjn, bjp;

/*     *************************************************************** */
/*     *                                                             * */
/*     *  N.B. LVR = LENGTH OF VECTOR REGISTERS, SET TO 128 FOR C90. * */
/*     *  RESET TO 64 FOR OTHER CRAY MACHINES, OR TO ANY LARGE VALUE * */
/*     *  (GREATER THAN OR EQUAL TO LOT) FOR A SCALAR COMPUTER.      * */
/*     *                                                             * */
/*     *************************************************************** */

    n2 = pow_ii(&c__2, mm);
    inq = *n / n2;
    jstepx = (n2 - *n) * *inc;
    ninc = *n * *inc;
    ink = *inc * inq;

    m2 = 0;
    m8 = 0;
    if (*mm % 2 == 0) {
        m = *mm / 2;
    } else if (*mm % 4 == 1) {
        m = (*mm - 1) / 2;
        m2 = 1;
    } else if (*mm % 4 == 3) {
        m = (*mm - 3) / 2;
        m8 = 1;
    }
    mh = (m + 1) / 2;

    nblox = (*lot - 1) / lvr + 1;
    left = *lot;
    s = (real) (*isign);
    istart = 0;

/*  loop on blocks of lvr transforms */
    for (nb = 0; nb < nblox; ++nb) {

        if (left <= lvr) {
            nvex = left;
        } else if (left < lvr << 1) {
            nvex = left / 2;
            nvex += nvex % 2;
        } else {
            nvex = lvr;
        }
        left -= nvex;

        la = 1;

/*  loop on type I radix-4 passes */
        mu = inq % 4;
        if (*isign == -1) {
            mu = 4 - mu;
        }
        ss = 1.f;
        if (mu == 3) {
            ss = -1.f;
        }

        if (mh == 0) {
            goto L200;
        }

        for (ipass = 0; ipass < mh; ++ipass) {
            jstep = *n * *inc / (la << 2);
            jstepl = jstep - ninc;

/*  k = 0 loop (no twiddle factors) */
            i__1 = jstep << 2;
            for (jjj = 0; i__1 < 0 ? jjj >= (*n - 1) * *inc : jjj <= (*n - 1) * *inc; jjj += i__1) {
                ja = istart + jjj;

/*     "transverse" loop */
                for (nu = 0; nu < inq; ++nu) {
                    jb = ja + jstepl;
                    if (jb < istart) {
                        jb += ninc;
                    }
                    jc = jb + jstepl;
                    if (jc < istart) {
                        jc += ninc;
                    }
                    jd = jc + jstepl;
                    if (jd < istart) {
                        jd += ninc;
                    }
                    j = 0;

/*  loop across transforms */
/* dir$ ivdep, shortloop */
                    for (l = 0; l < nvex; ++l) {
                        aja = a[ja + j];
                        ajc = a[jc + j];
                        t0 = aja + ajc;
                        t2 = aja - ajc;
                        ajb = a[jb + j];
                        ajd = a[jd + j];
                        t1 = ajb + ajd;
                        t3 = ss * (ajb - ajd);
                        bja = b[ja + j];
                        bjc = b[jc + j];
                        u0 = bja + bjc;
                        u2 = bja - bjc;
                        bjb = b[jb + j];
                        bjd = b[jd + j];
                        u1 = bjb + bjd;
                        u3 = ss * (bjb - bjd);
                        a[ja + j] = t0 + t1;
                        a[jc + j] = t0 - t1;
                        b[ja + j] = u0 + u1;
                        b[jc + j] = u0 - u1;
                        a[jb + j] = t2 - u3;
                        a[jd + j] = t2 + u3;
                        b[jb + j] = u2 + t3;
                        b[jd + j] = u2 - t3;
                        j += *jump;
                    }
                    ja += jstepx;
                    if (ja < istart) {
                        ja += ninc;
                    }
                }
            }

/*  finished if n2 = 4 */
            if (n2 == 4) {
                goto L490;
            }
            kk = la << 1;

/*  loop on nonzero k */
            for (k = ink; ink < 0 ? k >= jstep - ink : k <= jstep - ink; k += ink) {
                co1 = trigs[kk];     si1 = s * trigs[kk + 1];
                co2 = trigs[kk * 2]; si2 = s * trigs[kk * 2 + 1];
                co3 = trigs[kk * 3]; si3 = s * trigs[kk * 3 + 1];

/*  loop along transform */
                i__1 = jstep << 2;
                for (jjj = k; i__1 < 0 ? jjj >= (*n - 1) * *inc : jjj <= (*n - 1) * *inc; jjj += i__1) {
                    ja = istart + jjj;

/*     "transverse" loop */
                    for (nu = 0; nu < inq; ++nu) {
                        jb = ja + jstepl;
                        if (jb < istart) {
                            jb += ninc;
                        }
                        jc = jb + jstepl;
                        if (jc < istart) {
                            jc += ninc;
                        }
                        jd = jc + jstepl;
                        if (jd < istart) {
                            jd += ninc;
                        }
                        j = 0;

/*  loop across transforms */
/* dir$ ivdep,shortloop */
                        for (l = 0; l < nvex; ++l) {
                            aja = a[ja + j];
                            ajc = a[jc + j];
                            t0 = aja + ajc;
                            t2 = aja - ajc;
                            ajb = a[jb + j];
                            ajd = a[jd + j];
                            t1 = ajb + ajd;
                            t3 = ss * (ajb - ajd);
                            bja = b[ja + j];
                            bjc = b[jc + j];
                            u0 = bja + bjc;
                            u2 = bja - bjc;
                            bjb = b[jb + j];
                            bjd = b[jd + j];
                            u1 = bjb + bjd;
                            u3 = ss * (bjb - bjd);
                            a[ja + j] = t0 + t1;
                            b[ja + j] = u0 + u1;
                            a[jb + j] = co1 * (t2 - u3) - si1 * (u2 + t3);
                            b[jb + j] = si1 * (t2 - u3) + co1 * (u2 + t3);
                            a[jc + j] = co2 * (t0 - t1) - si2 * (u0 - u1);
                            b[jc + j] = si2 * (t0 - t1) + co2 * (u0 - u1);
                            a[jd + j] = co3 * (t2 + u3) - si3 * (u2 - t3);
                            b[jd + j] = si3 * (t2 + u3) + co3 * (u2 - t3);
                            j += *jump;
                        }
                        ja += jstepx;
                        if (ja < istart) {
                            ja += ninc;
                        }
                    }
                }
                kk += la << 1;
            }
            la <<= 2;
        }

/*  central radix-2 pass */
L200:
        if (m2 == 0) {
            goto L300;
        }

        jstep = *n * *inc / (la << 1);
        jstepl = jstep - ninc;

/*  k=0 loop (no twiddle factors) */
        for (jjj = 0; jjj <= (*n - 1) * *inc; jjj += (jstep<<1)) {
            ja = istart + jjj;

/*     "transverse" loop */
            for (nu = 0; nu < inq; ++nu) {
                jb = ja + jstepl;
                if (jb < istart) {
                    jb += ninc;
                }
                j = 0;

/*  loop across transforms */
/* dir$ ivdep, shortloop */
                for (l = 0; l < nvex; ++l) {
                    aja = a[ja + j];
                    ajb = a[jb + j];
                    t0 = aja - ajb;
                    a[ja + j] = aja + ajb;
                    a[jb + j] = t0;
                    bja = b[ja + j];
                    bjb = b[jb + j];
                    u0 = bja - bjb;
                    b[ja + j] = bja + bjb;
                    b[jb + j] = u0;
                    j += *jump;
                }
                ja += jstepx;
                if (ja < istart) {
                    ja += ninc;
                }
            }
        }

/*  finished if n2=2 */
        if (n2 == 2) {
            goto L490;
        }

        kk = la << 1;

/*  loop on nonzero k */
        for (k = ink; ink < 0 ? k >= jstep - ink : k <= jstep - ink; k += ink) {
            co1 = trigs[kk];
            si1 = s * trigs[kk + 1];

/*  loop along transforms */
            i__1 = jstep << 1;
            for (jjj = k; i__1 < 0 ? jjj >= (*n - 1) * *inc : jjj <= (*n - 1) * *inc; jjj += i__1) {
                ja = istart + jjj;

/*     "transverse" loop */
                for (nu = 0; nu < inq; ++nu) {
                    jb = ja + jstepl;
                    if (jb < istart) {
                        jb += ninc;
                    }
                    j = 0;

/*  loop across transforms */
                    if (kk == n2 / 2) {
/* dir$ ivdep, shortloop */
                        for (l = 0; l < nvex; ++l) {
                            aja = a[ja + j];
                            ajb = a[jb + j];
                            t0 = ss * (aja - ajb);
                            a[ja + j] = aja + ajb;
                            bjb = b[jb + j];
                            bja = b[ja + j];
                            a[jb + j] = ss * (bjb - bja);
                            b[ja + j] = bja + bjb;
                            b[jb + j] = t0;
                            j += *jump;
                        }
                    } else {

/* dir$ ivdep, shortloop */
                        for (l = 0; l < nvex; ++l) {
                            aja = a[ja + j];
                            ajb = a[jb + j];
                            t0 = aja - ajb;
                            a[ja + j] = aja + ajb;
                            bja = b[ja + j];
                            bjb = b[jb + j];
                            u0 = bja - bjb;
                            b[ja + j] = bja + bjb;
                            a[jb + j] = co1 * t0 - si1 * u0;
                            b[jb + j] = si1 * t0 + co1 * u0;
                            j += *jump;
                        }
                    }

                    ja += jstepx;
                    if (ja < istart) {
                        ja += ninc;
                    }
                }
            }
            kk += la << 1;
        }

        la <<= 1;
        goto L400;

/*  central radix-8 pass */
L300:
        if (m8 == 0) {
            goto L400;
        }
        jstep = *n * *inc / (la << 3);
        jstepl = jstep - ninc;
        mu = inq % 8;
        if (*isign == -1) {
            mu = 8 - mu;
        }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?