/* slasd3.c */
#include "blaswrap.h"
/* -- translated by f2c (version 19990503).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
#include "f2c.h"
/* Common Block Declarations */
struct {
real ops, itcnt;
} latime_;
#define latime_1 latime_
/* Table of constant values */
static integer c__1 = 1;
static integer c__0 = 0;
static real c_b13 = 1.f;
static real c_b27 = 0.f;
/* Subroutine */ int slasd3_(integer *nl, integer *nr, integer *sqre, integer
*k, real *d__, real *q, integer *ldq, real *dsigma, real *u, integer *
ldu, real *u2, integer *ldu2, real *vt, integer *ldvt, real *vt2,
integer *ldvt2, integer *idxc, integer *ctot, real *z__, integer *
info)
{
/* System generated locals */
integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1,
vt_offset, vt2_dim1, vt2_offset, i__1, i__2;
real r__1, r__2;
/* Builtin functions */
double sqrt(doublereal), r_sign(real *, real *);
/* Local variables */
static real temp;
extern doublereal snrm2_(integer *, real *, integer *);
static integer i__, j, m, n, ctemp;
extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
integer *, real *, real *, integer *, real *, integer *, real *,
real *, integer *);
static integer ktemp;
extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
integer *);
extern doublereal slamc3_(real *, real *);
extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *,
real *, real *, real *, real *, integer *);
extern doublereal sopbl3_(char *, integer *, integer *, integer *);
static integer jc;
extern /* Subroutine */ int xerbla_(char *, integer *), slascl_(
char *, integer *, integer *, real *, real *, integer *, integer *
, real *, integer *, integer *), slacpy_(char *, integer *
, integer *, real *, integer *, real *, integer *);
static real rho;
static integer nlp1, nlp2, nrp1;
#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
#define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1]
#define u2_ref(a_1,a_2) u2[(a_2)*u2_dim1 + a_1]
#define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1]
#define vt2_ref(a_1,a_2) vt2[(a_2)*vt2_dim1 + a_1]
/* -- LAPACK auxiliary routine (instrumented to count ops, version 3.0) --
Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
Courant Institute, NAG Ltd., and Rice University
October 31, 1999
Purpose
=======
SLASD3 finds all the square roots of the roots of the secular
equation, as defined by the values in D and Z. It makes the
appropriate calls to SLASD4 and then updates the singular
vectors by matrix multiplication.
This code makes very mild assumptions about floating point
arithmetic. It will work on machines with a guard digit in
add/subtract, or on those binary machines without guard digits
which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
It could conceivably fail on hexadecimal or decimal machines
without guard digits, but we know of none.
SLASD3 is called from SLASD1.
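For orientation, a sketch in terms of the standard rank-one-update
formulation (only the arguments D, Z, DSIGMA and RHO used in the code
below appear here): after Z has been scaled to unit norm and RHO set to
the square of its original norm, the squares of the computed singular
values are the eigenvalues of

    diag(DSIGMA)**2 + RHO * Z * Z**T,

i.e. each value SIGMA = D(J) returned by SLASD4 satisfies a secular
equation of the form

    1 + RHO * sum_{j=1..K} Z(j)**2 / ( DSIGMA(j)**2 - SIGMA**2 ) = 0.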
Arguments
=========
NL (input) INTEGER
The row dimension of the upper block. NL >= 1.
NR (input) INTEGER
The row dimension of the lower block. NR >= 1.
SQRE (input) INTEGER
= 0: the lower block is an NR-by-NR square matrix.
= 1: the lower block is an NR-by-(NR+1) rectangular matrix.
The bidiagonal matrix has N = NL + NR + 1 rows and
M = N + SQRE >= N columns.
K (input) INTEGER
The size of the secular equation, 1 <= K <= N.
D (output) REAL array, dimension(K)
On exit the square roots of the roots of the secular equation,
in ascending order.
Q (workspace) REAL array,
dimension at least (LDQ,K).
LDQ (input) INTEGER
The leading dimension of the array Q. LDQ >= K.
DSIGMA (input) REAL array, dimension(K)
The first K elements of this array contain the old roots
of the deflated updating problem. These are the poles
of the secular equation.
U (input) REAL array, dimension (LDU, N)
The last N - K columns of this matrix contain the deflated
left singular vectors.
LDU (input) INTEGER
The leading dimension of the array U. LDU >= N.
U2 (input) REAL array, dimension (LDU2, N)
The first K columns of this matrix contain the non-deflated
left singular vectors for the split problem.
LDU2 (input) INTEGER
The leading dimension of the array U2. LDU2 >= N.
VT (input) REAL array, dimension (LDVT, M)
The last M - K columns of VT' contain the deflated
right singular vectors.
LDVT (input) INTEGER
The leading dimension of the array VT. LDVT >= N.
VT2 (input) REAL array, dimension (LDVT2, N)
The first K columns of VT2' contain the non-deflated
right singular vectors for the split problem.
LDVT2 (input) INTEGER
The leading dimension of the array VT2. LDVT2 >= N.
IDXC (input) INTEGER array, dimension ( N )
The permutation used to arrange the columns of U (and rows of
VT) into three groups: the first group contains non-zero
entries only at and above (or before) NL+1; the second
contains non-zero entries only at and below (or after) NL+2;
and the third is dense. The first column of U and the first row of
VT are treated separately, however.
The rows of the singular vectors found by SLASD4
must be likewise permuted before the matrix multiplies can
take place.
CTOT (input) INTEGER array, dimension ( 4 )
A count of the total number of the various types of columns
in U (or rows in VT), as described in IDXC. The fourth column
type is any column which has been deflated.
Z (input) REAL array, dimension (K)
The first K elements of this array contain the components
of the deflation-adjusted updating row vector.
INFO (output) INTEGER
= 0: successful exit.
< 0: if INFO = -i, the i-th argument had an illegal value.
> 0: if INFO = 1, a singular value did not converge
Further Details
===============
Based on contributions by
Ming Gu and Huan Ren, Computer Science Division, University of
California at Berkeley, USA
=====================================================================
Test the input parameters.
Parameter adjustments */
--d__;
q_dim1 = *ldq;
q_offset = 1 + q_dim1 * 1;
q -= q_offset;
--dsigma;
u_dim1 = *ldu;
u_offset = 1 + u_dim1 * 1;
u -= u_offset;
u2_dim1 = *ldu2;
u2_offset = 1 + u2_dim1 * 1;
u2 -= u2_offset;
vt_dim1 = *ldvt;
vt_offset = 1 + vt_dim1 * 1;
vt -= vt_offset;
vt2_dim1 = *ldvt2;
vt2_offset = 1 + vt2_dim1 * 1;
vt2 -= vt2_offset;
--idxc;
--ctot;
--z__;
/* Function Body */
*info = 0;
if (*nl < 1) {
*info = -1;
} else if (*nr < 1) {
*info = -2;
} else if (*sqre != 1 && *sqre != 0) {
*info = -3;
}
n = *nl + *nr + 1;
m = n + *sqre;
nlp1 = *nl + 1;
nlp2 = *nl + 2;
if (*k < 1 || *k > n) {
*info = -4;
} else if (*ldq < *k) {
*info = -7;
} else if (*ldu < n) {
*info = -10;
} else if (*ldu2 < n) {
*info = -12;
} else if (*ldvt < m) {
*info = -14;
} else if (*ldvt2 < m) {
*info = -16;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_("SLASD3", &i__1);
return 0;
}
/* Quick return if possible */
if (*k == 1) {
d__[1] = dabs(z__[1]);
scopy_(&m, &vt2_ref(1, 1), ldvt2, &vt_ref(1, 1), ldvt);
if (z__[1] > 0.f) {
scopy_(&n, &u2_ref(1, 1), &c__1, &u_ref(1, 1), &c__1);
} else {
i__1 = n;
for (i__ = 1; i__ <= i__1; ++i__) {
u_ref(i__, 1) = -u2_ref(i__, 1);
/* L10: */
}
}
return 0;
}
/* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
be computed with high relative accuracy (barring over/underflow).
This is a problem on machines without a guard digit in
add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
which on any of these machines zeros out the bottommost
bit of DSIGMA(I) if it is 1; this makes the subsequent
subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
occurs. On binary machines with a guard digit (almost all
machines) it does not change DSIGMA(I) at all. On hexadecimal
and decimal machines with a guard digit, it slightly
changes the bottommost bits of DSIGMA(I). It does not account
for hexadecimal or decimal machines without guard digits
(we know of none). We use a subroutine call to compute
2*DSIGMA(I) to prevent optimizing compilers from eliminating
this code. */
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__];
/* L20: */
}
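/* Note on the statement above: SLAMC3(A, B) simply returns A + B, but
because it is an external call the operands are forced out to memory
first. The intended effect (a sketch, consistent with the comment block
above) is that DSIGMA(I) is replaced by fl( 2*DSIGMA(I) ) - DSIGMA(I)
in working precision, rather than the whole expression being simplified
away by an optimizing compiler. */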
/* Keep a copy of Z. */
scopy_(k, &z__[1], &c__1, &q[q_offset], &c__1);
/* Normalize Z. */
latime_1.ops += (real) (*k * 3 + 1);
rho = snrm2_(k, &z__[1], &c__1);
slascl_("G", &c__0, &c__0, &rho, &c_b13, k, &c__1, &z__[1], k, info);
rho *= rho;
/* Find the new singular values. */
i__1 = *k;
for (j = 1; j <= i__1; ++j) {
slasd4_(k, &j, &dsigma[1], &z__[1], &u_ref(1, j), &rho, &d__[j], &
vt_ref(1, j), info);
/* If the zero finder fails, the computation is terminated. */
if (*info != 0) {
return 0;
}
/* L30: */
}
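/* Assuming the standard SLASD4 interface: after the call above, D(j)
holds the j-th new singular value, column j of U holds the workspace
values DSIGMA(i) - D(j), and column j of VT holds DSIGMA(i) + D(j),
so that U(i,j)*VT(i,j) = DSIGMA(i)**2 - D(j)**2. U and VT serve only
as scratch space here and are overwritten later. */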
/* Compute updated Z. */
latime_1.ops += (real) (*k << 1);
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
z__[i__] = u_ref(i__, *k) * vt_ref(i__, *k);
latime_1.ops += (real) ((i__ - 1) * 6);
i__2 = i__ - 1;
for (j = 1; j <= i__2; ++j) {
z__[i__] *= u_ref(i__, j) * vt_ref(i__, j) / (dsigma[i__] -
dsigma[j]) / (dsigma[i__] + dsigma[j]);
/* L40: */
}
latime_1.ops += (real) ((*k - i__) * 6);
i__2 = *k - 1;
for (j = i__; j <= i__2; ++j) {
z__[i__] *= u_ref(i__, j) * vt_ref(i__, j) / (dsigma[i__] -
dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]);
/* L50: */
}
r__2 = sqrt((r__1 = z__[i__], dabs(r__1)));
z__[i__] = r_sign(&r__2, &q_ref(i__, 1));
/* L60: */
}
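/* A sketch of what the loop above produces, using the workspace
contents noted after the SLASD4 calls:

Z(I)**2 = | ( DSIGMA(I)**2 - D(K)**2 )
  * prod_{J=1..I-1} ( DSIGMA(I)**2 - D(J)**2 ) / ( DSIGMA(I)**2 - DSIGMA(J)**2 )
  * prod_{J=I..K-1} ( DSIGMA(I)**2 - D(J)**2 ) / ( DSIGMA(I)**2 - DSIGMA(J+1)**2 ) |,

with Z(I) given the sign of the original Z(I) saved in Q(:,1). This
recomputes Z from the computed singular values so that DSIGMA, Z and D
form a numerically consistent triple before the vectors are built. */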
/* Compute left singular vectors of the modified diagonal matrix,
and store related information for the right singular vectors.
Computing MAX */
i__1 = 0, i__2 = *k - 1 << 2;
latime_1.ops += (real) (*k * ((*k << 1) + 3) + max(i__1,i__2));
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
vt_ref(1, i__) = z__[1] / u_ref(1, i__) / vt_ref(1, i__);
u_ref(1, i__) = -1.f;
i__2 = *k;
for (j = 2; j <= i__2; ++j) {
vt_ref(j, i__) = z__[j] / u_ref(j, i__) / vt_ref(j, i__);
u_ref(j, i__) = dsigma[j] * vt_ref(j, i__);
/* L70: */
}
temp = snrm2_(k, &u_ref(1, i__), &c__1);
q_ref(1, i__) = u_ref(1, i__) / temp;
i__2 = *k;
for (j = 2; j <= i__2; ++j) {
jc = idxc[j];
q_ref(j, i__) = u_ref(jc, i__) / temp;
/* L80: */
}
/* L90: */
}
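/* Sketch of the loop above (assuming DSIGMA(1) = 0 as set up by the
deflation step): for the I-th computed singular value, the unnormalized
right singular vector components Z(J) / ( DSIGMA(J)**2 - D(I)**2 ) are
left in VT(:,I), while the left vector has components
DSIGMA(J) * Z(J) / ( DSIGMA(J)**2 - D(I)**2 ) for J >= 2 and -1 as its
first component. The left vector is then normalized and stored in
Q(:,I) with its rows permuted by IDXC, ready for the block multiplies
below. */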
/* Update the left singular vector matrix. */
if (*k == 2) {
latime_1.ops += sopbl3_("SGEMM ", &n, k, k);
sgemm_("N", "N", &n, k, k, &c_b13, &u2[u2_offset], ldu2, &q[q_offset],
ldq, &c_b27, &u[u_offset], ldu);
goto L100;
}
if (ctot[1] > 0) {
latime_1.ops += sopbl3_("SGEMM ", nl, k, &ctot[1]);
sgemm_("N", "N", nl, k, &ctot[1], &c_b13, &u2_ref(1, 2), ldu2, &q_ref(
2, 1), ldq, &c_b27, &u_ref(1, 1), ldu);
if (ctot[3] > 0) {
ktemp = ctot[1] + 2 + ctot[2];
latime_1.ops += sopbl3_("SGEMM ", nl, k, &ctot[3]);
sgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2_ref(1, ktemp), ldu2,
&q_ref(ktemp, 1), ldq, &c_b13, &u_ref(1, 1), ldu);
}
} else if (ctot[3] > 0) {
ktemp = ctot[1] + 2 + ctot[2];
latime_1.ops += sopbl3_("SGEMM ", nl, k, &ctot[3]);
sgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2_ref(1, ktemp), ldu2, &
q_ref(ktemp, 1), ldq, &c_b27, &u_ref(1, 1), ldu);
} else {
slacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu);
}
scopy_(k, &q_ref(1, 1), ldq, &u_ref(nlp1, 1), ldu);
ktemp = ctot[1] + 2;
ctemp = ctot[2] + ctot[3];
latime_1.ops += sopbl3_("SGEMM ", nr, k, &ctemp);
sgemm_("N", "N", nr, k, &ctemp, &c_b13, &u2_ref(nlp2, ktemp), ldu2, &
q_ref(ktemp, 1), ldq, &c_b27, &u_ref(nlp2, 1), ldu);
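/* The SCOPY/SGEMM sequence above assembles U block by block using the
column counts in CTOT: the CTOT(1) columns with nonzeros only in the
upper block and the CTOT(3) dense columns build rows 1..NL, row NLP1
comes directly from the first row of Q (the special first column), and
the CTOT(2)+CTOT(3) columns starting at position CTOT(1)+2 build rows
NLP2..N, so each multiply touches only blocks of U2 that can be
nonzero. */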
/* Generate the right singular vectors. */
L100:
/* Computing MAX */
i__1 = 0, i__2 = *k - 1;
latime_1.ops += (real) (*k * ((*k << 1) + 1) + max(i__1,i__2));
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
temp = snrm2_(k, &vt_ref(1, i__), &c__1);
q_ref(i__, 1) = vt_ref(1, i__) / temp;
i__2 = *k;
for (j = 2; j <= i__2; ++j) {
jc = idxc[j];
q_ref(i__, j) = vt_ref(jc, i__) / temp;
/* L110: */
}
/* L120: */
}
/* Update the right singular vector matrix. */
if (*k == 2) {
latime_1.ops += sopbl3_("SGEMM ", k, &m, k);
sgemm_("N", "N", k, &m, k, &c_b13, &q[q_offset], ldq, &vt2[vt2_offset]
, ldvt2, &c_b27, &vt[vt_offset], ldvt);
return 0;
}
ktemp = ctot[1] + 1;
latime_1.ops += sopbl3_("SGEMM ", k, &nlp1, &ktemp);
sgemm_("N", "N", k, &nlp1, &ktemp, &c_b13, &q_ref(1, 1), ldq, &vt2_ref(1,
1), ldvt2, &c_b27, &vt_ref(1, 1), ldvt);
ktemp = ctot[1] + 2 + ctot[2];
latime_1.ops += sopbl3_("SGEMM ", k, &nlp1, &ctot[3]);
if (ktemp <= *ldvt2) {
sgemm_("N", "N", k, &nlp1, &ctot[3], &c_b13, &q_ref(1, ktemp), ldq, &
vt2_ref(ktemp, 1), ldvt2, &c_b13, &vt_ref(1, 1), ldvt);
}
ktemp = ctot[1] + 1;
nrp1 = *nr + *sqre;
if (ktemp > 1) {
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
q_ref(i__, ktemp) = q_ref(i__, 1);
/* L130: */
}
i__1 = m;
for (i__ = nlp2; i__ <= i__1; ++i__) {
vt2_ref(ktemp, i__) = vt2_ref(1, i__);
/* L140: */
}
}
ctemp = ctot[2] + 1 + ctot[3];
latime_1.ops += sopbl3_("SGEMM ", k, &nrp1, &ctemp);
sgemm_("N", "N", k, &nrp1, &ctemp, &c_b13, &q_ref(1, ktemp), ldq, &
vt2_ref(ktemp, nlp2), ldvt2, &c_b27, &vt_ref(1, nlp2), ldvt);
return 0;
/* End of SLASD3 */
} /* slasd3_ */
#undef vt2_ref
#undef vt_ref
#undef u2_ref
#undef u_ref
#undef q_ref