dlasd3.c
*info = -10;
} else if (*ldu2 < n) {
*info = -12;
} else if (*ldvt < m) {
*info = -14;
} else if (*ldvt2 < m) {
*info = -16;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_("DLASD3", &i__1);
return 0;
}
/* Quick return if possible */
if (*k == 1) {
d__[1] = abs(z__[1]);
dcopy_(&m, &vt2_ref(1, 1), ldvt2, &vt_ref(1, 1), ldvt);
if (z__[1] > 0.) {
dcopy_(&n, &u2_ref(1, 1), &c__1, &u_ref(1, 1), &c__1);
} else {
i__1 = n;
for (i__ = 1; i__ <= i__1; ++i__) {
u_ref(i__, 1) = -u2_ref(i__, 1);
/* L10: */
}
}
return 0;
}
/* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
be computed with high relative accuracy (barring over/underflow).
This is a problem on machines without a guard digit in
add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
which on any of these machines zeros out the bottommost
bit of DSIGMA(I) if it is 1; this makes the subsequent
subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
occurs. On binary machines with a guard digit (almost all
machines) it does not change DSIGMA(I) at all. On hexadecimal
and decimal machines with a guard digit, it slightly
changes the bottommost bits of DSIGMA(I). It does not account
for hexadecimal or decimal machines without guard digits
(we know of none). We use a subroutine call to compute
2*DSIGMA(I) to prevent optimizing compilers from eliminating
this code. */
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__];
/* L20: */
}
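/* Note: DLAMC3(X, Y) simply returns X + Y, but because it is an
   external call the compiler cannot fold 2*DSIGMA(I) - DSIGMA(I)
   back to DSIGMA(I) at compile time. */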
/* Keep a copy of Z. */
dcopy_(k, &z__[1], &c__1, &q[q_offset], &c__1);
/* Normalize Z. */
latime_1.ops += (doublereal) (*k * 3 + 1);
rho = dnrm2_(k, &z__[1], &c__1);
dlascl_("G", &c__0, &c__0, &rho, &c_b13, k, &c__1, &z__[1], k, info);
rho *= rho;
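/* The DLASCL call rescales Z from norm RHO down to unit 2-norm (the
   constant c_b13, declared earlier in the file, is presumably 1.0),
   and RHO is then squared so that it enters the secular equation in
   DLASD4 as the squared norm of the original Z. */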
/* Find the new singular values. */
i__1 = *k;
for (j = 1; j <= i__1; ++j) {
dlasd4_(k, &j, &dsigma[1], &z__[1], &u_ref(1, j), &rho, &d__[j], &
vt_ref(1, j), info);
/* If the zero finder fails, the computation is terminated. */
if (*info != 0) {
return 0;
}
/* L30: */
}
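/* Per the DLASD4 interface, column J of U now holds DSIGMA(I) - D(J)
   and column J of VT holds DSIGMA(I) + D(J); these differences and
   sums are reused below to rebuild Z and the singular vectors. */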
/* Compute updated Z. */
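/* Each Z(I) is rebuilt so that the singular values just computed are
   exact for a nearby problem (the Gu/Eisenstat reconstruction):
   Z(I)^2 = |DSIGMA(I)^2 - D(K)^2|
            * prod_{J<I}  |DSIGMA(I)^2 - D(J)^2| / |DSIGMA(I)^2 - DSIGMA(J)^2|
            * prod_{J>=I} |DSIGMA(I)^2 - D(J)^2| / |DSIGMA(I)^2 - DSIGMA(J+1)^2|,
   with the sign taken from the copy of the original Z saved in Q(.,1). */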
latime_1.ops += (doublereal) (*k << 1);
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
z__[i__] = u_ref(i__, *k) * vt_ref(i__, *k);
latime_1.ops += (doublereal) ((i__ - 1) * 6);
i__2 = i__ - 1;
for (j = 1; j <= i__2; ++j) {
z__[i__] *= u_ref(i__, j) * vt_ref(i__, j) / (dsigma[i__] -
dsigma[j]) / (dsigma[i__] + dsigma[j]);
/* L40: */
}
latime_1.ops += (doublereal) ((*k - i__) * 6);
i__2 = *k - 1;
for (j = i__; j <= i__2; ++j) {
z__[i__] *= u_ref(i__, j) * vt_ref(i__, j) / (dsigma[i__] -
dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]);
/* L50: */
}
d__2 = sqrt((d__1 = z__[i__], abs(d__1)));
z__[i__] = d_sign(&d__2, &q_ref(i__, 1));
/* L60: */
}
/* Compute left singular vectors of the modified diagonal matrix,
and store related information for the right singular vectors.
Computing MAX */
i__1 = 0, i__2 = *k - 1 << 2;
latime_1.ops += (doublereal) (*k * ((*k << 1) + 3) + max(i__1,i__2));
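/* Column I of the (unnormalized) left singular vector matrix follows
   the usual formula for a diagonal-plus-rank-one update: component J is
   DSIGMA(J)*Z(J) / ((DSIGMA(J) - D(I))*(DSIGMA(J) + D(I))) for J >= 2,
   with the first component fixed at -1 (DSIGMA(1) is zero), while the
   reciprocal factors Z(J)/((DSIGMA(J)-D(I))*(DSIGMA(J)+D(I))) are kept
   in VT for the right-vector pass below.  The normalized vector is
   stored in Q with its rows permuted by IDXC to match the deflated
   column ordering produced by DLASD2. */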
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
vt_ref(1, i__) = z__[1] / u_ref(1, i__) / vt_ref(1, i__);
u_ref(1, i__) = -1.;
i__2 = *k;
for (j = 2; j <= i__2; ++j) {
vt_ref(j, i__) = z__[j] / u_ref(j, i__) / vt_ref(j, i__);
u_ref(j, i__) = dsigma[j] * vt_ref(j, i__);
/* L70: */
}
temp = dnrm2_(k, &u_ref(1, i__), &c__1);
q_ref(1, i__) = u_ref(1, i__) / temp;
i__2 = *k;
for (j = 2; j <= i__2; ++j) {
jc = idxc[j];
q_ref(j, i__) = u_ref(jc, i__) / temp;
/* L80: */
}
/* L90: */
}
/* Update the left singular vector matrix. */
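/* CTOT (set up by DLASD2) counts the U2 columns of each type: type 1
   columns are nonzero in the upper (NL+1) block only, type 2 in the
   lower (NR) block only, type 3 are dense, and type 4 were deflated.
   The product U2*Q is therefore split into smaller DGEMMs that skip
   the structural zero blocks; the K = 2 case is small enough to be
   handled with a single multiply. */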
if (*k == 2) {
latime_1.ops += dopbl3_("DGEMM ", &n, k, k);
dgemm_("N", "N", &n, k, k, &c_b13, &u2[u2_offset], ldu2, &q[q_offset],
ldq, &c_b27, &u[u_offset], ldu);
goto L100;
}
if (ctot[1] > 0) {
latime_1.ops += dopbl3_("DGEMM ", nl, k, &ctot[1]);
dgemm_("N", "N", nl, k, &ctot[1], &c_b13, &u2_ref(1, 2), ldu2, &q_ref(
2, 1), ldq, &c_b27, &u_ref(1, 1), ldu);
if (ctot[3] > 0) {
ktemp = ctot[1] + 2 + ctot[2];
latime_1.ops += dopbl3_("DGEMM ", nl, k, &ctot[3]);
dgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2_ref(1, ktemp), ldu2,
&q_ref(ktemp, 1), ldq, &c_b13, &u_ref(1, 1), ldu);
}
} else if (ctot[3] > 0) {
ktemp = ctot[1] + 2 + ctot[2];
latime_1.ops += dopbl3_("DGEMM ", nl, k, &ctot[3]);
dgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2_ref(1, ktemp), ldu2, &
q_ref(ktemp, 1), ldq, &c_b27, &u_ref(1, 1), ldu);
} else {
dlacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu);
}
dcopy_(k, &q_ref(1, 1), ldq, &u_ref(nlp1, 1), ldu);
ktemp = ctot[1] + 2;
ctemp = ctot[2] + ctot[3];
latime_1.ops += dopbl3_("DGEMM ", nr, k, &ctemp);
dgemm_("N", "N", nr, k, &ctemp, &c_b13, &u2_ref(nlp2, ktemp), ldu2, &
q_ref(ktemp, 1), ldq, &c_b27, &u_ref(nlp2, 1), ldu);
/* Generate the right singular vectors. */
L100:
/* Computing MAX */
i__1 = 0, i__2 = *k - 1;
latime_1.ops += (doublereal) (*k * ((*k << 1) + 1) + max(i__1,i__2));
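/* The right singular vectors reuse the reciprocal factors saved in VT:
   component J of the I-th vector is Z(J)/((DSIGMA(J)-D(I))*(DSIGMA(J)+D(I))).
   Each vector is normalized and stored as row I of Q, with its entries
   permuted by IDXC to line up with the row grouping of VT2. */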
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
temp = dnrm2_(k, &vt_ref(1, i__), &c__1);
q_ref(i__, 1) = vt_ref(1, i__) / temp;
i__2 = *k;
for (j = 2; j <= i__2; ++j) {
jc = idxc[j];
q_ref(i__, j) = vt_ref(jc, i__) / temp;
/* L110: */
}
/* L120: */
}
/* Update the right singular vector matrix. */
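/* The product Q*VT2 is split by row type as well: the first DGEMM
   forms the leading NL+1 columns of VT from row 1 and the type-1 rows
   of VT2, the second accumulates the dense rows' contribution (guarded
   so the starting index KTEMP stays within LDVT2), and the last forms
   the trailing NR+SQRE columns from the type-2 and dense rows, after
   copying row 1 of VT2 and column 1 of Q into position KTEMP whenever
   type-1 rows are present (KTEMP > 1). */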
if (*k == 2) {
latime_1.ops += dopbl3_("DGEMM ", k, &m, k);
dgemm_("N", "N", k, &m, k, &c_b13, &q[q_offset], ldq, &vt2[vt2_offset]
, ldvt2, &c_b27, &vt[vt_offset], ldvt);
return 0;
}
ktemp = ctot[1] + 1;
latime_1.ops += dopbl3_("DGEMM ", k, &nlp1, &ktemp);
dgemm_("N", "N", k, &nlp1, &ktemp, &c_b13, &q_ref(1, 1), ldq, &vt2_ref(1,
1), ldvt2, &c_b27, &vt_ref(1, 1), ldvt);
ktemp = ctot[1] + 2 + ctot[2];
latime_1.ops += dopbl3_("DGEMM ", k, &nlp1, &ctot[3]);
if (ktemp <= *ldvt2) {
dgemm_("N", "N", k, &nlp1, &ctot[3], &c_b13, &q_ref(1, ktemp), ldq, &
vt2_ref(ktemp, 1), ldvt2, &c_b13, &vt_ref(1, 1), ldvt);
}
ktemp = ctot[1] + 1;
nrp1 = *nr + *sqre;
if (ktemp > 1) {
i__1 = *k;
for (i__ = 1; i__ <= i__1; ++i__) {
q_ref(i__, ktemp) = q_ref(i__, 1);
/* L130: */
}
i__1 = m;
for (i__ = nlp2; i__ <= i__1; ++i__) {
vt2_ref(ktemp, i__) = vt2_ref(1, i__);
/* L140: */
}
}
ctemp = ctot[2] + 1 + ctot[3];
latime_1.ops += dopbl3_("DGEMM ", k, &nrp1, &ctemp);
dgemm_("N", "N", k, &nrp1, &ctemp, &c_b13, &q_ref(1, ktemp), ldq, &
vt2_ref(ktemp, nlp2), ldvt2, &c_b27, &vt_ref(1, nlp2), ldvt);
return 0;
/* End of DLASD3 */
} /* dlasd3_ */
#undef vt2_ref
#undef vt_ref
#undef u2_ref
#undef u_ref
#undef q_ref