📄 zlalsd.c
字号:
/* L100: */
}
/* Since B is complex, the following call to DGEMM is performed
in two steps (real and imaginary parts). That is for V * B
(in the real version of the code V' is stored in WORK).
CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO,
$ WORK( NWORK ), N ) */
j = irwb - 1;
i__1 = *nrhs;
for (jcol = 1; jcol <= i__1; ++jcol) {
i__2 = *n;
for (jrow = 1; jrow <= i__2; ++jrow) {
++j;
i__3 = b_subscr(jrow, jcol);
rwork[j] = b[i__3].r;
/* L110: */
}
/* L120: */
}
latime_1.ops += dopbl3_("DGEMM ", n, nrhs, n);
dgemm_("T", "N", n, nrhs, n, &c_b10, &rwork[irwvt], n, &rwork[irwb],
n, &c_b35, &rwork[irwrb], n);
j = irwb - 1;
i__1 = *nrhs;
for (jcol = 1; jcol <= i__1; ++jcol) {
i__2 = *n;
for (jrow = 1; jrow <= i__2; ++jrow) {
++j;
rwork[j] = d_imag(&b_ref(jrow, jcol));
/* L130: */
}
/* L140: */
}
latime_1.ops += dopbl3_("DGEMM ", n, nrhs, n);
dgemm_("T", "N", n, nrhs, n, &c_b10, &rwork[irwvt], n, &rwork[irwb],
n, &c_b35, &rwork[irwib], n);
jreal = irwrb - 1;
jimag = irwib - 1;
i__1 = *nrhs;
for (jcol = 1; jcol <= i__1; ++jcol) {
i__2 = *n;
for (jrow = 1; jrow <= i__2; ++jrow) {
++jreal;
++jimag;
i__3 = b_subscr(jrow, jcol);
i__4 = jreal;
i__5 = jimag;
z__1.r = rwork[i__4], z__1.i = rwork[i__5];
b[i__3].r = z__1.r, b[i__3].i = z__1.i;
/* L150: */
}
/* L160: */
}
/* Unscale. */
latime_1.ops += (doublereal) (*n + *n * 6 * *nrhs);
dlascl_("G", &c__0, &c__0, &c_b10, &orgnrm, n, &c__1, &d__[1], n,
info);
dlasrt_("D", n, &d__[1], info);
zlascl_("G", &c__0, &c__0, &orgnrm, &c_b10, n, nrhs, &b[b_offset],
ldb, info);
return 0;
}
/* Book-keeping and setting up some constants. */
nlvl = (integer) (log((doublereal) (*n) / (doublereal) (*smlsiz + 1)) /
log(2.)) + 1;
smlszp = *smlsiz + 1;
u = 1;
vt = *smlsiz * *n + 1;
difl = vt + smlszp * *n;
difr = difl + nlvl * *n;
z__ = difr + (nlvl * *n << 1);
c__ = z__ + nlvl * *n;
s = c__ + *n;
poles = s + *n;
givnum = poles + (nlvl << 1) * *n;
nrwork = givnum + (nlvl << 1) * *n;
bx = 1;
irwrb = nrwork;
irwib = irwrb + *smlsiz * *nrhs;
irwb = irwib + *smlsiz * *nrhs;
sizei = *n + 1;
k = sizei + *n;
givptr = k + *n;
perm = givptr + *n;
givcol = perm + nlvl * *n;
iwk = givcol + (nlvl * *n << 1);
st = 1;
sqre = 0;
icmpq1 = 1;
icmpq2 = 0;
nsub = 0;
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
if ((d__1 = d__[i__], abs(d__1)) < eps) {
d__[i__] = d_sign(&eps, &d__[i__]);
}
/* L170: */
}
i__1 = nm1;
for (i__ = 1; i__ <= i__1; ++i__) {
if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) {
++nsub;
iwork[nsub] = st;
/* Subproblem found. First determine its size and then
apply divide and conquer on it. */
if (i__ < nm1) {
/* A subproblem with E(I) small for I < NM1. */
nsize = i__ - st + 1;
iwork[sizei + nsub - 1] = nsize;
} else if ((d__1 = e[i__], abs(d__1)) >= eps) {
/* A subproblem with E(NM1) not too small but I = NM1. */
nsize = *n - st + 1;
iwork[sizei + nsub - 1] = nsize;
} else {
/* A subproblem with E(NM1) small. This implies an
1-by-1 subproblem at D(N), which is not solved
explicitly. */
nsize = i__ - st + 1;
iwork[sizei + nsub - 1] = nsize;
++nsub;
iwork[nsub] = *n;
iwork[sizei + nsub - 1] = 1;
zcopy_(nrhs, &b_ref(*n, 1), ldb, &work[bx + nm1], n);
}
st1 = st - 1;
if (nsize == 1) {
/* This is a 1-by-1 subproblem and is not solved
explicitly. */
zcopy_(nrhs, &b_ref(st, 1), ldb, &work[bx + st1], n);
} else if (nsize <= *smlsiz) {
/* This is a small subproblem and is solved by DLASDQ. */
dlaset_("A", &nsize, &nsize, &c_b35, &c_b10, &rwork[vt + st1],
n);
dlaset_("A", &nsize, &nsize, &c_b35, &c_b10, &rwork[u + st1],
n);
dlasdq_("U", &c__0, &nsize, &nsize, &nsize, &c__0, &d__[st], &
e[st], &rwork[vt + st1], n, &rwork[u + st1], n, &
rwork[nrwork], &c__1, &rwork[nrwork], info)
;
if (*info != 0) {
return 0;
}
/* In the real version, B is passed to DLASDQ and multiplied
internally by Q'. Here B is complex and that product is
computed below in two steps (real and imaginary parts). */
j = irwb - 1;
i__2 = *nrhs;
for (jcol = 1; jcol <= i__2; ++jcol) {
i__3 = st + nsize - 1;
for (jrow = st; jrow <= i__3; ++jrow) {
++j;
i__4 = b_subscr(jrow, jcol);
rwork[j] = b[i__4].r;
/* L180: */
}
/* L190: */
}
latime_1.ops += dopbl3_("DGEMM ", &nsize, nrhs, &nsize);
dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b10, &rwork[u + st1]
, n, &rwork[irwb], &nsize, &c_b35, &rwork[irwrb], &
nsize);
j = irwb - 1;
i__2 = *nrhs;
for (jcol = 1; jcol <= i__2; ++jcol) {
i__3 = st + nsize - 1;
for (jrow = st; jrow <= i__3; ++jrow) {
++j;
rwork[j] = d_imag(&b_ref(jrow, jcol));
/* L200: */
}
/* L210: */
}
latime_1.ops += dopbl3_("DGEMM ", &nsize, nrhs, &nsize);
dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b10, &rwork[u + st1]
, n, &rwork[irwb], &nsize, &c_b35, &rwork[irwib], &
nsize);
jreal = irwrb - 1;
jimag = irwib - 1;
i__2 = *nrhs;
for (jcol = 1; jcol <= i__2; ++jcol) {
i__3 = st + nsize - 1;
for (jrow = st; jrow <= i__3; ++jrow) {
++jreal;
++jimag;
i__4 = b_subscr(jrow, jcol);
i__5 = jreal;
i__6 = jimag;
z__1.r = rwork[i__5], z__1.i = rwork[i__6];
b[i__4].r = z__1.r, b[i__4].i = z__1.i;
/* L220: */
}
/* L230: */
}
zlacpy_("A", &nsize, nrhs, &b_ref(st, 1), ldb, &work[bx + st1]
, n);
} else {
/* A large problem. Solve it using divide and conquer. */
dlasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], &
rwork[u + st1], n, &rwork[vt + st1], &iwork[k + st1],
&rwork[difl + st1], &rwork[difr + st1], &rwork[z__ +
st1], &rwork[poles + st1], &iwork[givptr + st1], &
iwork[givcol + st1], n, &iwork[perm + st1], &rwork[
givnum + st1], &rwork[c__ + st1], &rwork[s + st1], &
rwork[nrwork], &iwork[iwk], info);
if (*info != 0) {
return 0;
}
bxst = bx + st1;
zlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b_ref(st, 1), ldb, &
work[bxst], n, &rwork[u + st1], n, &rwork[vt + st1], &
iwork[k + st1], &rwork[difl + st1], &rwork[difr + st1]
, &rwork[z__ + st1], &rwork[poles + st1], &iwork[
givptr + st1], &iwork[givcol + st1], n, &iwork[perm +
st1], &rwork[givnum + st1], &rwork[c__ + st1], &rwork[
s + st1], &rwork[nrwork], &iwork[iwk], info);
if (*info != 0) {
return 0;
}
}
st = i__ + 1;
}
/* L240: */
}
/* Apply the singular values and treat the tiny ones as zero. */
latime_1.ops += 1.;
tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1));
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
/* Some of the elements in D can be negative because 1-by-1
subproblems were not solved explicitly. */
if ((d__1 = d__[i__], abs(d__1)) <= tol) {
zlaset_("A", &c__1, nrhs, &c_b1, &c_b1, &work[bx + i__ - 1], n);
} else {
++(*rank);
latime_1.ops += (doublereal) (*nrhs * 6);
zlascl_("G", &c__0, &c__0, &d__[i__], &c_b10, &c__1, nrhs, &work[
bx + i__ - 1], n, info);
}
d__[i__] = (d__1 = d__[i__], abs(d__1));
/* L250: */
}
/* Now apply back the right singular vectors. */
icmpq2 = 1;
i__1 = nsub;
for (i__ = 1; i__ <= i__1; ++i__) {
st = iwork[i__];
st1 = st - 1;
nsize = iwork[sizei + i__ - 1];
bxst = bx + st1;
if (nsize == 1) {
zcopy_(nrhs, &work[bxst], n, &b_ref(st, 1), ldb);
} else if (nsize <= *smlsiz) {
/* Since B and BX are complex, the following call to DGEMM
is performed in two steps (real and imaginary parts).
CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
$ RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO,
$ B( ST, 1 ), LDB ) */
j = bxst - *n - 1;
jreal = irwb - 1;
i__2 = *nrhs;
for (jcol = 1; jcol <= i__2; ++jcol) {
j += *n;
i__3 = nsize;
for (jrow = 1; jrow <= i__3; ++jrow) {
++jreal;
i__4 = j + jrow;
rwork[jreal] = work[i__4].r;
/* L260: */
}
/* L270: */
}
latime_1.ops += dopbl3_("DGEMM ", &nsize, nrhs, &nsize)
;
dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b10, &rwork[vt + st1],
n, &rwork[irwb], &nsize, &c_b35, &rwork[irwrb], &nsize);
j = bxst - *n - 1;
jimag = irwb - 1;
i__2 = *nrhs;
for (jcol = 1; jcol <= i__2; ++jcol) {
j += *n;
i__3 = nsize;
for (jrow = 1; jrow <= i__3; ++jrow) {
++jimag;
rwork[jimag] = d_imag(&work[j + jrow]);
/* L280: */
}
/* L290: */
}
latime_1.ops += dopbl3_("DGEMM ", &nsize, nrhs, &nsize)
;
dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b10, &rwork[vt + st1],
n, &rwork[irwb], &nsize, &c_b35, &rwork[irwib], &nsize);
jreal = irwrb - 1;
jimag = irwib - 1;
i__2 = *nrhs;
for (jcol = 1; jcol <= i__2; ++jcol) {
i__3 = st + nsize - 1;
for (jrow = st; jrow <= i__3; ++jrow) {
++jreal;
++jimag;
i__4 = b_subscr(jrow, jcol);
i__5 = jreal;
i__6 = jimag;
z__1.r = rwork[i__5], z__1.i = rwork[i__6];
b[i__4].r = z__1.r, b[i__4].i = z__1.i;
/* L300: */
}
/* L310: */
}
} else {
zlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b_ref(st,
1), ldb, &rwork[u + st1], n, &rwork[vt + st1], &iwork[k +
st1], &rwork[difl + st1], &rwork[difr + st1], &rwork[z__
+ st1], &rwork[poles + st1], &iwork[givptr + st1], &iwork[
givcol + st1], n, &iwork[perm + st1], &rwork[givnum + st1]
, &rwork[c__ + st1], &rwork[s + st1], &rwork[nrwork], &
iwork[iwk], info);
if (*info != 0) {
return 0;
}
}
/* L320: */
}
/* Unscale and sort the singular values. */
latime_1.ops += (doublereal) (*n + *n * 6 * *nrhs);
dlascl_("G", &c__0, &c__0, &c_b10, &orgnrm, n, &c__1, &d__[1], n, info);
dlasrt_("D", n, &d__[1], info);
zlascl_("G", &c__0, &c__0, &orgnrm, &c_b10, n, nrhs, &b[b_offset], ldb,
info);
return 0;
/* End of ZLALSD */
} /* zlalsd_ */
#undef b_ref
#undef b_subscr
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -