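/* clalsd.c  (file name as shown by the web code viewer this listing was
   copied from; the viewer's "font size" control label followed it) */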
#include "blaswrap.h"
/* -- translated by f2c (version 19990503).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
#include "f2c.h"
/* Common block declarations.

   latime_ holds the counters used by this instrumented routine:
     ops   - running operation count; the routine body adds to it before
             each counted computation.
     itcnt - second counter of the block; not referenced in the visible
             part of this file. */
struct {
    real ops;
    real itcnt;
} latime_;

/* Alias through which the routine body refers to the common block. */
#define latime_1 latime_
/* Table of constant values.

   The routines called below take their arguments by address (e.g. &c__1),
   so each literal used as a call argument needs addressable storage. */

/* Integer constants. */
static integer c__0 = 0;
static integer c__1 = 1;

/* Real constants. */
static real c_b35 = 0.0f;
static real c_b10 = 1.0f;

/* Complex zero (both components are 0). */
static complex c_b1 = { 0.0f, 0.0f };
/* Subroutine */ int clalsd_(char *uplo, integer *smlsiz, integer *n, integer
*nrhs, real *d__, real *e, complex *b, integer *ldb, real *rcond,
integer *rank, complex *work, real *rwork, integer *iwork, integer *
info)
{
/* System generated locals */
integer b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6;
real r__1;
complex q__1;
/* Builtin functions */
double r_imag(complex *), log(doublereal), r_sign(real *, real *);
/* Local variables */
static integer difl, difr, jcol, irwb, perm, nsub, nlvl, sqre, bxst, jrow,
irwu, c__, i__, j, k;
static real r__;
static integer s, u, jimag, z__, jreal;
extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
integer *, real *, real *, integer *, real *, integer *, real *,
real *, integer *);
static integer irwib;
extern /* Subroutine */ int ccopy_(integer *, complex *, integer *,
complex *, integer *);
static integer poles, sizei, irwrb, nsize;
extern /* Subroutine */ int csrot_(integer *, complex *, integer *,
complex *, integer *, real *, real *);
static integer irwvt, icmpq1, icmpq2;
extern doublereal sopbl3_(char *, integer *, integer *, integer *)
;
static real cs;
static integer bx;
extern /* Subroutine */ int clalsa_(integer *, integer *, integer *,
integer *, complex *, integer *, complex *, integer *, real *,
integer *, real *, integer *, real *, real *, real *, real *,
integer *, integer *, integer *, integer *, real *, real *, real *
, real *, integer *, integer *);
static real sn;
extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *,
real *, integer *, integer *, complex *, integer *, integer *);
static integer st;
extern /* Subroutine */ int slasda_(integer *, integer *, integer *,
integer *, real *, real *, real *, integer *, real *, integer *,
real *, real *, real *, real *, integer *, integer *, integer *,
integer *, real *, real *, real *, real *, integer *, integer *);
extern doublereal slamch_(char *);
static integer vt;
extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex
*, integer *, complex *, integer *), claset_(char *,
integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *), slascl_(char *,
integer *, integer *, real *, real *, integer *, integer *, real *
, integer *, integer *);
extern integer isamax_(integer *, real *, integer *);
static integer givcol;
extern /* Subroutine */ int slasdq_(char *, integer *, integer *, integer
*, integer *, integer *, real *, real *, real *, integer *, real *
, integer *, real *, integer *, real *, integer *),
slaset_(char *, integer *, integer *, real *, real *, real *,
integer *), slartg_(real *, real *, real *, real *, real *
);
static real orgnrm;
static integer givnum;
extern doublereal slanst_(char *, integer *, real *, real *);
extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *);
static integer givptr, nm1, nrwork, irwwrk, smlszp, st1;
static real eps;
static integer iwk;
static real tol;
#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]
/* -- LAPACK routine (instrumented to count ops, version 3.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
October 31, 1999
Purpose
=======
CLALSD uses the singular value decomposition of A to solve the least
squares problem of finding X to minimize the Euclidean norm of each
column of A*X-B, where A is N-by-N upper bidiagonal, and X and B
are N-by-NRHS. The solution X overwrites B.
The singular values of A smaller than RCOND times the largest
singular value are treated as zero in solving the least squares
problem; in this case a minimum norm solution is returned.
The actual singular values are returned in D in ascending order.
This code makes very mild assumptions about floating point
arithmetic. It will work on machines with a guard digit in
add/subtract, or on those binary machines without guard digits
which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
It could conceivably fail on hexadecimal or decimal machines
without guard digits, but we know of none.
Arguments
=========
UPLO (input) CHARACTER*1
= 'U': D and E define an upper bidiagonal matrix.
= 'L': D and E define a lower bidiagonal matrix.
SMLSIZ (input) INTEGER
The maximum size of the subproblems at the bottom of the
computation tree.
N (input) INTEGER
The dimension of the bidiagonal matrix. N >= 0.
NRHS (input) INTEGER
The number of columns of B. NRHS must be at least 1.
D (input/output) REAL array, dimension (N)
On entry D contains the main diagonal of the bidiagonal
matrix. On exit, if INFO = 0, D contains its singular values.
E (input/output) REAL array, dimension (N-1)
Contains the super-diagonal entries of the bidiagonal matrix.
On exit, E has been destroyed.
B (input/output) COMPLEX array, dimension (LDB,NRHS)
On input, B contains the right hand sides of the least
squares problem. On output, B contains the solution X.
LDB (input) INTEGER
The leading dimension of B in the calling subprogram.
LDB must be at least max(1,N).
RCOND (input/output) REAL
The singular values of A less than or equal to RCOND times
the largest singular value are treated as zero in solving
the least squares problem. If RCOND <= 0 or RCOND >= 1,
machine precision is used instead, and RCOND is overwritten
with that value.
For example, if diag(S)*X=B were the least squares problem,
where diag(S) is a diagonal matrix of singular values, the
solution would be X(i) = B(i) / S(i) if S(i) is greater than
RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to
RCOND*max(S).
RANK (output) INTEGER
The number of singular values of A greater than RCOND times
the largest singular value.
WORK (workspace) COMPLEX array, dimension at least
(N * NRHS).
RWORK (workspace) REAL array, dimension at least
(9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + (SMLSIZ+1)**2),
where
NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
IWORK (workspace) INTEGER array, dimension at least
(3*N*NLVL + 11*N).
INFO (output) INTEGER
= 0: successful exit.
< 0: if INFO = -i, the i-th argument had an illegal value.
> 0: The algorithm failed to compute a singular value while
working on the submatrix lying in rows and columns
INFO/(N+1) through MOD(INFO,N+1).
=====================================================================
Test the input parameters.
Parameter adjustments */
--d__;
--e;
b_dim1 = *ldb;
b_offset = 1 + b_dim1 * 1;
b -= b_offset;
--work;
--rwork;
--iwork;
/* Function Body */
*info = 0;
if (*n < 0) {
*info = -3;
} else if (*nrhs < 1) {
*info = -4;
} else if (*ldb < 1 || *ldb < *n) {
*info = -8;
}
if (*info != 0) {
i__1 = -(*info);
xerbla_("CLALSD", &i__1);
return 0;
}
eps = slamch_("Epsilon");
/* Set up the tolerance. */
if (*rcond <= 0.f || *rcond >= 1.f) {
*rcond = eps;
}
*rank = 0;
/* Quick return if possible. */
if (*n == 0) {
return 0;
} else if (*n == 1) {
if (d__[1] == 0.f) {
claset_("A", &c__1, nrhs, &c_b1, &c_b1, &b[b_offset], ldb);
} else {
*rank = 1;
latime_1.ops += (real) (*nrhs << 1);
clascl_("G", &c__0, &c__0, &d__[1], &c_b10, &c__1, nrhs, &b[
b_offset], ldb, info);
d__[1] = dabs(d__[1]);
}
return 0;
}
/* Rotate the matrix if it is lower bidiagonal. */
if (*(unsigned char *)uplo == 'L') {
latime_1.ops += (real) ((*n - 1) * 6);
i__1 = *n - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
slartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
d__[i__] = r__;
e[i__] = sn * d__[i__ + 1];
d__[i__ + 1] = cs * d__[i__ + 1];
if (*nrhs == 1) {
latime_1.ops += 12.f;
csrot_(&c__1, &b_ref(i__, 1), &c__1, &b_ref(i__ + 1, 1), &
c__1, &cs, &sn);
} else {
rwork[(i__ << 1) - 1] = cs;
rwork[i__ * 2] = sn;
}
/* L10: */
}
if (*nrhs > 1) {
latime_1.ops += (real) ((*n - 1) * 12 * *nrhs);
i__1 = *nrhs;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *n - 1;
for (j = 1; j <= i__2; ++j) {
cs = rwork[(j << 1) - 1];
sn = rwork[j * 2];
csrot_(&c__1, &b_ref(j, i__), &c__1, &b_ref(j + 1, i__), &
c__1, &cs, &sn);
/* L20: */
}
/* L30: */
}
}
}
/* Scale. */
nm1 = *n - 1;
orgnrm = slanst_("M", n, &d__[1], &e[1]);
if (orgnrm == 0.f) {
claset_("A", n, nrhs, &c_b1, &c_b1, &b[b_offset], ldb);
return 0;
}
latime_1.ops += (real) (*n + nm1);
slascl_("G", &c__0, &c__0, &orgnrm, &c_b10, n, &c__1, &d__[1], n, info);
slascl_("G", &c__0, &c__0, &orgnrm, &c_b10, &nm1, &c__1, &e[1], &nm1,
info);
/* If N is smaller than the minimum divide size SMLSIZ, then solve
the problem with another solver. */
if (*n <= *smlsiz) {
irwu = 1;
irwvt = irwu + *n * *n;
irwwrk = irwvt + *n * *n;
irwrb = irwwrk;
irwib = irwrb + *n * *nrhs;
irwb = irwib + *n * *nrhs;
slaset_("A", n, n, &c_b35, &c_b10, &rwork[irwu], n);
slaset_("A", n, n, &c_b35, &c_b10, &rwork[irwvt], n);
slasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &rwork[irwvt], n,
&rwork[irwu], n, &rwork[irwwrk], &c__1, &rwork[irwwrk], info);
if (*info != 0) {
return 0;
}
/* In the real version, B is passed to SLASDQ and multiplied
internally by Q'. Here B is complex and that product is
computed below in two steps (real and imaginary parts). */
j = irwb - 1;
i__1 = *nrhs;
for (jcol = 1; jcol <= i__1; ++jcol) {
i__2 = *n;
for (jrow = 1; jrow <= i__2; ++jrow) {
++j;
i__3 = b_subscr(jrow, jcol);
rwork[j] = b[i__3].r;
/* L40: */
}
/* L50: */
}
latime_1.ops += sopbl3_("SGEMM ", n, nrhs, n);
sgemm_("T", "N", n, nrhs, n, &c_b10, &rwork[irwu], n, &rwork[irwb], n,
&c_b35, &rwork[irwrb], n);
j = irwb - 1;
i__1 = *nrhs;
for (jcol = 1; jcol <= i__1; ++jcol) {
i__2 = *n;
for (jrow = 1; jrow <= i__2; ++jrow) {
++j;
rwork[j] = r_imag(&b_ref(jrow, jcol));
/* L60: */
}
/* L70: */
}
latime_1.ops += sopbl3_("SGEMM ", n, nrhs, n);
sgemm_("T", "N", n, nrhs, n, &c_b10, &rwork[irwu], n, &rwork[irwb], n,
&c_b35, &rwork[irwib], n);
jreal = irwrb - 1;
jimag = irwib - 1;
i__1 = *nrhs;
for (jcol = 1; jcol <= i__1; ++jcol) {
i__2 = *n;
for (jrow = 1; jrow <= i__2; ++jrow) {
++jreal;
++jimag;
i__3 = b_subscr(jrow, jcol);
i__4 = jreal;
i__5 = jimag;
q__1.r = rwork[i__4], q__1.i = rwork[i__5];
b[i__3].r = q__1.r, b[i__3].i = q__1.i;
/* L80: */
}
/* L90: */
}
latime_1.ops += 1.f;
tol = *rcond * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1));
i__1 = *n;
for (i__ = 1; i__ <= i__1; ++i__) {
if (d__[i__] <= tol) {
claset_("A", &c__1, nrhs, &c_b1, &c_b1, &b_ref(i__, 1), ldb);
} else {
latime_1.ops += (real) (*nrhs * 6);
clascl_("G", &c__0, &c__0, &d__[i__], &c_b10, &c__1, nrhs, &
b_ref(i__, 1), ldb, info);
++(*rank);
}
/* L100: */
}
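/* [Residue from the web code viewer this listing was copied from; it is
   keyboard-shortcut help, not part of clalsd.c. The listing of clalsd_
   stops above this point.
     Keyboard shortcuts
       Copy code:           Ctrl + C
       Search code:         Ctrl + F
       Full-screen mode:    F11
       Toggle theme:        Ctrl + Shift + D
       Show shortcuts:      ?
       Increase font size:  Ctrl + =
       Decrease font size:  Ctrl + -  ] */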