/* dgelsd.c */
#include "blaswrap.h"
/* -- translated by f2c (version 19990503).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
#include "f2c.h"
/* Table of constant values.
   The calls in this file take their arguments by address (e.g. &c__6),
   so each literal that is passed as an argument lives here as a
   file-scope object.  They are deliberately not const: the callee
   prototypes take plain `integer *` / `doublereal *`. */
static integer c__6 = 6;	/* first argument of the ilaenv_ call whose result is stored in mnthr */
static integer c_n1 = -1;	/* passed to ilaenv_ in its trailing integer argument slots */
static integer c__9 = 9;	/* first argument of the ilaenv_ call whose result is stored in smlsiz */
static integer c__0 = 0;	/* passed for all four integer arguments of the smlsiz query */
static integer c__1 = 1;	/* first argument of the ilaenv_ calls that feed the maxwrk estimates */
/* Zero constant; not referenced in the visible part of this file.
   NOTE(review): presumably the fill value handed to dlaset_ further
   down -- confirm against the rest of the routine. */
static doublereal c_b82 = 0.;
/* Subroutine */ int dgelsd_(integer *m, integer *n, integer *nrhs,
doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *
s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork,
integer *iwork, integer *info)
{
/* System generated locals */
integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
/* Builtin functions */
double log(doublereal);
/* Local variables */
static doublereal anrm, bnrm;
static integer itau, nlvl, iascl, ibscl;
static doublereal sfmin;
static integer minmn, maxmn, itaup, itauq, mnthr, nwork;
extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
static integer ie, il;
extern /* Subroutine */ int dgebrd_(integer *, integer *, doublereal *,
integer *, doublereal *, doublereal *, doublereal *, doublereal *,
doublereal *, integer *, integer *);
extern doublereal dlamch_(char *);
static integer mm;
extern doublereal dlange_(char *, integer *, integer *, doublereal *,
integer *, doublereal *);
extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *,
integer *, doublereal *, doublereal *, integer *, integer *),
dlalsd_(char *, integer *, integer *, integer *, doublereal *,
doublereal *, doublereal *, integer *, doublereal *, integer *,
doublereal *, integer *, integer *), dlascl_(char *,
integer *, integer *, doublereal *, doublereal *, integer *,
integer *, doublereal *, integer *, integer *), dgeqrf_(
integer *, integer *, doublereal *, integer *, doublereal *,
doublereal *, integer *, integer *), dlacpy_(char *, integer *,
integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *,
doublereal *, doublereal *, integer *), xerbla_(char *,
integer *);
extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
integer *, integer *, ftnlen, ftnlen);
static doublereal bignum;
extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *,
integer *, integer *, doublereal *, integer *, doublereal *,
doublereal *, integer *, doublereal *, integer *, integer *);
static integer wlalsd;
extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *,
integer *, doublereal *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, integer *);
static integer ldwork;
extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *,
integer *, doublereal *, integer *, doublereal *, doublereal *,
integer *, doublereal *, integer *, integer *);
static integer minwrk, maxwrk;
static doublereal smlnum;
static logical lquery;
static integer smlsiz;
static doublereal eps;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
/* -- LAPACK driver routine (version 3.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
October 31, 1999
Purpose
=======
DGELSD computes the minimum-norm solution to a real linear least
squares problem:
minimize 2-norm(| b - A*x |)
using the singular value decomposition (SVD) of A. A is an M-by-N
matrix which may be rank-deficient.
Several right hand side vectors b and solution vectors x can be
handled in a single call; they are stored as the columns of the
M-by-NRHS right hand side matrix B and the N-by-NRHS solution
matrix X.
The problem is solved in three steps:
(1) Reduce the coefficient matrix A to bidiagonal form with
Householder transformations, reducing the original problem
into a "bidiagonal least squares problem" (BLS)
(2) Solve the BLS using a divide and conquer approach.
(3) Apply back all the Householder transformations to solve
the original least squares problem.
The effective rank of A is determined by treating as zero those
singular values which are less than RCOND times the largest singular
value.
The divide and conquer algorithm makes very mild assumptions about
floating point arithmetic. It will work on machines with a guard
digit in add/subtract, or on those binary machines without guard
digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
Cray-2. It could conceivably fail on hexadecimal or decimal machines
without guard digits, but we know of none.
Arguments
=========
M (input) INTEGER
The number of rows of A. M >= 0.
N (input) INTEGER
The number of columns of A. N >= 0.
NRHS (input) INTEGER
The number of right hand sides, i.e., the number of columns
of the matrices B and X. NRHS >= 0.
A (input) DOUBLE PRECISION array, dimension (LDA,N)
On entry, the M-by-N matrix A.
On exit, A has been destroyed.
LDA (input) INTEGER
The leading dimension of the array A. LDA >= max(1,M).
B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
On entry, the M-by-NRHS right hand side matrix B.
On exit, B is overwritten by the N-by-NRHS solution
matrix X. If m >= n and RANK = n, the residual
sum-of-squares for the solution in the i-th column is given
by the sum of squares of elements n+1:m in that column.
LDB (input) INTEGER
The leading dimension of the array B. LDB >= max(1,max(M,N)).
S (output) DOUBLE PRECISION array, dimension (min(M,N))
The singular values of A in decreasing order.
The condition number of A in the 2-norm = S(1)/S(min(m,n)).
RCOND (input) DOUBLE PRECISION
RCOND is used to determine the effective rank of A.
Singular values S(i) <= RCOND*S(1) are treated as zero.
If RCOND < 0, machine precision is used instead.
RANK (output) INTEGER
The effective rank of A, i.e., the number of singular values
which are greater than RCOND*S(1).
WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
LWORK (input) INTEGER
The dimension of the array WORK. LWORK must be at least 1.
The exact minimum amount of workspace needed depends on M,
N and NRHS. As long as LWORK is at least
12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2,
if M is greater than or equal to N or
12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2,
if M is less than N, the code will execute correctly.
SMLSIZ is returned by ILAENV and is equal to the maximum
size of the subproblems at the bottom of the computation
tree (usually about 25), and
NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
For good performance, LWORK should generally be larger.
If LWORK = -1, then a workspace query is assumed; the routine
only calculates the optimal size of the WORK array, returns
this value as the first entry of the WORK array, and no error
message related to LWORK is issued by XERBLA.
IWORK (workspace) INTEGER array, dimension (LIWORK)
LIWORK >= 3 * MINMN * NLVL + 11 * MINMN,
where MINMN = MIN( M,N ).
INFO (output) INTEGER
= 0: successful exit
< 0: if INFO = -i, the i-th argument had an illegal value.
> 0: the algorithm for computing the SVD failed to converge;
if INFO = i, i off-diagonal elements of an intermediate
bidiagonal form did not converge to zero.
Further Details
===============
Based on contributions by
Ming Gu and Ren-Cang Li, Computer Science Division, University of
California at Berkeley, USA
Osni Marques, LBNL/NERSC, USA
=====================================================================
Test the input arguments.
Parameter adjustments */
a_dim1 = *lda;
a_offset = 1 + a_dim1 * 1;
a -= a_offset;
b_dim1 = *ldb;
b_offset = 1 + b_dim1 * 1;
b -= b_offset;
--s;
--work;
--iwork;
/* Function Body */
*info = 0;
minmn = min(*m,*n);
maxmn = max(*m,*n);
mnthr = ilaenv_(&c__6, "DGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, (
ftnlen)1);
lquery = *lwork == -1;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*nrhs < 0) {
*info = -3;
} else if (*lda < max(1,*m)) {
*info = -5;
} else if (*ldb < max(1,maxmn)) {
*info = -7;
}
smlsiz = ilaenv_(&c__9, "DGELSD", " ", &c__0, &c__0, &c__0, &c__0, (
ftnlen)6, (ftnlen)1);
/* Compute workspace.
(Note: Comments in the code beginning "Workspace:" describe the
minimal amount of workspace needed at that point in the code,
as well as the preferred amount for good performance.
NB refers to the optimal block size for the immediately
following subroutine, as returned by ILAENV.) */
minwrk = 1;
minmn = max(1,minmn);
/* Computing MAX */
i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) /
log(2.)) + 1;
nlvl = max(i__1,0);
if (*info == 0) {
maxwrk = 0;
mm = *m;
if (*m >= *n && *m >= mnthr) {
/* Path 1a - overdetermined, with many more rows than columns. */
mm = *n;
/* Computing MAX */
i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m,
n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
maxwrk = max(i__1,i__2);
/* Computing MAX */
i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "DORMQR", "LT",
m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2);
maxwrk = max(i__1,i__2);
}
if (*m >= *n) {
/* Path 1 - overdetermined or exactly determined.
Computing MAX */
i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "DGEBRD"
, " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
maxwrk = max(i__1,i__2);
/* Computing MAX */
i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "DORMBR",
"QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
maxwrk = max(i__1,i__2);
/* Computing MAX */
i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "DORMBR",
"PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
maxwrk = max(i__1,i__2);
/* Computing 2nd power */
i__1 = smlsiz + 1;
wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * *
nrhs + i__1 * i__1;
/* Computing MAX */
i__1 = maxwrk, i__2 = *n * 3 + wlalsd;
maxwrk = max(i__1,i__2);
/* Computing MAX */
i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2),
i__2 = *n * 3 + wlalsd;
minwrk = max(i__1,i__2);
}
if (*n > *m) {
/* Computing 2nd power */
i__1 = smlsiz + 1;
wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * *
nrhs + i__1 * i__1;
if (*n >= mnthr) {
/* Path 2a - underdetermined, with many more columns
than rows. */
maxwrk = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1,
&c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) *
ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, (
ftnlen)6, (ftnlen)1);
maxwrk = max(i__1,i__2);
/* Computing MAX */
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(&
c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1, (ftnlen)6, (
ftnlen)3);
maxwrk = max(i__1,i__2);
/* Computing MAX */
i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) *
ilaenv_(&c__1, "DORMBR", "PLN", m, nrhs, m, &c_n1, (
ftnlen)6, (ftnlen)3);
maxwrk = max(i__1,i__2);
if (*nrhs > 1) {
/* Computing MAX */
i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
maxwrk = max(i__1,i__2);
} else {
/* Computing MAX */
i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
maxwrk = max(i__1,i__2);
}
/* Computing MAX */
i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "DORMLQ",
"LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2);
maxwrk = max(i__1,i__2);
/* Computing MAX */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -