📄 s_bls2.cc
字号:
// File: s_bls2.cc -- implents class s_bls2// Author: Suvrit Sra// Date: 14 Nov, 2003/************************************************************************* THE ORIGINAL SVDPAKC COPYRIGHT (c) Copyright 1993 University of Tennessee All Rights Reserved *************************************************************************/#include <cstdio>#include <cstdlib>#include <cerrno>#include <cmath>#include <cstring>#include <unistd.h>#include <fcntl.h>#include "s_bls2.h"using namespace ssvd;/*********************************************************************** * * * blklan2() * * Block Lanczos SVD Alogrithm * * * ***********************************************************************//*********************************************************************** Description ----------- blklan2.c is designed to compute singular values and singular vectors of a large sparse matrix A. The singular values (and singular vectors) of A are determined by the eigenvalues (and eigenvectors) of the matrix B, where B = A'A. The eigenvalues of B are the squares of the singular values of A, the eigenvectors of B correspond to the right singular vectors of A only. The left singular vectors of A are determined by u = 1/sigma A*v, where {u, sigma, v} is a singular triplet of A. This version of blklan2 is designed to approximate the ik-largest singular triplets of A. This hybrid block Lanczos procedure consists of five phases: Phase 1: Block Lanczos outer iteration to yield a symmetric block tridiagonal matrix S whose eigenvalues approximate those of the matrix B = A'A. Total (or complete) re-orthogonalization is used here. Phase 2: Lanczos method for tridiagonalizing the S matrix from Phase 1 to yield the tridiagonal matrix T whose eigenvalues also approximate those of B. Total (or complete) re-orthogonalization is used for this Lanczos recursion. A polong Lanczos method (single vector) is used if a blocksize of 1 is encountered via normal deflation. Phase 3: Apply an appropriate QL iteration to diagonalize T and hence produce approximate eigenvalues (array alpha) of matrix B = A'A and hence the squares of the singular values of the original sparse matrix A. Phase 4: Convergence test using a user-supplied residual tolerance. Phase 5: Iteration restart with orthogonalization with respect to any (all) converged eigenvectors of B = A'A. Arguments --------- (input) fp polonger to output file nnzero number of nonzeros in matrix A m row dimension of the sparse matrix A whose SVD is sought n column dimension of the sparse matrix A whose SVD is sought ik number of singular triplets desired ib initial block size for outer iteration ic upper bound for dimension of Krylov subspace generated via outer iteration. It is the maximum dimension for the block upper-bidiagonal matrix S generated in Phase 1 above. tol user-specified tolerance for approximate singular triplets maxit maximum number of outer iterations allowed (output) iko number of singular triplets approximated ico last bound for dimension of Krylov subspace used within outer iteration ibo final block size used in outer iteration eig linear array containing the iko approximate singular values v two-dimensional array containing the iko approximate right singular vectors corresponding to the approximate singular values in array eig res linear array containing the iko residuals of the approximate singular triplets memory memory storage needed in bytes External parameters ------------------- Defined and documented in bls2.h Local parameters ---------------- k current # of desired triplets (exit when reduced to 0) k0 count of triplets found in current iteration nb current block size nc size of current subspace ns number of blocks in current iteration Functions called -------------- BLAS dgemv, dgemm2, tql2, orthg MISC validate, random, imin BLS2 polong2, block2 NOTE: Unlike Fortran, C stores arrays in row-major order. For the sake of efficiency, most matrices and vectors in the algorithm are transposed, i.e., each vector and each column of a matrix are stored row-wise. ***********************************************************************/long s_bls2::blklan2(FILE* fp, long nnzero, long m, long n, long ik, double *v, double *eig, long ic, long ib, double tol, double *res, long maxit, long *iko, long *ico, long *ibo, long *memory){ long irand; long nbold, cont, ns, nb, nc, k, k0, length, memsize; long jj, ii, kk, ll, final, flag, i, j, *index; double **sp, **rp, **bigsp, **workptr2, **tempptr2; double *s, *r, *bigs, *workptr1, *tempptr1; double *ptr1, *ptr2; nb = ib; nc = ic; k = ik; k0 = 0; ns = nc / nb; /* reset converged vector counter */ iconv = 0; irand = (long) SEED; /* determine number of blocks for first iteration */ if (ns < MINBLKS) { nb = nc / MINBLKS; ns = nc / nb; } /* return upon error on input */ if (validate(fp, nnzero, m, n, *ico, *ibo, ik, tol)) return(-1); /* allocate memory for linear arrays of doubles s, r, bigs, y, vv * v0, tres, uvtmp, z, p, q, t, alpha, beta, pp, qq, v */ memsize = sizeof(double) * (nb * (nc + nc - nb + n + 1) + nc * (nb + 2 * n + 3 + nc + nc) + ik * (n + n) + 4 * n); *memory += memsize; /*********************************************************************** * Allocate work area and initialize polongers * * polonger size * * s (sp) nc by nb * * r (rp) (nc-nb) by nb * * bigs (bigsp) nc by (nb+1) * * y (yp) nb by n * * vv (vvp) nc by n * * v0 ik by n * * tres nb * * uvtmp (uvtmpp) (nc+ik) by n * * z n * * p n * * q n * * t n * * alpha nc * * beta nc * * pp (ppp) nc by nc * * qq (qqp) nc by nc * * v ik by n (allocated in bls2.c) * * ztemp (ztempp) nc by n (allocated in bls2.c) * * index nb * ***********************************************************************/ if (!(workptr1 = (double *)malloc(memsize)) || !(index = (long *)malloc(sizeof(long) * nb))){ perror("FIRST MALLOC FAILED in BLKLAN2()"); exit(errno); } /* memory for linear array index */ *memory += sizeof(long) * nb; /* allocate memory for arrays of polongers sp, rp, bigsp, yp, vvp, * uvtmpp, ppp, qqp. This will allow the memory areas s, r, bigs, y, * vv, uvtmp, pp and qq to be addressed as 2-dimensional arrays */ memsize = sizeof(double *) * (7 * nc + nb + ik); *memory += memsize; if (!(workptr2 = (double **)malloc(memsize))){ perror("SECOND MALLOC FAILED in BLKLAN2()"); exit(errno); } tempptr1 = workptr1; tempptr2 = workptr2; length = nc * nb; s = tempptr1; tempptr1 += length; sp = tempptr2; tempptr2 += nc; j = 0; for (i = 0; i < length; i += nb) sp[j++] = &s[i]; length = (nc - nb) * nb; r = tempptr1; tempptr1 += length; rp = tempptr2; tempptr2 += (nc - nb); j = 0; for (i = 0; i < length; i += nb) rp[j++] = &r[i]; length = nc * (nb + 1); bigs = tempptr1; tempptr1 += length; bigsp = tempptr2; tempptr2 += nc; j = 0; for (i = 0; i < length; i += nb + 1) bigsp[j++] = &bigs[i]; length = n * nb; y = tempptr1; tempptr1 += length; yp = tempptr2; tempptr2 += nb; j = 0; for (i = 0; i < length; i += n) yp[j++] = &y[i]; length = n * nc; vv = tempptr1; tempptr1 += length; vvp = tempptr2; tempptr2 += nc; j = 0; for (i = 0; i < length; i += n) vvp[j++] = &vv[i]; v0 = tempptr1; tempptr1 += n * ik; tres = tempptr1; tempptr1 += nb; length = n * (nc + ik); uvtmp = tempptr1; tempptr1 += length; uvtmpp = tempptr2; tempptr2 += nc + ik; j = 0; for (i = 0; i < length; i += n) uvtmpp[j++] = &uvtmp[i]; z = tempptr1; tempptr1 += n; p = tempptr1; tempptr1 += n; q = tempptr1; tempptr1 += n; t = tempptr1; tempptr1 += n; alpha = tempptr1; tempptr1 += nc; beta = tempptr1; tempptr1 += nc; length = nc * nc; pp = tempptr1; tempptr1 += length; qq = tempptr1; tempptr1 += length; ppp = tempptr2; tempptr2 += nc; qqp = tempptr2; tempptr2 += nc; j = 0; for (i = 0; i < length; i += nc) { ppp[j] = &pp[i]; qqp[j++] = &qq[i]; } length = n * nb; for (i = 0; i < length; i++) vv[i] = mrandom(&irand); orthg(nb, 0, n, yp, vvp, ztemp); iter = 0; while (iter < maxit) { nn = nb * ns; cont = TRUE; iter += 1; /*------------------------------------------------------------------* * PHASE 1 and PHASE 2 (block algorithm) * *------------------------------------------------------------------*/ if (nb > 1) block2(sp, rp, bigsp, m, n, nb, ns, &irand); else { if (nbold != 1) k0 = 0; /*------------------------------------------------------------------* * PHASE 2A (polong algorithm) * *------------------------------------------------------------------*/ cont = polong2(&k, m, n, v, res, eig, tol); } if (!cont) break; /*------------------------------------------------------------------* * PHASE 3 * *------------------------------------------------------------------*/ /* solve symmetric tridiagonal EVP */ for (i = 0; i < nn; i++) for (j = 0; j < nn; j++) qqp[i][j] = ZERO; for (i = 0; i < nn; i++) qqp[i][i] = ONE; /* resort alpha's and rows of pp (need descending order) */ if (tql2(nn, alpha, beta, qqp)) break; i = nn - 1; for (jj = 0; jj < nn; jj++) { z[jj] = alpha[i]; for (ii = 0; ii < nn; ii++) uvtmpp[jj][ii] = qqp[i][ii]; i--; } for (jj = 0; jj < nn; jj++) { alpha[jj] = z[jj]; for (ii = 0; ii < nn; ii++) qqp[jj][ii] = uvtmpp[jj][ii]; } /*------------------------------------------------------------------* * PHASE 4 * *------------------------------------------------------------------*/ nbold = nb; if (iter > 1 && nb > 1) { k0 = 0; final = imin(nb, k); for (i = 0; i < final; i++) { if (fabs(tres[i]) <= tol) { index[k0] = i; eig[iconv + k0] = alpha[i]; res[iconv + k0] = tres[i]; k0 += 1; } } if (nb >= k) nb -= k0; else nb = imin(nb, k - k0); nc -= k0; k -= k0; if (k) ns = nc / nb; if (ns < MINBLKS) { nb = nc / MINBLKS;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -