📄 s_bls1.cc
字号:
// File: s_bls1.cc// Author: Suvrit Sra// Just snarfed everything from bls1.c and family/************************************************************************* THE ORIGINAL SVDPAKC COPYRIGHT (c) Copyright 1993 University of Tennessee All Rights Reserved *************************************************************************/#include <cstdio>#include <cstdlib>#include <cmath>#include <cerrno>#include <cstring>#include <unistd.h>#include <fcntl.h>#include "s_bls1.h"using namespace ssvd;/*********************************************************************** * * * blklan1() * * Block Lanczos SVD Alogrithm * * * ***********************************************************************//*********************************************************************** Description ----------- This hybrid block Lanczos procedure consists of five phases: Phase 1: Block Lanczos outer iteration to yield a block upper bidiagonal matrix S whose singular values approximate those of the original sparse matrix A. Total (or complete) re- orthogonalization is used here. Phase 2: Lanczos method for bi-diagonalizing the S matrix from Phase 1 to yield the bi-diagonal matrix B whose singular values also approximate those of A. Total (or complete) re-orthogonalization is used for this Lanczos recursion. a point Lanczos method (single vector) is used if a blocksize of 1 is encountered via normal deflation. Phase 3: Apply an appropriate QR iteration to diagonalize B and hence produce approximate singular values (array alpha) of the original matrix A. Phase 4: Convergence test using a user-supplied residual tolerance. Phase 5: Iteration restart with orthogonalization with respect to any (all) converged singular vectors. This version of blklan1 is designed to approximate the ik-largest singular triplets of A. Users interested in the ik-smallest singular triplets need only sort the alpha array in increasing (as opposed to the default ascending order) following the call to qriter2 in Phase 3. Also, the rows of the two-dimensional arrays ppp and qqp must be reordered to reflect a one-to-one correspondence with the newly sorted elements of alpha (which are approximate singular values of the matrix A). Arguments --------- (input) fp pointer to output file (implicit now....) nnzero number of nonzeros in matrix A m row dimension of the sparse matrix A whose SVD is sought n column dimension of the sparse matrix A whose SVD is sought ik number of singular triplets desired ib initial block size for outer iteration ic upper bound for dimension of Krylov subspace generated via outer iteration. It is the maximum dimension for the block upper-bidiagonal matrix S generated in Phase 1 above. tol user-specified tolerance for approximate singular triplets maxit maximum number of outer iterations allowed (output) iko number of singular triplets approximated ico last bound for dimension of Krylov subspace used within outer iteration ibo final block size used in outer iteration sing linear array containing the iko approximate singular values v two-dimensional array containing the iko approximate right singular vectors corresponding to the approximate singular values in array sing u two-dimensional array containing the iko approximate left singular vectors corresponding to the approximate singular values in array sing res linear array containing the iko residuals of the approximate singular triplets memory memory storage needed in bytes External parameters ------------------- Defined and documented in bls1.h Local parameters ---------------- k current # of desired triplets (exit when reduced to 0) k0 count of triplets found in current iteration nb current block size nc size of current subspace ns number of blocks in current iteration Functions called -------------- BLAS dgemv, dgemm, qriter2, orthg MISC validate, random, imin BLS1 point1, block1 NOTE: Unlike Fortran, C stores arrays in row-major order. For the sake of efficiency, most matrices and vectors in the algorithm are transposed, i.e., each vector and each column of a matrix are stored row-wise. ***********************************************************************/long s_bls1::blklan1(FILE* fp, long nnzero, long m, long n, long ik, double *v, double *u, double *sing, long ic, long ib, double tol, double *res, long maxit, long *iko, long *ico, long *ibo, long *memory){ long nbold, cont, irand, ns, nb, nc, k, k0, length, memsize; long jj, ii, kk, ll, final, flag, i, j, *index; double **wp, **sp, **rp, **bigsp, **workptr2, **tempptr2; double *w, *s, *r, *bigs, *workptr1, *tempptr1; double *ptr1, *ptr2, *ptr3, *ptr4; nb = ib; nc = ic; k = ik; k0 = 0; ns = nc / nb; iconv = 0; irand = SEED; if (ns < MINBLKS) { nb = nc / MINBLKS; ns = nc / nb; } /* return upon error on input */ if (validate(fp, nnzero, m, n, *ico, *ibo, ik, tol)) return(-1); memsize = sizeof(double) * (nb * (m + nc + nc - nb + m + n + 1) + nc * (nb + m + n + m + 3 + nc + nc) + ik * (m + m + n) + n + n + m + m); *memory += memsize; /*********************************************************************** * Allocate work area and initialize pointers * * pointer size * * w (wp) nb by m * * s (sp) nc by nb * * r (rp) (nc-nb) by nb * * bigs (bigsp) nc by (nb+1) * * temp m by nb * * y (yp) nb by n * * uu (uup) nc by m * * vv (vvp) nc by n * * u0 ik by m * * v0 ik by n * * tres nb * * uvtmp (uvtmpp) (nc+ik) by m (assume max (m,n) = m) * * z n * * p n * * q m * * t m * * alpha nc * * beta nc * * pp (ppp) nc by nc * * qq (qqp) nc by nc * * u ik by m (allocated in bls1.c) * * v ik by n (allocated in bls1.c) * ***********************************************************************/ if (!(workptr1 = (double *)malloc(memsize)) || !(index = (long *)malloc(sizeof(long) * ib))){ perror("FIRST MALLOC FAILED in BLKLAN1()"); exit(errno); } *memory += sizeof(long) * ib; memsize = sizeof(double *) * (8 * nc + nb + ik); *memory += memsize; if (!(workptr2 = (double **)malloc(memsize))){ perror("SECOND MALLOC FAILED in BLKLAN1()"); exit(errno); } tempptr1 = workptr1; tempptr2 = workptr2; length = m * nb; w = tempptr1; tempptr1 += length; wp = tempptr2; tempptr2 += nb; j = 0; for (i = 0; i < length; i += m) wp[j++] = &w[i]; length = nc * nb; s = tempptr1; tempptr1 += length; sp = tempptr2; tempptr2 += nc; j = 0; for (i = 0; i < length; i += nb) sp[j++] = &s[i]; length = (nc - nb) * nb; r = tempptr1; tempptr1 += length; rp = tempptr2; tempptr2 += (nc - nb); j = 0; for (i = 0; i < length; i += nb) rp[j++] = &r[i]; length = nc * (nb + 1); bigs = tempptr1; tempptr1 += length; bigsp = tempptr2; tempptr2 += nc; j = 0; for (i = 0; i < length; i += nb + 1) bigsp[j++] = &bigs[i]; temp = tempptr1; tempptr1 += m * nb; length = n * nb; y = tempptr1; tempptr1 += length; yp = tempptr2; tempptr2 += nb; j = 0; for (i = 0; i < length; i += n) yp[j++] = &y[i]; length = m * nc; uu = tempptr1; tempptr1 += length; uup = tempptr2; tempptr2 += nc; j = 0; for (i = 0; i < length; i += m) uup[j++] = &uu[i]; length = n * nc; vv = tempptr1; tempptr1 += length; vvp = tempptr2; tempptr2 += nc; j = 0; for (i = 0; i < length; i += n) vvp[j++] = &vv[i]; u0 = tempptr1; tempptr1 += m * ik; v0 = tempptr1; tempptr1 += n * ik; tres = tempptr1; tempptr1 += nb; length = m * (nc + ik); uvtmp = tempptr1; tempptr1 += length; uvtmpp = tempptr2; tempptr2 += nc + ik; j = 0; for (i = 0; i < length; i += m) uvtmpp[j++] = &uvtmp[i]; z = tempptr1; tempptr1 += n; p = tempptr1; tempptr1 += n; q = tempptr1; tempptr1 += m; t = tempptr1; tempptr1 += m; alpha = tempptr1; tempptr1 += nc; beta = tempptr1; tempptr1 += nc; length = nc * nc; pp = tempptr1; tempptr1 += length; qq = tempptr1; tempptr1 += length; ppp = tempptr2; tempptr2 += nc; qqp = tempptr2; tempptr2 += nc; j = 0; for (i = 0; i < length; i += nc) { ppp[j] = &pp[i]; qqp[j++] = &qq[i]; } /* choose an initial random V[1] matrix */ length = n * nb; for (i = 0; i < length; i++) vv[i] = mrandom(&irand); orthg(nb, 0, n, yp, vvp, temp); /* initialize iteration counter */ iter = 0; while (iter < maxit) { nn = nb * ns; cont = TRUE; iter += 1; /*------------------------------------------------------------------* * PHASE 1 and PHASE 2 (block algorithm) * *------------------------------------------------------------------*/ if (nb > 1) { block1(w, wp, sp, rp, bigsp, m, n, nb, ns, &irand); } else { if (nbold != 1) k0 = 0; /*------------------------------------------------------------------* * PHASE 2A (point algorithm) * *------------------------------------------------------------------*/ cont = point1(&k, m, n, wp, res, sing, tol); if (cont) { for (i = 0; i < nn; i++) { for (j = 0; j < nn; j++) { ppp[i][j] = ZERO; qqp[i][j] = ZERO; } } for (i = 0; i < nn; i++) { ppp[i][i] = ONE; qqp[i][i] = ONE; } } } // /*------------------------------------------------------------------* * PHASE 3 * *------------------------------------------------------------------*/ if (!cont) break; qriter2(nn, alpha, beta, qqp, ppp); /*------------------------------------------------------------------* * PHASE 4 * *------------------------------------------------------------------*/ nbold = nb; if (iter > 1 && nb > 1) { k0 = 0; final = imin(nb, k); for (i = 0; i < final; i++) { if (fabs(tres[i]) <= tol) { index[k0] = i; sing[iconv + k0] = alpha[i]; res[iconv + k0] = tres[i]; k0 += 1; } } if (nb >= k) nb -= k0; else nb = imin(nb, k - k0); nc -= k0; k -= k0; if (k) ns = nc / nb; if (ns < MINBLKS) { nb = nc / MINBLKS; ns = nc / nb; } } /*------------------------------------------------------------------* * PHASE 5 (back transformation) * *------------------------------------------------------------------*/ if (nbold > 1) { /* uu[nn x m], qq[(nb+k0) x nn] */ dgemm(NTRANSP, NTRANSP, nb + k0, m, nn, ONE, qqp, uu, ZERO, u); dgemm(NTRANSP, NTRANSP, nb + k0, n, nn, ONE, ppp, vv, ZERO, v); } else { dgemv(TRANSP, nn, m, ONE, uup, qq, ZERO, u); dgemv(TRANSP, nn, n, ONE, vvp, pp, ZERO, v); } if (k0) { final = iconv + k0; ii = 0; for (jj = iconv; jj < final; jj++) { ptr1 = &u[m * index[ii]]; ptr2 = &u0[m * jj]; for (i = 0; i < m; i++) *ptr2++ = *ptr1++; ptr1 = &v[n * index[ii++]]; ptr2 = &v0[n * jj]; for (i = 0; i < n; i++) *ptr2++ = *ptr1++; } iconv = final; if (k <= 0) { iter -= 1; cont = FALSE; } /* reload unconverged right S-vector */ if (cont) { kk = 0; final = nb + k0; for (jj = 0; jj < final; jj++) { flag = FALSE; ll = 0; while (ll < k0 && !flag) { if (index[ll] != jj) ll++; else flag = TRUE; } if (!flag) { ptr1 = &vv[n * kk]; ptr2 = &v[n * jj]; for (i = 0; i < n; i++) *ptr1++ = *ptr2++; kk++; } } } } else { ptr1 = v; for (jj = 0; jj < nb; jj++) for (i = 0; i < n; i++) vvp[jj][i] = *ptr1++; } if (!cont) break; } /* return all converged S-vectors (right and left) */ ptr1 = u; ptr2 = u0; ptr3 = v; ptr4 = v0; for (j = 0; j < iconv; j++) { for (i = 0; i < m; i++) *ptr1++ = *ptr2++; for (i = 0; i < n; i++) *ptr3++ = *ptr4++; } *iko = ik - k; *ibo = nb; *ico = nc; free(workptr1); free(workptr2); free(index); return(0);}/*********************************************************************** * * * point1() * * *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -