📄 s_bls1.cc

📁 矩阵奇异分解(svd)最新c++版本
💻 CC
📖 第 1 页 / 共 2 页
字号:
12 下一页
// File: s_bls1.cc// Author: Suvrit Sra// Just snarfed everything from bls1.c and family/************************************************************************* THE ORIGINAL SVDPAKC COPYRIGHT                           (c) Copyright 1993                        University of Tennessee                          All Rights Reserved                           *************************************************************************/#include <cstdio>#include <cstdlib>#include <cmath>#include <cerrno>#include <cstring>#include <unistd.h>#include <fcntl.h>#include "s_bls1.h"using namespace ssvd;/*********************************************************************** *                                                                     * *                        blklan1()                                    * *                  Block Lanczos SVD Alogrithm                        * *                                                                     * ***********************************************************************//***********************************************************************   Description   -----------   This hybrid block Lanczos procedure consists of five phases:   Phase 1:  Block Lanczos outer iteration to yield a block upper             bidiagonal matrix S whose singular values approximate	     those of the original sparse matrix A.  Total (or complete)	     re- orthogonalization is used here.   Phase 2:  Lanczos method for bi-diagonalizing the S matrix from	     Phase 1 to yield the bi-diagonal matrix B whose singular	     values also approximate those of A.  Total (or complete)	     re-orthogonalization is used for this Lanczos recursion.  	     a point Lanczos method (single vector) is used if a blocksize	     of 1 is encountered via normal deflation.   Phase 3:  Apply an appropriate QR iteration to diagonalize B and	     hence produce approximate singular values (array alpha)	     of the original matrix A.   Phase 4:  Convergence test using a user-supplied residual tolerance.   Phase 5:  Iteration restart with orthogonalization with respect	     to any (all) converged singular vectors.   This version of blklan1 is designed to approximate the ik-largest   singular triplets of A.  Users interested in the ik-smallest   singular triplets need only sort the alpha array in increasing   (as opposed to the default ascending order) following the call to   qriter2 in Phase 3.  Also, the rows of the two-dimensional arrays   ppp and qqp must be reordered to reflect a one-to-one correspondence   with the newly sorted elements of alpha (which are approximate   singular values of the matrix A).   Arguments   ---------   (input)   fp       pointer to output file (implicit now....)   nnzero   number of nonzeros in matrix A   m        row dimension of the sparse matrix A whose SVD is sought   n        column dimension of the sparse matrix A whose SVD is sought   ik       number of singular triplets desired   ib       initial block size for outer iteration   ic       upper bound for dimension of Krylov subspace generated via	      outer iteration.  It is the maximum dimension for the block	      upper-bidiagonal matrix S generated in Phase 1 above.   tol     user-specified tolerance for approximate singular triplets   maxit   maximum number of outer iterations allowed   (output)   iko     number of singular triplets approximated   ico     last bound for dimension of Krylov subspace used within outer 	      iteration   ibo     final block size used in outer iteration   sing    linear array containing the iko approximate singular values   v       two-dimensional array containing the iko approximate right 	      singular vectors corresponding to the approximate singular 	      values in array sing   u       two-dimensional array containing the iko approximate left	   singular vectors corresponding to the approximate singular values 	   in array sing   res     linear array containing the iko residuals of the approximate 	   singular triplets   memory  memory storage needed in bytes   External parameters   -------------------   Defined and documented in bls1.h   Local parameters   ----------------    k        current # of desired triplets (exit when reduced to 0)    k0       count of triplets found in current iteration    nb       current block size    nc       size of current subspace    ns       number of blocks in current iteration   Functions called   --------------   BLAS         dgemv, dgemm, qriter2, orthg   MISC         validate, random, imin   BLS1         point1, block1   NOTE:  Unlike Fortran, C stores arrays in row-major order.  For the	  sake of efficiency, most matrices and vectors in the algorithm	  are transposed, i.e., each vector and each column of a	  matrix are stored row-wise. ***********************************************************************/long s_bls1::blklan1(FILE* fp, long nnzero, long m, long n, long ik, double *v,		     double *u, double *sing, long ic, long ib, double tol,		     double *res, long maxit, long *iko, long *ico, long *ibo, 		     long *memory){   long nbold, cont, irand, ns, nb, nc, k, k0, length, memsize;   long jj, ii, kk, ll, final, flag, i, j, *index;   double **wp, **sp, **rp, **bigsp, **workptr2, **tempptr2;   double *w, *s, *r, *bigs, *workptr1, *tempptr1;   double *ptr1, *ptr2, *ptr3, *ptr4;   nb = ib;   nc = ic;   k  = ik;   k0 = 0;   ns = nc / nb;   iconv = 0;   irand = SEED;   if (ns < MINBLKS) {      nb = nc / MINBLKS;      ns = nc / nb;   }   /* return upon error on input */   if (validate(fp, nnzero, m, n, *ico, *ibo, ik, tol))      return(-1);   memsize = sizeof(double) * 	     (nb * (m + nc + nc - nb + m + n + 1) +              nc * (nb + m + n + m + 3 + nc + nc) +	      ik * (m + m + n) + n + n + m + m);                      *memory += memsize; /***********************************************************************  *          Allocate work area and initialize pointers                 *  *     pointer              size                                       *  *    w     (wp)          nb      by m                                 *  *    s     (sp)          nc      by nb                                *  *    r     (rp)          (nc-nb) by nb                                *  *    bigs  (bigsp)       nc      by (nb+1)                            *  *    temp                m       by nb                                *  *    y     (yp)          nb      by n                                 *  *    uu    (uup)         nc      by m                                 *  *    vv    (vvp)         nc      by n                                 *  *    u0                  ik      by m                                 *  *    v0                  ik      by n                                 *  *    tres                nb                                           *  *    uvtmp (uvtmpp)      (nc+ik) by m  (assume max (m,n) = m)         *  *    z                   n                                            *  *    p                   n                                            *  *    q                   m                                            *  *    t                   m                                            *  *    alpha               nc                                           *  *    beta                nc                                           *  *    pp    (ppp)         nc      by nc                                *  *    qq    (qqp)         nc      by nc                                *  *    u                   ik      by m (allocated in bls1.c)           *  *    v                   ik      by n (allocated in bls1.c)           *  ***********************************************************************/   if (!(workptr1 = (double *)malloc(memsize)) ||       !(index   = (long   *)malloc(sizeof(long) * ib))){      perror("FIRST MALLOC FAILED in BLKLAN1()");      exit(errno);   }   *memory += sizeof(long) * ib;   memsize = sizeof(double *) * (8 * nc + nb + ik);   *memory += memsize;   if (!(workptr2 = (double **)malloc(memsize))){      perror("SECOND MALLOC FAILED in BLKLAN1()");      exit(errno);   }   tempptr1  = workptr1;   tempptr2  = workptr2;   length    = m * nb;   w         = tempptr1;   tempptr1 += length;   wp        = tempptr2;   tempptr2 += nb;   j = 0;   for (i = 0; i < length; i += m) wp[j++] = &w[i];   length    = nc * nb;   s         = tempptr1;   tempptr1 += length;   sp        = tempptr2;   tempptr2 += nc;   j = 0;   for (i = 0; i < length; i += nb) sp[j++] = &s[i];   length = (nc - nb) * nb;   r         = tempptr1;    tempptr1 += length;   rp        = tempptr2;   tempptr2 += (nc - nb);   j = 0;   for (i = 0; i < length; i += nb) rp[j++] = &r[i];   length    = nc * (nb + 1);   bigs      = tempptr1;   tempptr1 += length;   bigsp     = tempptr2;   tempptr2 += nc;   j = 0;   for (i = 0; i < length; i += nb + 1) bigsp[j++] = &bigs[i];   temp     = tempptr1;   tempptr1 += m * nb;   length    = n * nb;   y         = tempptr1;   tempptr1 += length;   yp        = tempptr2;   tempptr2 += nb;   j = 0;   for (i = 0; i < length; i += n) yp[j++] = &y[i];   length    = m * nc;   uu        = tempptr1;   tempptr1 += length;    uup       = tempptr2;   tempptr2 += nc;   j = 0;   for (i = 0; i < length; i += m) uup[j++] = &uu[i];   length    = n * nc;   vv        = tempptr1;   tempptr1 += length;   vvp       = tempptr2;   tempptr2 += nc;   j = 0;   for (i = 0; i < length; i += n) vvp[j++] = &vv[i];   u0        = tempptr1;   tempptr1 += m * ik;   v0        = tempptr1;   tempptr1 += n * ik;   tres      = tempptr1;   tempptr1 += nb;   length    = m * (nc + ik);   uvtmp     = tempptr1;   tempptr1 += length;   uvtmpp    = tempptr2;   tempptr2 += nc + ik;   j = 0;   for (i = 0; i < length; i += m) uvtmpp[j++] = &uvtmp[i];   z         = tempptr1;   tempptr1 += n;   p         = tempptr1;   tempptr1 += n;   q         = tempptr1;   tempptr1 += m;   t         = tempptr1;   tempptr1 += m;   alpha     = tempptr1;   tempptr1 += nc;   beta      = tempptr1;   tempptr1 += nc;   length    = nc * nc;   pp        = tempptr1;   tempptr1 += length;   qq        = tempptr1;   tempptr1 += length;   ppp       = tempptr2;   tempptr2 += nc;   qqp       = tempptr2;   tempptr2 += nc;   j = 0;   for (i = 0; i < length; i += nc) {      ppp[j]   = &pp[i];      qqp[j++] = &qq[i];   }   /* choose an initial random V[1] matrix */   length = n * nb;   for (i = 0; i < length; i++) vv[i] = mrandom(&irand);   orthg(nb, 0, n, yp, vvp, temp);    /* initialize iteration counter */   iter = 0;     while (iter < maxit) {      nn = nb * ns;      cont = TRUE;      iter += 1;      /*------------------------------------------------------------------*       *              PHASE 1 and PHASE 2 (block algorithm)               *       *------------------------------------------------------------------*/      if (nb > 1) {		block1(w, wp, sp, rp, bigsp, m, n, nb, ns, &irand);	  	  }      else {	 if (nbold != 1) k0 = 0;         /*------------------------------------------------------------------*          *                   PHASE 2A (point algorithm)                     *          *------------------------------------------------------------------*/	 cont = point1(&k, m, n, wp, res, sing, tol);         if (cont) {	    for (i = 0; i < nn; i++) {	       for (j = 0; j < nn; j++) {	          ppp[i][j] = ZERO;	          qqp[i][j] = ZERO;               }            }	    for (i = 0; i < nn; i++) {	       ppp[i][i] = ONE;	       qqp[i][i] = ONE;            }	 }      }	  //      /*------------------------------------------------------------------*       *                        PHASE 3                                   *       *------------------------------------------------------------------*/      if (!cont) break;      qriter2(nn, alpha, beta, qqp, ppp);      /*------------------------------------------------------------------*       *                        PHASE 4                                   *       *------------------------------------------------------------------*/      nbold = nb;      if (iter > 1 && nb > 1) {	 k0 = 0;	 final = imin(nb, k);	 for (i = 0; i < final; i++) {	    if (fabs(tres[i]) <= tol) {	      index[k0] = i;	      sing[iconv + k0] = alpha[i];	      res[iconv + k0] = tres[i];	      k0 += 1;            }         }	 if (nb >= k) nb -= k0;	 else nb = imin(nb, k - k0);	 nc -= k0;	 k -= k0;	 if (k) ns = nc / nb;         if (ns < MINBLKS) {            nb = nc / MINBLKS;            ns = nc / nb;         }      }      /*------------------------------------------------------------------*       *                  PHASE 5 (back transformation)                   *       *------------------------------------------------------------------*/      if (nbold > 1) {         /* uu[nn x m],  qq[(nb+k0) x nn] */         dgemm(NTRANSP, NTRANSP, nb + k0, m, nn, ONE, qqp, uu, ZERO, u);         dgemm(NTRANSP, NTRANSP, nb + k0, n, nn, ONE, ppp, vv, ZERO, v);      }      else {         dgemv(TRANSP, nn, m, ONE, uup, qq, ZERO, u);         dgemv(TRANSP, nn, n, ONE, vvp, pp, ZERO, v);      }      if (k0) {         final = iconv + k0;         ii = 0;         for (jj = iconv; jj < final; jj++) {	    ptr1 =  &u[m * index[ii]];	    ptr2 = &u0[m * jj];            for (i = 0; i < m; i++) *ptr2++ = *ptr1++;	    ptr1 =  &v[n * index[ii++]];	    ptr2 = &v0[n * jj];            for (i = 0; i < n; i++) *ptr2++ = *ptr1++;         }         iconv = final;         if (k <= 0) {	    iter -= 1;	    cont = FALSE;         }         /* reload unconverged right S-vector */         if (cont) {            kk = 0;            final = nb + k0;            for (jj = 0; jj < final; jj++) {	       flag = FALSE;	       ll = 0;	       while (ll < k0 && !flag) {	          if (index[ll] != jj) ll++;	          else flag = TRUE;	       }	       if (!flag) {	          ptr1 = &vv[n * kk];	          ptr2 =  &v[n * jj];                  for (i = 0; i < n; i++) *ptr1++ = *ptr2++;	          kk++;	       }            }	 }      }      else {          ptr1 =  v;         for (jj = 0; jj < nb; jj++)            for (i = 0; i < n; i++) vvp[jj][i] = *ptr1++;       }      if (!cont) break;   }   /* return all converged S-vectors (right and left) */   ptr1 =  u;   ptr2 = u0;   ptr3 =  v;   ptr4 = v0;   for (j = 0; j < iconv; j++) {      for (i = 0; i < m; i++) *ptr1++ = *ptr2++;      for (i = 0; i < n; i++) *ptr3++ = *ptr4++;   }   *iko = ik - k;   *ibo  = nb;   *ico = nc;   free(workptr1);   free(workptr2);   free(index);   return(0);}/*********************************************************************** *                                                                     * *                        point1()                                     * *                                                                     *
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -