📄 s_tms1.cc

📁 矩阵奇异分解(svd)最新c++版本
💻 CC
字号:
// File: s_tms1.cc -- Implements the tms1 class// Author: Suvrit Sra// Date: 15 Nov, 2003/************************************************************************* THE ORIGINAL SVDPAKC COPYRIGHT                           (c) Copyright 1993                        University of Tennessee                          All Rights Reserved                           *************************************************************************/#include <cmath>#include <cstdio>#include <cstdlib>#include <cerrno>#include <cstring>#include <unistd.h>#include <fcntl.h>#include "s_tms1.h"using namespace ssvd;/*********************************************************************** *                       tsvd1()                                       * *     Sparse SVD Via Eigensystem of Equivalent 2-Cyclic Matrix        * *                  (double precision)                                 * ***********************************************************************   Description   -----------   tsvd1 implements a trace-minimization svd method for determining   the p-largest singular triplets of a large sparse matrix A.   tms1 computes the singular triplets of the matrix a via the    equivalent symmetric eigenvalue problem.          [alpha*I     A]   B = [             ] , where A is m (=nrow) by n (=ncol),       [A'    alpha*I]       so that {u,abs(lambda-alpha),v} is a singular triplet of A.   alpha is chosen so that B is (symmetric) positive definite. In   the calling program, alpha is determined as the matrix 1-norm of   A.  The user can set alpha to be any known upper bound for the   the largest singular values of the matrix A.   (A' = transpose of A)        User supplied routines: opat,opb,timer    opat(x,y)                takes an m-vector x and should return A'*x                            in y.   opb(nrow+ncol,x,y,shift) takes an (m+n) vector x and should return                            D*x in y, where D=[B-shift*I], I is the                             (m+n) order identity matrix.   User should replace calls to timer() with an appropriate call to   an intrinsic timing routine that returns delta user cpu time   (i.e., user cpu time elapsed from a previous call to timing routine)   tsvd1 utilizes ritz-shifting for acceleration of convergence.   External parameters   -------------------   Defined and documented in tmsg.h   Local parameters:   ----------------   (input)    n               order of equivalent symmetric eigenvalue problem                   should be equal to nrows+ncols, where nrows is the                   number of rows of A and ncols is the number of                   columns of A.   p               number of desired singular triplets (largest) of A.   s               dimension of initial subspace (s should be greater                   than p;s=2*p is usually safe but since the complexity                   of the method is determined by s, the user should                   try to keep s as close to p as possible).   job             acceleration strategy switch:                   job = 0, no acceleration used,                       = 1, ritz-shifting used.   maxi            maximum number of trace minimization steps allowed.   tol             user-supplied tolerance for residual of an                   equivalent eigenpair of B (singular triplet of A).   red             user-supplied tolerance for residual reduction to                   initiate ritz-shifting when job=1.   lwork(s)        (logical 0,1) work array.   (output parameters):    -----------------   ierr            ierr=99, input parameter job invalid.   (error flag)   mem             number of bytes of memory needed   mxv(3)          matrix-vector multiplication counters:                   mxv[0] = number of A*x,                   mxv[1] = number of A'*x,                   mxv[2] = mxv[0] + mxv[1]   sig(s)          contains p-desired singular values.   y(n,s)          first p columns contains left and right singular                   vectors, i.e.,                   y(1:n,1:p)  = [u(1:r,1:p)' | v(1:c,1:p)']',                   where r = no. of rows of matrix A and                   c = no. of cols of matrix A.   titer(s)        titer(i) := number of trace min. steps                   need for i-th triplet of A.   itcgt(s)        itcgt(i) := number of cg steps needed for                   i-th triplet of A.   time(5)         timing breakdown array:                   time[0]  = gram-schmidt orthogonalization                   time[1]  = section-formation (spectral decomposition)                   time[2]  = convergence criteria                   time[3]  = conjugate gradient method                   time[4]  = total execution time (user-cpu)   res(s)          2-norms of residual vectors                   (A*y[i]-sig(i)*y[i]), i=1,2,...,p.   Functions used   --------------    BLAS       daxpy, ddot, dgemm, dgemv    USER       timer,opb,opat    RNG        random    PRECISION  epslon    MATH       dsqrt,fabs    EISPACK    tred2,tql2,pythag   Precision   ---------   All floating-point calculations use double precision;   variables are declared as long or double. */long s_tms1::tsvd1(FILE* fp, long n, long p, long s, long job,           double tol, double red, double *sig, long maxi,           long *mem, long *itcgt, long *titer, double *time,           double *res, long *mxv, double **work1, double **work2,           double **work3, double **work4, double **work5,           double **y, long **iwork, long *lwork){  double  t1,sec1,sec2,sec21,sec22,sec23,sec3,sec4,meps,total;  double  *tempptr5,*workptr5;  long    iptr, left, i, j, k, l, irand, iter, ierr;  long    shift, memsize, length, temp_left;  /* allocate memory for temporary arrays in cgt and cgts subr. */  memsize = sizeof(double) * n * 6;  if(!(workptr5 = (double *) malloc(memsize))) {    perror("MALLOC FAILED IN TSVD1()\n");    exit(errno);  }  *mem += memsize;  tempptr5 = workptr5;  z = tempptr5;  tempptr5 += n;  v2 = tempptr5;  tempptr5 +=n;  r = tempptr5;  tempptr5 += n;  r2 = tempptr5;  tempptr5 += n;  pp = tempptr5;  tempptr5 += n;  z2 = tempptr5;  /* get machine epsilon (meps) */  meps  = epslon(ONE);  shift = FALSE;  ierr  = 0;  if((job != 0) &&  (job != 1)) {     ierr = 99;     return(ierr);  }  if(job == 1) {     for(i=0; i<p; i++) {         lwork[i]    =  FALSE;         work5[2][i] = -ONE;         work5[3][i] = -ONE;     }  }  /* initialize timers and counters: */  sec1  = ZERO;  sec21 = ZERO;  sec22 = ZERO;  sec23 = ZERO;  sec3  = ZERO;  sec4  = ZERO;  mxv[0] = 0;  mxv[1] = 0;  /* initialize y(1:n,1:s) = random matrix     (carry s vectors in the tmin iterations, assuming s.ge.p) */  irand = SEED;  for(k=0; k<s; k++) {     sig[k]      = ZERO;     work5[1][k] = ZERO;  }  for(k=0; k<s; k++) {       for (i=0; i<n; i++) y[k][i] = mrandom(&irand);  }  /*----------------------------------------------------------       pointer and counter for hybrid monitor:                1 2 3 4 5 ... i ... p             -----------------------         sig:| | | | | |...| |...| | (ascending order)             -----------------------                            ^                            |                          iptr : points to first e-value of B                                 that has not converged         left:=s-iptr+1 (ie how many y-vectors remain for tsvd1)  -------------------------------------------------------------- */    /* initialize a few pointers and counters: */  total = ZERO;  iptr = 1;  left = s;  for(i=0; i<s; i++) {      titer[i] = 0;      itcgt[i] = 0;  }  /*--------------------------------------------------------------       main tmin iteration loop (nmax iterations)   --------------------------------------------------------------*/  for(iter=0; iter<maxi; iter++) {     t1 = timer();     for(i=0; i<s; i++) {      for(j=0; j<n; j++) work2[i][j] = y[i][j];     }     orthg(s,0,n,work1,work2,&work4[0][0]);     t1    = timer() - t1;     sec1 += t1;       /* form work1(1:n,iptr:s) = b(1:n,1:n)*work2(1:n,iptr:s) */       t1 = timer();       /* economization of 2-cyclic form used here */       for(i=iptr-1; i<s; i++) opat(work2[i], work1[i]);     mxv[1] += s - iptr + 1;       /* form work3(1:left,1:left)=work2(1:n,iptr:s)'*work1(1:n,iptr:s)*/     dgemm3(TRANSP, NTRANSP, left, left, ncol, ONE, work2, nrow,iptr-1,            work1, 0, iptr-1, ZERO, work3, 0, 0);          for(j=0; j<left; j++) {         for(i=j; i<left; i++) work3[j][i] += work3[i][j];         for(i=j; i<left; i++) work3[i][j] = work3[j][i];         work3[j][j] += alpha;     }     /* load gershgorin radii */     if(job == 1) {       for (j=0; j<left; j++)       work4[0][j] = dasum(left, &work3[0][j], s) - fabs(work3[j][j]);     }     t1     = timer() - t1;     sec21 += t1;     /* compute the eigenvalues and eigenvectors of work3:  */          t1 = timer();     /* eigenvectors overwrite array work3(:,:)        store current e-values in work5(:,2)  */          for(j=iptr-1; j<s; j++) work5[1][j] = sig[j];          tred2(0,left, work3, &sig[iptr-1], &work4[1][0], work3);     ierr = tql2(0,left, &sig[iptr-1], &work4[1][0], work3);          t1 = timer() - t1;     sec22 += t1;          /* form y(1:n,iptr:s) = work2(1:n,iptr:s)*work3(1:left,1:left) */     t1 = timer();     dgemm3(NTRANSP, NTRANSP, n, left, left, ONE, work2, 0, iptr-1,            work3, 0, 0, ZERO, y, 0, iptr-1);     t1     = timer() - t1;     sec23 += t1;          /* test for convergence here */          t1 = timer();     for(j=iptr-1; j<s; j++) {         myopb(n, y[j], work2[j], ZERO);         daxpy(n, -sig[j], y[j], 1, work2[j],1);         work4[2][j-iptr+1]  = sqrt(ddot(n, work2[j], 1,work2[j],1));         if(j < p) titer[j] += 1;     }     mxv[0] += s-iptr+1;     mxv[1] += s-iptr+1;     t1 = timer() - t1;     sec3 += t1;    /* array work4(:,3) stores the vector 2-norms of residual vectors */     temp_left=left;      for(k=0; k<temp_left; k++)         if(fabs(work4[2][k]) <= tol) {            iptr = iptr + 1;            if(iptr > p) goto L10;            left = s-iptr+1;         }     if((!shift) && (job==1)) {            /* work4(:,1) stores gershgorin radii  *        * iwork(:,1) is the clustering vector */        disk(left, &sig[iptr-1], work4[0], iwork[0], iwork[1]);              for(k=iptr-1; k<s; k++) {          if(iwork[0][k-iptr+1] == 1)            isol(k, work4[2][k-iptr+1], red, lwork, work5[2],                 work5[3], s);          else {             if(iwork[0][k-iptr+1] > 1)               clus(k, iwork[0][k-iptr+1], &work4[2][k-iptr+1], red,                    lwork, work5[2], work5[3], work4[1], s);          }       }     }            /* continue algorithm...  */            /* shift start */          if((iter>0) && (!shift) && (job==1)) {        if(lwork[iptr-1]) {           shift = TRUE;           orthg(s, 0, n, work1, y, &work4[0][0]);        }     }     if(shift) {          /* compute shifts in work5(:,1) here: */        for(k=iptr-1; k<s; k++) {         if((k>iptr-1) && (k<=p-1)) {           work5[0][k] = ZERO;           for(j=iptr-2; j<k; j++) {              if((j!=-1) && (sig[j] <= (sig[k] - work4[2][k-iptr+1])))                 work5[0][k] = sig[j];           }           if((work5[0][k] == sig[k-1]) && (work5[0][k-1]==sig[k-1]))             work5[0][k] = sig[k];           if(work5[0][k] == ZERO)              work5[0][k] = work5[0][k-1];         }         else if(k>p-1) work5[0][k] = work5[0][p-1];              else if(k==iptr-1) work5[0][k] = sig[k];      }     }     t1 = timer();          /* do cg iterations */     /* load y into work1 array for orthog. projector in subr. cgt */     for(j=0; j<left; j++) {        for(i=0; i<n; i++) work1[j][i] = y[iptr+j-1][i];     }               /* cg loop for independent systems (no shifting used)  */     if(!shift)        for(i=0; i<left; i++) {           cgt(n, left, y[iptr+i-1], work1, &itcgt[iptr+i-1],               sig[iptr+i-1], sig[s-1], &iwork[0][i], meps);       }     else         /* shift with work5(:,1) in cg iterations */       for(i=0; i<left; i++) {          cgts(n, left, y[iptr+i-1], work1, &itcgt[iptr+i-1],               sig[iptr+i-1], work5[1][iptr+i-1], sig[s-1],               work5[0][iptr+i-1], &iwork[0][i], meps);       }     for(i=0; i<left; i++) {        mxv[0] += iwork[0][i];        mxv[1] += iwork[0][i];     }     t1    = timer() - t1;     sec4 += t1;  }L10:        /* compute final 2-norms of residual vectors w.r.t. b (indefinite) */   for(j=0; j<p; j++) {     myopb(n, y[j], work2[j], alpha);     sig[j]=ddot(n, y[j], 1, work2[j], 1)/ddot( n, y[j], 1, y[j], 1);     daxpy(n, -sig[j], y[j], 1, work2[j],1);     work4[2][j] = sqrt(ddot(n, work2[j], 1,work2[j],1));     sig[j]      = fabs(sig[j]);   }   mxv[0] += p;   mxv[1] += p;      sec2   = sec21 + sec22 + sec23;   total += sec1 + sec2 + sec3 + sec4;      /* load output time and mxv arrays */   time[0] = sec1;   time[1] = sec2;   time[2] = sec3;   time[3] = sec4;   time[4] = total;   mxv[2]  = mxv[0]+mxv[1];    /* load residual vector */  for(i=0; i<p; i++) res[i] = work4[2][i];  return(0);}
💿 文件大小 82 K
👤 上传用户 tiantianyuehui
📂 所属分类数学计算
🏷️ 相关标签

#svd #矩阵 #分解 #版本
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -