📄 tms2.c

📁 求矩阵奇异分解svd算法
💻 C
📖 第 1 页 / 共 5 页
字号:
   fprintf(fp_out1,"... CONJUGATE GRADIENT            =%10.2E (%3ld%%)\n",           time[3], itime[3]);   fprintf(fp_out1,"... TOTAL CPU  TIME (SEC)         =%10.2E\n", time[4]);   fprintf(fp_out1,"... WALL-CLOCK TIME (SEC)         =%10ld\n\n",iwall);   fprintf(fp_out1, " %s\n", title);   fprintf(fp_out1, "           %s\n", name);   fprintf(fp_out1, " ... NO. OF TERMS     (ROWS)   = %10ld\n", nrow);   fprintf(fp_out1, " ... NO. OF DOCUMENTS (COLS)   = %10ld\n", ncol);   fprintf(fp_out1, " ... ORDER OF MATRIX A         = %10ld\n", n);   fprintf(fp_out1,"......\n");   fprintf(fp_out1,"...... COMPUTED SINGULAR VALUES  (RESIDUAL NORMS)  T-MIN STEPS CG STEPS\n");    fprintf(fp_out1,"......\n");   for(i=0; i<p; i++)   fprintf(fp_out1, "... %2ld  %24.14E ( %10.2E)      %3ld           %3ld \n", i+1, sig[i], res[i],titer[i],itcgt[i]);    if (vec) {      for (i = 0;  i < p;  i++)      write(fp_out2, (void *) &y[i][0], sizeof(double) * (nrow+ncol)); }   free(value);   free(pointr);   fclose(fp_in1);   fclose(fp_in2);   fclose(fp_out1);   if (vec) close(fp_out2);   free(workptr1);   free(workptr2);   free(workptr3);   free(workptr4);   exit(0);}#include <stdio.h>#include <math.h>#include <errno.h>#include <fcntl.h>#include "tmsc.h"extern long nrow, ncol;extern double alpha;extern double  *z,*v2,*r,*r2,*pp,*z2,alpha,*zz;double epslon(double);double random(long *);long   imin(long, long);long   imax(long, long);void   dscal(long, double, double *, long);void   dtrsm(char, char, long, char, long, long, double, double **, double **);void   porth(long, long, long, double **, double *, double *, double *,           double *, double *, double, long, double);void   pmul(long, double *, double *, double *, double *,          double *, double, long, double);void   orthg(long, long, long, double **, double **, double *);void   opat(double *,double *);void   opa(double *, double *);void   dgemm3(long, long, long, long, long, double, double **, long,            long, double **, long, long, double, double **, long, long);double dasum(long, double *, long);float  timer(void);void   opb(long, double *, double *,double);void   daxpy(long, double, double *,long, double *, long);void   tred2(long, long, double **, double *, double *, double **);long   tql2(long, long, double *, double *, double **);void   disk(long, double *, double *, long *, long *);void   isol(long, double, double, long *, double *, double *, long);void   clus(long, long, double *, double, long *, double *, double *,            double *, long);void   cgt(long, long, double *, double **, long *, double, double,           long *, double);void   cgts(long, long, double *, double **, long *, double, double,            double, double, long *, double);double ddot(long, double *, long, double *, long);#ifdef UNIX_CREAT#elsefloat acosh(float);#endif/*********************************************************************** *                                                                     * *                       tsvd2()                                       * *     Sparse SVD Via Eigensystem of Equivalent 2-Cyclic Matrix        * *                  (double precision)                                 * *                                                                     * ***********************************************************************   Description   -----------   tsvd2 implements a trace-minimization svd method for determining   the p-largest singular triplets of a large sparse matrix A.   tms2 computes the singular triplets of the matrix A via the    equivalent symmetric eigenvalue problem.      B = (alpha*alpha)*I - A'A, where A is m (=nrow) by n (=ncol),   so that sqrt(alpha*alpha-lambda) is a singular value of A.   alpha is chosen so that B is (symmetric) positive definite.  In   the calling program, alpha is determined as the matrix 1-norm of   A.  the user can set alpha to be any known upper bound for the   the largest singular value of the matrix A.   (A' = transpose of A)        User supplied routines: opat, opb,timer,store    opat(x,y)                takes an m-vector x and should return A'*x                            in y.   opb(nrow+ncol,x,y,shift) takes an (m+n)-vector x and should return                            D*x in y, where D=[B-shift*I], I is the                             (m+n) order identity matrix.   user should replace calls to timer() with an appropriate call to   an instrinsic timing routine that returns delta user cpu time   (i.e., user cpu time elapsed from a previous call to timing routine)   tsvd2 utilizes ritz-shifting and chebyshev polynomials for acceleration of    convergence.   External parameters   -------------------   Defined and documented in tmsg.h   Local parameters:   ----------------   (input)    n               order of equivalent symmetric eigenvalue problem                   should be equal to min(nrow,ncol), where nrow is the                   number of rows of A and ncol is the number of columns of A.   p               number of desired singular triplets (largest) of A.   s               dimension of initial subspace (s should be great er                   than p; s=2*p is usually safe but since the comp lexity                   of the method is determined by s, the user should                   try to keep s as close to p as possible).   job             acceleration strategy switch:                   job = 0, no acceleration used,                       = 1, ritz-shifting used.                       = 2, chebyshev polynomials and ritz-shifting used.   maxi            maximum number of trace minimization steps allowed.   tol             user-supplied tolerance for residual of an                   equivalent eigenpair of B (singular triplet of A).   red             user-supplied tolerance for residual reduction to                   initiate ritz-shifting when job=1 or job=2.   lwork(s)        (logical 0,1) work array.   (output parameters):    -----------------   ierr            ierr=99, input parameter job invalid.   (error flag)   mem             number of bytes of memory needed      maxd            maximum polynomial degree used (job = 2).   mxv(3)          matrix-vector multiplication counters:                   mxv[0] = number of A*x,                   mxv[1] = number of A'*x,                   mxv[2] = mxv[0] + mxv[1]   sig(s)          contains p-desired singular values.   y(nrow+ncol,s)  first p columns contain left and right singular                   vectors, i.e.,                   y(1:nrow+ncol,1:p)  = [u(1:r,1:p)' | v(1:c,1:p)']',                   where r = no. of rows of matrix A and                   c = no. of cols of matrix A.   titer(s)        titer(i) := number of trace min. steps                   need for i-th triplet of A.   itcgt(s)        itcgt(i) := number of cg steps needed for                   i-th triplet of A.   time(5)         timing breakdown array:                   time[0]  = gram-schmidt orthogonalization                   time[1]  = section-formation (spectral decomposition)                   time[2]  = convergence criteria                   time[3]  = conjugate gradient method                   time[4]  = total execution time (user-cpu)   res(s)          2-norms of residual vectors                   (A*y[i]-sig(i)*y[i]), i=1,2,...,p.   Functions used   --------------    BLAS       daxpy, ddot, dgemm3, dgemv, dgemv2, xerbla,               pmul, dtrsm, porth    USER       timer,opb,opat,acosh (if not in math library)    RNG        random    PRECISION  epslon    MATH       sqrt,fabs,acosh    EISPACK    tred2,tql2,pythag   Precision   ---------   All floating-point calculations use double precision;   variables are declared as long and double. */long tsvd2(FILE *fp_out1, long n, long p, long s, long job,           double tol, double red, double *sig, long maxi,           long *mem, long *itcgt, long *titer,double *time,           double *res, long *mxv, double **work1, double **work2,           double **work3, double **work4, double **work5,          double **y, long **iwork, long *lwork,long *maxd){  double  *workptr5,**workptr6,total,*tempptr5,**tempptr6,meps;  double  tmp,tmpi,pparm1,pparm2,e,enew;  double  t1,sec1,sec2,sec21,sec22,sec23,sec3,sec4;  long  degree,ndegre,itmp,ierr,iptr,left,i,j,k,l,irand,iter;  long  shift,memsize,length,temp_left,polyac;  /* allocate memory for temporary arrays in subroutines cgt() and cgts() */  memsize = sizeof(double) * n * 6;  if(!(workptr5 = (double *) malloc(memsize))) {    perror("FIRST MALLOC FAILED IN TSVD2()\n");    exit(errno);  }  *mem += memsize;  tempptr5 = workptr5;  z = tempptr5;  tempptr5 += n;  v2 = tempptr5;  tempptr5 +=n;  r = tempptr5;  tempptr5 += n;  r2 = tempptr5;  tempptr5 += n;  pp = tempptr5;  tempptr5 += n;  z2 = tempptr5;  /* allocate memory for temporary array in subroutine opb */  memsize = sizeof(double) * (ncol+nrow);  if(!(zz = (double *) malloc(memsize))) {     perror("SECOND MALLOC FAILED IN TSVD2()\n");     exit(errno);    }  *mem += memsize;  /*get machine epsilon (meps) */  meps = epslon(ONE);  shift = FALSE;  ierr = 0;  if(job && (job != 1) && (job != 2)) {     ierr = 99;     return(ierr);  }  if(job == 1) {    for(i=0; i<p; i++) {        lwork[i] = FALSE;        work5[2][i] = -ONE;        work5[3][i] = -ONE;    }  }  else {    if(job==2) {         degree = 0;         ndegre = 0;         pparm1 = 0.04;         pparm2 = 4.0;    }  }  /*initialize timers,counters and flags: */  sec1 = ZERO;  sec21 = ZERO;  sec22 = ZERO;  sec23 = ZERO;  sec3 = ZERO;  sec4 = ZERO;  mxv[0] = 0;  mxv[1] = 0;  polyac = FALSE;  /*initialize y(1:n,1:s) = random matrix     (carry s vectors in the tmin iterations, assuming s.ge.p) */  irand =SEED;  for(k=0; k<s; k++) {     sig[k] = ZERO;     work5[1][k] = ZERO;  }  for(k=0; k<s; k++) {      for (i=0; i<n; i++) y[k][i] = random(&irand);  }  /*----------------------------------------------------------       pointer and counter for hybrid monitor:                1 2 3 4 5 ... i ... p             -----------------------         sig:| | | | | |...| |...| | (ascending order)             -----------------------                            ^                            |                          iptr : points to first e-value of B                                 that has not converged         left:=s-iptr+1 (ie how many y-vectors remain for tsvd2)  -------------------------------------------------------------- */    /* initialize a few pointers and counters: */  total = ZERO;  iptr = 1;  left = s;  for(i=0; i<s; i++) {      titer[i] = 0;      itcgt[i] = 0;  }  /*--------------------------------------------------------------       main tmin iteration loop (nmax iterations)   --------------------------------------------------------------*/  for(iter=0; iter<maxi; iter++) {     if((job==2) && (!shift)) {        if(ndegre) {         degree = ndegre;         *maxd = imax(*maxd,degree);         polyac = TRUE;       }       else polyac = FALSE;     }/* section formation */     t1 = timer();     for(j=0; j<s; j++) {        for(i=0; i<n; i++) work2[j][i] = y[j][i];     }     if(polyac) {       porth(s,0,n,work2,work1[0],work1[1],work4[0],work4[1],             work4[2],e,degree,alpha);       mxv[0] += degree*s;       mxv[1] += degree*s;     }     else {       orthg(s,0,n,work1,work2,&work4[0][0]);     }   t1 = timer() - t1;   sec1 = sec1 + t1;       /* form work1(1:n,iptr:s) = b(1:n,1:n)*work2(1:n,iptr:s) */       t1 = timer();    if(polyac) {    for(j=0; j<left; j++) {       for(i=0; i<n; i++) work1[iptr+j-1][i] = work2[iptr+j-1][i];    }  }  else {     for(j=0; j<left; j++)        opb(n,work2[iptr+j-1],work1[iptr+j-1],ZERO);  } /* form work3(1:left,1:left) = work2(1:n,iptr:s)'*work1(1:n,iptr:s) */  dgemm3(TRANSP, NTRANSP, left, left, n, ONE, work2, 0,iptr-1,         work1, 0, iptr-1, ZERO, work3, 0, 0);       if(job >= 1) {    for(j=0; j<left; j++)       work4[0][j] = dasum(left, &work3[0][j], s) - fabs(work3[j][j]);  }     t1 = timer() - t1;     sec21 = sec21 + t1;     /*    compute the eigenvalues and eigenvectors of work3:  */          t1 = timer();     /*    eigenvectors overwrite array work3(:,:)         store current e-values in work5(:,2)  */  if(polyac) {    tred2(0,left,work3,work5[0],work4[1],work3);    ierr = tql2(0,left,work5[0],work4[1],work3);    if(ierr) printf("IERR FROM TQL2 = %ld\n",ierr);  }  else {          for(j=iptr-1; j<s; j++) work5[1][j] = sig[j];          tred2(0,left, work3, &sig[iptr-1], &work4[1][0], work3);     ierr = tql2(0,left, &sig[iptr-1], &work4[1][0], work3);    if(ierr) printf("IERR FROM TQL2 = %ld\n",ierr);  }   t1 = timer() - t1;  sec22 = sec22 + t1;          /* form y(1:n,iptr:s) = work2(1:n,iptr:s)*work3(1:left,1:left) */     t1 = timer();  dgemm3(NTRANSP, NTRANSP, n, left, left, ONE, work2, 0, iptr-1,           work3, 0, 0, ZERO, y, 0, iptr-1);     t1 = timer() - t1;     sec23 = sec23 + t1;          /*    test for convergence here */          t1 = timer();     for(j=iptr-1; j<s; j++) {        opb(n, y[j], work2[j], ZERO);        if(polyac) {          work5[1][j]=sig[j];          sig[j] = ddot(n,y[j],1,work2[j],1)/ddot(n,y[j],1,y[j],1);        }        daxpy(n, -sig[j], y[j], 1, work2[j],1);        work4[2][j-iptr+1] = sqrt(ddot(n,work2[j],1,work2[j],1));        if(j < p) titer[j] += 1;     }     mxv[0] += s-iptr+1;     mxv[1] += s-iptr+1;     t1 = timer() - t1;     sec3 = sec3 + t1;    temp_left=left;     /* array work4(:,3) stores the vector 2-norms of residual vectors */
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -