📄 s_tms1.cc
字号:
// File: s_tms1.cc -- Implements the tms1 class// Author: Suvrit Sra// Date: 15 Nov, 2003/************************************************************************* THE ORIGINAL SVDPAKC COPYRIGHT (c) Copyright 1993 University of Tennessee All Rights Reserved *************************************************************************/#include <cmath>#include <cstdio>#include <cstdlib>#include <cerrno>#include <cstring>#include <unistd.h>#include <fcntl.h>#include "s_tms1.h"using namespace ssvd;/*********************************************************************** * tsvd1() * * Sparse SVD Via Eigensystem of Equivalent 2-Cyclic Matrix * * (double precision) * *********************************************************************** Description ----------- tsvd1 implements a trace-minimization svd method for determining the p-largest singular triplets of a large sparse matrix A. tms1 computes the singular triplets of the matrix a via the equivalent symmetric eigenvalue problem. [alpha*I A] B = [ ] , where A is m (=nrow) by n (=ncol), [A' alpha*I] so that {u,abs(lambda-alpha),v} is a singular triplet of A. alpha is chosen so that B is (symmetric) positive definite. In the calling program, alpha is determined as the matrix 1-norm of A. The user can set alpha to be any known upper bound for the the largest singular values of the matrix A. (A' = transpose of A) User supplied routines: opat,opb,timer opat(x,y) takes an m-vector x and should return A'*x in y. opb(nrow+ncol,x,y,shift) takes an (m+n) vector x and should return D*x in y, where D=[B-shift*I], I is the (m+n) order identity matrix. User should replace calls to timer() with an appropriate call to an intrinsic timing routine that returns delta user cpu time (i.e., user cpu time elapsed from a previous call to timing routine) tsvd1 utilizes ritz-shifting for acceleration of convergence. External parameters ------------------- Defined and documented in tmsg.h Local parameters: ---------------- (input) n order of equivalent symmetric eigenvalue problem should be equal to nrows+ncols, where nrows is the number of rows of A and ncols is the number of columns of A. p number of desired singular triplets (largest) of A. s dimension of initial subspace (s should be greater than p;s=2*p is usually safe but since the complexity of the method is determined by s, the user should try to keep s as close to p as possible). job acceleration strategy switch: job = 0, no acceleration used, = 1, ritz-shifting used. maxi maximum number of trace minimization steps allowed. tol user-supplied tolerance for residual of an equivalent eigenpair of B (singular triplet of A). red user-supplied tolerance for residual reduction to initiate ritz-shifting when job=1. lwork(s) (logical 0,1) work array. (output parameters): ----------------- ierr ierr=99, input parameter job invalid. (error flag) mem number of bytes of memory needed mxv(3) matrix-vector multiplication counters: mxv[0] = number of A*x, mxv[1] = number of A'*x, mxv[2] = mxv[0] + mxv[1] sig(s) contains p-desired singular values. y(n,s) first p columns contains left and right singular vectors, i.e., y(1:n,1:p) = [u(1:r,1:p)' | v(1:c,1:p)']', where r = no. of rows of matrix A and c = no. of cols of matrix A. titer(s) titer(i) := number of trace min. steps need for i-th triplet of A. itcgt(s) itcgt(i) := number of cg steps needed for i-th triplet of A. time(5) timing breakdown array: time[0] = gram-schmidt orthogonalization time[1] = section-formation (spectral decomposition) time[2] = convergence criteria time[3] = conjugate gradient method time[4] = total execution time (user-cpu) res(s) 2-norms of residual vectors (A*y[i]-sig(i)*y[i]), i=1,2,...,p. Functions used -------------- BLAS daxpy, ddot, dgemm, dgemv USER timer,opb,opat RNG random PRECISION epslon MATH dsqrt,fabs EISPACK tred2,tql2,pythag Precision --------- All floating-point calculations use double precision; variables are declared as long or double. */long s_tms1::tsvd1(FILE* fp, long n, long p, long s, long job, double tol, double red, double *sig, long maxi, long *mem, long *itcgt, long *titer, double *time, double *res, long *mxv, double **work1, double **work2, double **work3, double **work4, double **work5, double **y, long **iwork, long *lwork){ double t1,sec1,sec2,sec21,sec22,sec23,sec3,sec4,meps,total; double *tempptr5,*workptr5; long iptr, left, i, j, k, l, irand, iter, ierr; long shift, memsize, length, temp_left; /* allocate memory for temporary arrays in cgt and cgts subr. */ memsize = sizeof(double) * n * 6; if(!(workptr5 = (double *) malloc(memsize))) { perror("MALLOC FAILED IN TSVD1()\n"); exit(errno); } *mem += memsize; tempptr5 = workptr5; z = tempptr5; tempptr5 += n; v2 = tempptr5; tempptr5 +=n; r = tempptr5; tempptr5 += n; r2 = tempptr5; tempptr5 += n; pp = tempptr5; tempptr5 += n; z2 = tempptr5; /* get machine epsilon (meps) */ meps = epslon(ONE); shift = FALSE; ierr = 0; if((job != 0) && (job != 1)) { ierr = 99; return(ierr); } if(job == 1) { for(i=0; i<p; i++) { lwork[i] = FALSE; work5[2][i] = -ONE; work5[3][i] = -ONE; } } /* initialize timers and counters: */ sec1 = ZERO; sec21 = ZERO; sec22 = ZERO; sec23 = ZERO; sec3 = ZERO; sec4 = ZERO; mxv[0] = 0; mxv[1] = 0; /* initialize y(1:n,1:s) = random matrix (carry s vectors in the tmin iterations, assuming s.ge.p) */ irand = SEED; for(k=0; k<s; k++) { sig[k] = ZERO; work5[1][k] = ZERO; } for(k=0; k<s; k++) { for (i=0; i<n; i++) y[k][i] = mrandom(&irand); } /*---------------------------------------------------------- pointer and counter for hybrid monitor: 1 2 3 4 5 ... i ... p ----------------------- sig:| | | | | |...| |...| | (ascending order) ----------------------- ^ | iptr : points to first e-value of B that has not converged left:=s-iptr+1 (ie how many y-vectors remain for tsvd1) -------------------------------------------------------------- */ /* initialize a few pointers and counters: */ total = ZERO; iptr = 1; left = s; for(i=0; i<s; i++) { titer[i] = 0; itcgt[i] = 0; } /*-------------------------------------------------------------- main tmin iteration loop (nmax iterations) --------------------------------------------------------------*/ for(iter=0; iter<maxi; iter++) { t1 = timer(); for(i=0; i<s; i++) { for(j=0; j<n; j++) work2[i][j] = y[i][j]; } orthg(s,0,n,work1,work2,&work4[0][0]); t1 = timer() - t1; sec1 += t1; /* form work1(1:n,iptr:s) = b(1:n,1:n)*work2(1:n,iptr:s) */ t1 = timer(); /* economization of 2-cyclic form used here */ for(i=iptr-1; i<s; i++) opat(work2[i], work1[i]); mxv[1] += s - iptr + 1; /* form work3(1:left,1:left)=work2(1:n,iptr:s)'*work1(1:n,iptr:s)*/ dgemm3(TRANSP, NTRANSP, left, left, ncol, ONE, work2, nrow,iptr-1, work1, 0, iptr-1, ZERO, work3, 0, 0); for(j=0; j<left; j++) { for(i=j; i<left; i++) work3[j][i] += work3[i][j]; for(i=j; i<left; i++) work3[i][j] = work3[j][i]; work3[j][j] += alpha; } /* load gershgorin radii */ if(job == 1) { for (j=0; j<left; j++) work4[0][j] = dasum(left, &work3[0][j], s) - fabs(work3[j][j]); } t1 = timer() - t1; sec21 += t1; /* compute the eigenvalues and eigenvectors of work3: */ t1 = timer(); /* eigenvectors overwrite array work3(:,:) store current e-values in work5(:,2) */ for(j=iptr-1; j<s; j++) work5[1][j] = sig[j]; tred2(0,left, work3, &sig[iptr-1], &work4[1][0], work3); ierr = tql2(0,left, &sig[iptr-1], &work4[1][0], work3); t1 = timer() - t1; sec22 += t1; /* form y(1:n,iptr:s) = work2(1:n,iptr:s)*work3(1:left,1:left) */ t1 = timer(); dgemm3(NTRANSP, NTRANSP, n, left, left, ONE, work2, 0, iptr-1, work3, 0, 0, ZERO, y, 0, iptr-1); t1 = timer() - t1; sec23 += t1; /* test for convergence here */ t1 = timer(); for(j=iptr-1; j<s; j++) { myopb(n, y[j], work2[j], ZERO); daxpy(n, -sig[j], y[j], 1, work2[j],1); work4[2][j-iptr+1] = sqrt(ddot(n, work2[j], 1,work2[j],1)); if(j < p) titer[j] += 1; } mxv[0] += s-iptr+1; mxv[1] += s-iptr+1; t1 = timer() - t1; sec3 += t1; /* array work4(:,3) stores the vector 2-norms of residual vectors */ temp_left=left; for(k=0; k<temp_left; k++) if(fabs(work4[2][k]) <= tol) { iptr = iptr + 1; if(iptr > p) goto L10; left = s-iptr+1; } if((!shift) && (job==1)) { /* work4(:,1) stores gershgorin radii * * iwork(:,1) is the clustering vector */ disk(left, &sig[iptr-1], work4[0], iwork[0], iwork[1]); for(k=iptr-1; k<s; k++) { if(iwork[0][k-iptr+1] == 1) isol(k, work4[2][k-iptr+1], red, lwork, work5[2], work5[3], s); else { if(iwork[0][k-iptr+1] > 1) clus(k, iwork[0][k-iptr+1], &work4[2][k-iptr+1], red, lwork, work5[2], work5[3], work4[1], s); } } } /* continue algorithm... */ /* shift start */ if((iter>0) && (!shift) && (job==1)) { if(lwork[iptr-1]) { shift = TRUE; orthg(s, 0, n, work1, y, &work4[0][0]); } } if(shift) { /* compute shifts in work5(:,1) here: */ for(k=iptr-1; k<s; k++) { if((k>iptr-1) && (k<=p-1)) { work5[0][k] = ZERO; for(j=iptr-2; j<k; j++) { if((j!=-1) && (sig[j] <= (sig[k] - work4[2][k-iptr+1]))) work5[0][k] = sig[j]; } if((work5[0][k] == sig[k-1]) && (work5[0][k-1]==sig[k-1])) work5[0][k] = sig[k]; if(work5[0][k] == ZERO) work5[0][k] = work5[0][k-1]; } else if(k>p-1) work5[0][k] = work5[0][p-1]; else if(k==iptr-1) work5[0][k] = sig[k]; } } t1 = timer(); /* do cg iterations */ /* load y into work1 array for orthog. projector in subr. cgt */ for(j=0; j<left; j++) { for(i=0; i<n; i++) work1[j][i] = y[iptr+j-1][i]; } /* cg loop for independent systems (no shifting used) */ if(!shift) for(i=0; i<left; i++) { cgt(n, left, y[iptr+i-1], work1, &itcgt[iptr+i-1], sig[iptr+i-1], sig[s-1], &iwork[0][i], meps); } else /* shift with work5(:,1) in cg iterations */ for(i=0; i<left; i++) { cgts(n, left, y[iptr+i-1], work1, &itcgt[iptr+i-1], sig[iptr+i-1], work5[1][iptr+i-1], sig[s-1], work5[0][iptr+i-1], &iwork[0][i], meps); } for(i=0; i<left; i++) { mxv[0] += iwork[0][i]; mxv[1] += iwork[0][i]; } t1 = timer() - t1; sec4 += t1; }L10: /* compute final 2-norms of residual vectors w.r.t. b (indefinite) */ for(j=0; j<p; j++) { myopb(n, y[j], work2[j], alpha); sig[j]=ddot(n, y[j], 1, work2[j], 1)/ddot( n, y[j], 1, y[j], 1); daxpy(n, -sig[j], y[j], 1, work2[j],1); work4[2][j] = sqrt(ddot(n, work2[j], 1,work2[j],1)); sig[j] = fabs(sig[j]); } mxv[0] += p; mxv[1] += p; sec2 = sec21 + sec22 + sec23; total += sec1 + sec2 + sec3 + sec4; /* load output time and mxv arrays */ time[0] = sec1; time[1] = sec2; time[2] = sec3; time[3] = sec4; time[4] = total; mxv[2] = mxv[0]+mxv[1]; /* load residual vector */ for(i=0; i<p; i++) res[i] = work4[2][i]; return(0);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -