📄 tms2.c
字号:
fprintf(fp_out1, " %s\n", name); fprintf(fp_out1, " ... NO. OF DOCUMENTS (ROWS) = %10ld\n", nrow); fprintf(fp_out1, " ... NO. OF TERMS (COLS) = %10ld\n", ncol); fprintf(fp_out1, " ... ORDER OF MATRIX A = %10ld\n", n); fprintf(fp_out1,"......\n"); fprintf(fp_out1,"...... COMPUTED SINGULAR VALUES (RESIDUAL NORMS) T-MIN STEPS CG STEPS\n"); fprintf(fp_out1,"......\n"); for(i=0; i<p; i++) fprintf(fp_out1, "... %2ld %24.14E ( %10.2E) %3ld %3ld \n", i+1, sig[i], res[i],titer[i],itcgt[i]); if(vec) { size1 = sizeof(double) * (p*n); write(fp_out2, (char *)y, size1); } free(value); free(pointr); fclose(fp_in1); fclose(fp_in2); fclose(fp_out1); if (vec) close(fp_out2); free(workptr1); free(workptr2); free(workptr3); free(workptr4); exit(0);}#include <stdio.h>#include <math.h>#include <errno.h>#include <fcntl.h>#include "tmsc.h"extern long nrow, ncol;extern double alpha;extern double *z,*v2,*r,*r2,*pp,*z2,alpha,*zz;double epslon(double);double random(long *);long imin(long, long);long imax(long, long);void dscal(long, double, double *, long);void dtrsm(char, char, long, char, long, long, double, double **, double **);void porth(long, long, long, double **, double *, double *, double *, double *, double *, double, long, double);void pmul(long, double *, double *, double *, double *, double *, double, long, double);void orthg(long, long, long, double **, double **, double *);void opat(double *,double *);void opa(double *, double *);void dgemm3(long, long, long, long, long, double, double **, long, long, double **, long, long, double, double **, long, long);double dasum(long, double *, long);float timer(void);void opb(long, double *, double *,double);void daxpy(long, double, double *,long, double *, long);void tred2(long, long, double **, double *, double *, double **);long tql2(long, long, double *, double *, double **);void disk(long, double *, double *, long *, long *);void isol(long, double, double, long *, double *, double *, long);void clus(long, long, double *, double, long *, double *, double *, double *, long);void cgt(long, long, double *, double **, long *, double, double, long *, double);void cgts(long, long, double *, double **, long *, double, double, double, double, long *, double);double ddot(long, double *, long, double *, long);#ifdef UNIX_CREAT#elsefloat acosh(float);#endif/*********************************************************************** * * * tsvd2() * * Sparse SVD Via Eigensystem of Equivalent 2-Cyclic Matrix * * (double precision) * * * *********************************************************************** Description ----------- tsvd2 implements a trace-minimization svd method for determining the p-largest singular triplets of a large sparse matrix A. tms2 computes the singular triplets of the matrix A via the equivalent symmetric eigenvalue problem. B = (alpha*alpha)*I - A'A, where A is m (=nrow) by n (=ncol), so that sqrt(alpha*alpha-lambda) is a singular value of A. alpha is chosen so that B is (symmetric) positive definite. In the calling program, alpha is determined as the matrix 1-norm of A. the user can set alpha to be any known upper bound for the the largest singular value of the matrix A. (A' = transpose of A) User supplied routines: opat, opb,timer,store opat(x,y) takes an m-vector x and should return A'*x in y. opb(nrow+ncol,x,y,shift) takes an (m+n)-vector x and should return D*x in y, where D=[B-shift*I], I is the (m+n) order identity matrix. user should replace calls to timer() with an appropriate call to an instrinsic timing routine that returns delta user cpu time (i.e., user cpu time elapsed from a previous call to timing routine) tsvd2 utilizes ritz-shifting and chebyshev polynomials for acceleration of convergence. External parameters ------------------- Defined and documented in tmsg.h Local parameters: ---------------- (input) n order of equivalent symmetric eigenvalue problem should be equal to min(nrow,ncol), where nrow is the number of rows of A and ncol is the number of columns of A. p number of desired singular triplets (largest) of A. s dimension of initial subspace (s should be great er than p; s=2*p is usually safe but since the comp lexity of the method is determined by s, the user should try to keep s as close to p as possible). job acceleration strategy switch: job = 0, no acceleration used, = 1, ritz-shifting used. = 2, chebyshev polynomials and ritz-shifting used. maxi maximum number of trace minimization steps allowed. tol user-supplied tolerance for residual of an equivalent eigenpair of B (singular triplet of A). red user-supplied tolerance for residual reduction to initiate ritz-shifting when job=1 or job=2. lwork(s) (logical 0,1) work array. (output parameters): ----------------- ierr ierr=99, input parameter job invalid. (error flag) mem number of bytes of memory needed maxd maximum polynomial degree used (job = 2). mxv(3) matrix-vector multiplication counters: mxv[0] = number of A*x, mxv[1] = number of A'*x, mxv[2] = mxv[0] + mxv[1] sig(s) contains p-desired singular values. y(nrow+ncol,s) first p columns contain left and right singular vectors, i.e., y(1:nrow+ncol,1:p) = [u(1:r,1:p)' | v(1:c,1:p)']', where r = no. of rows of matrix A and c = no. of cols of matrix A. titer(s) titer(i) := number of trace min. steps need for i-th triplet of A. itcgt(s) itcgt(i) := number of cg steps needed for i-th triplet of A. time(5) timing breakdown array: time[0] = gram-schmidt orthogonalization time[1] = section-formation (spectral decomposition) time[2] = convergence criteria time[3] = conjugate gradient method time[4] = total execution time (user-cpu) res(s) 2-norms of residual vectors (A*y[i]-sig(i)*y[i]), i=1,2,...,p. Functions used -------------- BLAS daxpy, ddot, dgemm3, dgemv, dgemv2, xerbla, pmul, dtrsm, porth USER timer,opb,opat,acosh (if not in math library) RNG random PRECISION epslon MATH sqrt,fabs,acosh EISPACK tred2,tql2,pythag Precision --------- All floating-point calculations use double precision; variables are declared as long and double. */long tsvd2(FILE *fp_out1, long n, long p, long s, long job, double tol, double red, double *sig, long maxi, long *mem, long *itcgt, long *titer,double *time, double *res, long *mxv, double **work1, double **work2, double **work3, double **work4, double **work5, double **y, long **iwork, long *lwork,long *maxd){ double *workptr5,**workptr6,total,*tempptr5,**tempptr6,meps; double tmp,tmpi,pparm1,pparm2,e,enew; double t1,sec1,sec2,sec21,sec22,sec23,sec3,sec4; long degree,ndegre,itmp,ierr,iptr,left,i,j,k,l,irand,iter; long shift,memsize,length,temp_left,polyac; /* allocate memory for temporary arrays in subroutines cgt() and cgts() */ memsize = sizeof(double) * n * 6; if(!(workptr5 = (double *) malloc(memsize))) { perror("FIRST MALLOC FAILED IN TSVD2()\n"); exit(errno); } *mem += memsize; tempptr5 = workptr5; z = tempptr5; tempptr5 += n; v2 = tempptr5; tempptr5 +=n; r = tempptr5; tempptr5 += n; r2 = tempptr5; tempptr5 += n; pp = tempptr5; tempptr5 += n; z2 = tempptr5; /* allocate memory for temporary array in subroutine opb */ memsize = sizeof(double) * (ncol+nrow); if(!(zz = (double *) malloc(memsize))) { perror("SECOND MALLOC FAILED IN TSVD2()\n"); exit(errno); } *mem += memsize; /*get machine epsilon (meps) */ meps = epslon(ONE); shift = FALSE; ierr = 0; if(job && (job != 1) && (job != 2)) { ierr = 99; return(ierr); } if(job == 1) { for(i=0; i<p; i++) { lwork[i] = FALSE; work5[2][i] = -ONE; work5[3][i] = -ONE; } } else { if(job==2) { degree = 0; ndegre = 0; pparm1 = 0.04; pparm2 = 4.0; } } /*initialize timers,counters and flags: */ sec1 = ZERO; sec21 = ZERO; sec22 = ZERO; sec23 = ZERO; sec3 = ZERO; sec4 = ZERO; mxv[0] = 0; mxv[1] = 0; polyac = FALSE; /*initialize y(1:n,1:s) = random matrix (carry s vectors in the tmin iterations, assuming s.ge.p) */ irand =SEED; for(k=0; k<s; k++) { sig[k] = ZERO; work5[1][k] = ZERO; } for(k=0; k<s; k++) { for (i=0; i<n; i++) y[k][i] = random(&irand); } /*---------------------------------------------------------- pointer and counter for hybrid monitor: 1 2 3 4 5 ... i ... p ----------------------- sig:| | | | | |...| |...| | (ascending order) ----------------------- ^ | iptr : points to first e-value of B that has not converged left:=s-iptr+1 (ie how many y-vectors remain for tsvd2) -------------------------------------------------------------- */ /* initialize a few pointers and counters: */ total = ZERO; iptr = 1; left = s; for(i=0; i<s; i++) { titer[i] = 0; itcgt[i] = 0; } /*-------------------------------------------------------------- main tmin iteration loop (nmax iterations) --------------------------------------------------------------*/ for(iter=0; iter<maxi; iter++) { if((job==2) && (!shift)) { if(ndegre) { degree = ndegre; *maxd = imax(*maxd,degree); polyac = TRUE; } else polyac = FALSE; }/* section formation */ t1 = timer(); for(j=0; j<s; j++) { for(i=0; i<n; i++) work2[j][i] = y[j][i]; } if(polyac) { porth(s,0,n,work2,work1[0],work1[1],work4[0],work4[1], work4[2],e,degree,alpha); mxv[0] += degree*s; mxv[1] += degree*s; } else { orthg(s,0,n,work1,work2,&work4[0][0]); } t1 = timer() - t1; sec1 = sec1 + t1; /* form work1(1:n,iptr:s) = b(1:n,1:n)*work2(1:n,iptr:s) */ t1 = timer(); if(polyac) { for(j=0; j<left; j++) { for(i=0; i<n; i++) work1[iptr+j-1][i] = work2[iptr+j-1][i]; } } else { for(j=0; j<left; j++) opb(n,work2[iptr+j-1],work1[iptr+j-1],ZERO); } /* form work3(1:left,1:left) = work2(1:n,iptr:s)'*work1(1:n,iptr:s) */ dgemm3(TRANSP, NTRANSP, left, left, n, ONE, work2, 0,iptr-1, work1, 0, iptr-1, ZERO, work3, 0, 0); if(job >= 1) { for(j=0; j<left; j++) work4[0][j] = dasum(left, &work3[0][j], s) - fabs(work3[j][j]); } t1 = timer() - t1; sec21 = sec21 + t1; /* compute the eigenvalues and eigenvectors of work3: */ t1 = timer(); /* eigenvectors overwrite array work3(:,:) store current e-values in work5(:,2) */ if(polyac) { tred2(0,left,work3,work5[0],work4[1],work3); ierr = tql2(0,left,work5[0],work4[1],work3); if(ierr) printf("IERR FROM TQL2 = %ld\n",ierr); } else { for(j=iptr-1; j<s; j++) work5[1][j] = sig[j]; tred2(0,left, work3, &sig[iptr-1], &work4[1][0], work3); ierr = tql2(0,left, &sig[iptr-1], &work4[1][0], work3); if(ierr) printf("IERR FROM TQL2 = %ld\n",ierr); } t1 = timer() - t1; sec22 = sec22 + t1; /* form y(1:n,iptr:s) = work2(1:n,iptr:s)*work3(1:left,1:left) */ t1 = timer(); dgemm3(NTRANSP, NTRANSP, n, left, left, ONE, work2, 0, iptr-1, work3, 0, 0, ZERO, y, 0, iptr-1); t1 = timer() - t1; sec23 = sec23 + t1; /* test for convergence here */ t1 = timer(); for(j=iptr-1; j<s; j++) { opb(n, y[j], work2[j], ZERO); if(polyac) { work5[1][j]=sig[j]; sig[j] = ddot(n,y[j],1,work2[j],1)/ddot(n,y[j],1,y[j],1); } daxpy(n, -sig[j], y[j], 1, work2[j],1); work4[2][j-iptr+1] = sqrt(ddot(n,work2[j],1,work2[j],1)); if(j < p) titer[j] += 1; } mxv[0] += s-iptr+1; mxv[1] += s-iptr+1; t1 = timer() - t1; sec3 = sec3 + t1; temp_left=left; /* array work4(:,3) stores the vector 2-norms of residual vectors */ for(k=0; k<temp_left; k++) { if(fabs(work4[2][k]) <= tol) { iptr = iptr + 1; if(iptr > p) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -