📄 bls1.c
字号:
/************************************************************************* (c) Copyright 1993 University of Tennessee All Rights Reserved *************************************************************************/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <errno.h>#include <math.h>#include <fcntl.h>#include "bls1.h"#define ZERO 0.0#define UNIX_CREAT#ifdef UNIX_CREAT#define PERMS 0664#endiffloat ddot(int, float *,int, float *, int);void opa(int, int, float *, float *);void opat(int, float *, float *);void daxpy(int, float, float *,int, float *, int);int validate(FILE *, int, int, int, int, int, int, float);float timer(void);int blklan1(FILE *, int, int, int, int, float *, float *, float *, int, int, float, float *, int , int *, int *, int *, int *);/*********************************************************************** * * * main() * * Sparse SVD Via Hybrid Block Lanczos Procedure for Equivalent * * 2-Cyclic Eigensystems (float precision) * * * ***********************************************************************//*********************************************************************** Description ----------- This is a sample driver that invokes BLKLAN1 to compute singular triplets of a large sparse matrix A. In this test program, bls1, the Harwell-Boeing sparse matrix format is employed for accessing elements of the m by n sparse matrix A and its transpose. Other sparse matrix formats can be used, of course. Approximate singular values of A and their residuals can be found in formatted output file blo1 and corresponding right and left singular vectors can be found in unformatted output file blv1. User supplied routines: opa, opat, timer opa(m, n, x, y) takes an n-vector x and returns the m-vector y = A * x. opat(n, m, x, y) takes an m-vector x and returns the n-vector y = A' * x, where A' denotes the transpose of the matrix A. User should edit timer() with an appropriate call to an intrinsic timing routine that returns elapsed user cpu time. External parameters ------------------- Defined and documented in bls1.h Local parameters ---------------- (input) maxit maximum number of iterations nc maximum subspace size nb initial block size nums number of singular values desired tol residual tolerance vtf TRUE or FALSE to indicate whether both singular values and vectors are wanted or only singular values are wanted. If vtf is set to "TRUE", the unformatted output file blv1 will contain the approximate singular vectors written in the order u[0], v[0], u[1], v[1], ..., u[iko-1], v[iko-1]. u[i], v[i] denote the left and right singular vectors, respectively, corresponding to the i-th approximate singular value sing[i], and iko is the number of singular values approximated. (output) sing array of singular values res array of residual norms of the singular values calculated memory total memory allocated in bytes Functions called -------------- BLAS ddot, daxpy USER opa, opat, timer MISC validate BLS1 blklan1 BLS1 development ---------------- BLS1 is a C translation of the Fortran-77 BLS1 from the SVDPACK library written by Michael W. Berry, University of Tennessee, Dept. of Computer Science, 107 Ayres Hall, Knoxville, TN, 37996-1301. It is a modified version of the block Lanczos algorithm first published by Golub, Luk, and Overton (ACM TOMS 7(2):149-169, 1981). This particular implementation is discussed in "Multiprocessor Sparse SVD Algorithms and Applications", Ph.D. thesis by M. Berry, University of Illinois at Urbana-Champaign, October 1990. Date written: 05 May 1992 Theresa H. Do University of Tennessee Dept. of Computer Science 107 Ayres Hall Knoxville, TN, 37996-1301 internet: tdo@cs.utk.edu ***********************************************************************/ main() { float exetime; int i, nrow, ncol, nnzero, nc, nb, nums, maxit, nn; int ncold, nbold, nk, size1, size2, indexu, indexv; int memory, vectors; float tol, *sing, *v, *u, *res, *tmpu, *tmpv, *tptr1; float tmp1, tmp2, tmp3, xnorm; char title[73], name[41], vtf[6]; FILE *fp_in1, *fp_in2; FILE *fp_out1 = NULL; int fp_out2; char *in1, *in2, *out1, *out2; in1 = "blp1"; in2 = "matrix"; out1 = "blo1"; out2 = "blv1"; /* open files for input/output */ if (!(fp_in1 = fopen(in1, "r"))) { printf("cannot open file %s for reading\n", in1); exit(-1); } if (!(fp_in2 = fopen(in2, "r"))) { printf("cannot open file %s for reading\n", in2); exit(-1); } if (!(fp_out1 = fopen(out1, "w"))) { printf("cannot open output file %s \n", out1); exit(-1); } /* read data */ fscanf (fp_in2,"%72c%*s%*s%*s%d%d%d%*d", title, &nrow, &ncol, &nnzero); nn=nrow+ncol; title[73] = '\0'; fscanf (fp_in1,"%s%d%d%d%d%f%s", name, &maxit, &nc, &nb, &nums, &tol, vtf); if (!(strcmp(vtf, "TRUE"))) { vectors = 1;#if !defined UNIX_CREAT if ((fp_out2 = open(out2, O_CREAT | O_RDWR)) == -1) { printf("cannot open output file %s \n", out2); exit(-1); }#else if ((fp_out2 = creat(out2, PERMS )) == -1) { printf("cannot open output file %s \n", out2); exit(-1); }#endif } else vectors = 0; /* even though the validity of the parameters will be checked in the * SVD code, some parameter checking should also be done before * allocating memory to ensure that they are nonnegative */ if (validate(fp_out1, nnzero, nrow, ncol, nc, nb, nums, tol)) { fclose(fp_in1); fclose(fp_in2); fclose(fp_out1); if (vectors) close(fp_out2); exit(-1); } /******************************************************************* * allocate memory * * pointr - column start array of harwell-boeing sparse matrix * * format (ncol + 1) * * rowind - row indices array of harwell-boeing format (nnzero) * * value - nonzero values array of harwell-boeing sparse matrix * * format (nnzero) * * sing - (nums) * * res - (nums) * * u - (nrow * nums) * * v - (ncol * nums) * *******************************************************************/ size1 = sizeof(float) * (nnzero + nums * (nrow + ncol + 2)); size2 = sizeof(int) * (ncol + nnzero + 1); if (!(rowind = (int *) malloc(size2)) || !(value = (float *) malloc(size1))){ perror("MALLOC FAILED in MAIN()"); exit(errno); } tptr1 = value; /* calculate memory allocated for problem */ memory = size1 + size2; pointr = rowind + nnzero; tptr1 += nnzero; u = tptr1; tptr1 += nrow * nums; v = tptr1; tptr1 += ncol * nums; sing = tptr1; tptr1 += nums; res = tptr1; /* skip data format line */ fscanf(fp_in2,"%*s %*s %*s %*s"); /* read data */ for (i = 0; i <= ncol; i++) fscanf(fp_in2, "%d", &pointr[i]); for (i = 0; i < ncol; i++) pointr[i] -= 1; /* define last element of pointr in case it is not */ pointr[i] = nnzero; for (i = 0; i < nnzero; i++) fscanf(fp_in2, "%d", &rowind[i]); for (i = 0; i < nnzero; i++) rowind[i] -= 1; for (i = 0; i < nnzero; i++) fscanf(fp_in2, "%f", &value[i]); ncold = nc; nbold = nb; nk = nums; mxvcount = 0; mtxvcount = 0; exetime = timer(); /* make a lanczos run; exit upon error */ if (blklan1(fp_out1, nnzero, nrow, ncol, nums, v, u, sing, ncold, nbold, tol, res, maxit, &nk, &nc, &nb, &memory)) { free(value); free(rowind); fclose(fp_in1); fclose(fp_in2); fclose(fp_out1); if (vectors) close(fp_out2); exit(-1); } exetime = timer() - exetime; if (vectors) { size1 = sizeof(float) * nrow; size2 = sizeof(float) * ncol; memory += size1 + size2; /* extra memory required if singular vectors are desired */ if (!(tmpu = (float *) malloc(size1 + size2))) { perror("SECOND MALLOC FAILED in MAIN()"); exit(errno); } tmpv = tmpu + nrow; indexu = 0; indexv = 0; for (i = 0; i < nk; i++) { tmp1 = ddot(ncol, &v[indexv], 1, &v[indexv], 1); tmp2 = ddot(nrow, &u[indexu], 1, &u[indexu], 1); tmp3 = sqrt(tmp1 + tmp2); opa(nrow, ncol, &v[indexv], tmpu); opat(ncol, &u[indexu], tmpv); daxpy(nrow, -sing[i], &u[indexu], 1, tmpu, 1); daxpy(ncol, -sing[i], &v[indexv], 1, tmpv, 1); xnorm = ddot(ncol, tmpv, 1, tmpv, 1) + ddot(nrow, tmpu, 1, tmpu, 1); xnorm = sqrt(xnorm); res[i] = xnorm / tmp3; /* write vectors to binary output file */ write (fp_out2, (char *)&u[indexu], size1); write (fp_out2, (char *)&v[indexv], size2); indexu += nrow; indexv += ncol; } } /* write results to output file */ fprintf(fp_out1,"\n"); fprintf(fp_out1, " ... \n"); fprintf(fp_out1, " ... HYBRID BLOCK LANCZOS (CYCLIC)\n"); fprintf(fp_out1, " ... NO. OF EQUATIONS =%10d\n", nn); fprintf(fp_out1, " ... MAX. NO. OF ITERATIONS =%10d\n", maxit); fprintf(fp_out1, " ... NO. OF ITERATIONS TAKEN =%10d\n", iter); fprintf(fp_out1, " ... NO. OF TRIPLETS SOUGHT =%10d\n", nums); fprintf(fp_out1, " ... NO. OF TRIPLETS FOUND =%10d\n", nk); fprintf(fp_out1, " ... INITIAL BLOCKSIZE =%10d\n", nbold); fprintf(fp_out1, " ... FINAL BLOCKSIZE =%10d\n", nb); fprintf(fp_out1, " ... MAXIMUM SUBSPACE BOUND =%10d\n", ncold); fprintf(fp_out1, " ... FINAL SUBSPACE BOUND =%10d\n", nc); fprintf(fp_out1, " ... NO. MULTIPLICATIONS BY A =%10d\n", mxvcount); fprintf(fp_out1, " ... NO. MULT. BY TRANSPOSE(A) =%10d\n", mtxvcount); fprintf(fp_out1, " ... TOTAL SPARSE MAT-VEC MULT.=%10d\n", mxvcount + mtxvcount); fprintf(fp_out1, " ... MEMORY NEEDED IN BYTES =%10d\n", memory); if (vectors) fprintf(fp_out1, " ... WANT S-VECTORS ON OUTPUT? T\n"); else fprintf(fp_out1, " ... WANT S-VECTORS ON OUTPUT? F\n"); fprintf(fp_out1, " ... TOLERANCE =%10.2E\n\n", tol); fprintf(fp_out1, " %s\n", title); fprintf(fp_out1, " %s\n", name); fprintf(fp_out1, " ... NO. OF DOC. (ROWS OF A) = %8d\n", nrow); fprintf(fp_out1, " ... NO. OF TERMS (COLS OF A) = %8d\n", ncol); fprintf(fp_out1, " ... \n\n"); fprintf(fp_out1, " ...... BLSVD EXECUTION TIME=%10.2E\n",exetime); fprintf(fp_out1, " ...... \n"); fprintf(fp_out1, " ...... COMPUTED S-VALUES (RES. NORMS)\n"); fprintf(fp_out1, " ...... \n"); for (i = 0; i < nk; i++) fprintf(fp_out1," ...... %3d %22.14E (%11.2E)\n",i+1,sing[i],res[i]); free(value); free(rowind); fclose(fp_in1); fclose(fp_in2); fclose(fp_out1); if (vectors) { free(tmpu); close(fp_out2); } exit(0);}#include <stdio.h>#include <math.h>#include <errno.h>#include <fcntl.h>extern float *tres, *y, *temp, *uu, *vv, *u0, *v0, *uvtmp, *pp, *qq;extern float *alpha, *beta, *p, *q, *t, *z;extern float **uup, **yp, **vvp, **uvtmpp, **ppp, **qqp;extern int iconv, nn, iter;#define SEED 91827211#define MINBLKS 2#define TRANSP 1#define NTRANSP 0#define TRUE 1#define FALSE 0#define ZERO 0.0#define ONE 1.0void qriter2(int, float *, float *, float **, float **);void dgemv(int, int, int, float, float **, float *, float, float *);void dgemm(int, int, int, int, int, float, float **, float *, float, float *);int imin(int, int);float random(short *);int validate(FILE *, int, int, int, int, int, int, float);int point1(int *, int, int, float **, float *, float *, float);void block1(float *, float **, float **, float **, float **, int, int, int, int, short *);void orthg(int, int, int, float **, float **, float *);/*********************************************************************** * * * blklan1() * * Block Lanczos SVD Alogrithm * * * ***********************************************************************//*********************************************************************** Description ----------- This hybrid block Lanczos procedure consists of five phases: Phase 1: Block Lanczos outer iteration to yield a block upper bidiagonal matrix S whose singular values approximate those of the original sparse matrix A. Total (or complete) re- orthogonalization is used here. Phase 2: Lanczos method for bi-diagonalizing the S matrix from Phase 1 to yield the bi-diagonal matrix B whose singular values also approximate those of A. Total (or complete) re-orthogonalization is used for this Lanczos recursion. a point Lanczos method (single vector) is used if a blocksize of 1 is encountered via normal deflation. Phase 3: Apply an appropriate QR iteration to diagonalize B and hence produce approximate singular values (array alpha) of the original matrix A. Phase 4: Convergence test using a user-supplied residual tolerance. Phase 5: Iteration restart with orthogonalization with respect to any (all) converged singular vectors. This version of blklan1 is designed to approximate the ik-largest singular triplets of A. Users interested in the ik-smallest singular triplets need only sort the alpha array in increasing (as opposed to the default ascending order) following the call to qriter2 in Phase 3. Also, the rows of the two-dimensional arrays ppp and qqp must be reordered to reflect a one-to-one correspondence
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -