📄 s_bls1.cc
字号:
***********************************************************************//*********************************************************************** Description ----------- This function is a single-vector Lanczos bidiagonalization procedure (degenerate case of block size = 1 for block1.c) which is used when normal deflation reduces the current block size to 1. The function returns DONE when the number of remaining triplets to be approximated is less than or equal to zero. Otherwise, it returns status CONTINUE. Arguments --------- (input) k current # of desired triplets m row dimension of the sparse matrix A whose SVD is sought n column dimension of the sparse matrix A whose SVD is sought tol user-specified tolerance for approximate singular triplets wp work space (output) sing linear array containing the iko approximate singular values res linear array containing the iko residuals of the approximate singular triplets alpha diagonals of bidiagonal matrix from the inner recursion beta super-diagonals of bidiagonal matrix from the inner recursion External parameters ------------------- Defined and documented in bls1.h Functions called -------------- BLAS ddot, enorm, daxpy, orthg USER opa, opat ***********************************************************************/long s_bls1::point1(long *k, long m, long n, double **wp, double *res, double *sing, double tol) { double *ptr, dum, znm; long i, j, jj, convpj; for (j = 0; j < n; j++) p[j] = vv[j]; opa(m, n, p, t); /* orthogonalize T w.r.t. converged left S-vectors (U0 array) */ if (iconv) { ptr = u0; for (j = 0; j < iconv; j++) for (i = 0; i < m; i++) uvtmpp[j][i] = *ptr++; if (iconv > 1) { ptr = uvtmpp[iconv]; for (i = 0; i < m; i++) *ptr++ = t[i]; orthg(1, iconv, m, wp, uvtmpp, temp); ptr = uvtmpp[iconv]; for (i = 0; i < m; i++) t[i] = *ptr++; } else { dum = -ddot(nn, uvtmp, 1, t, 1); daxpy(nn, dum, uvtmp, 1, t, 1); } } /* compute alpha[0] */ alpha[0] = enorm(m, t); for (j = 0; j < nn; j++) { if (alpha[j] != ZERO) { /* compute Q[j] := T[j] / alpha[j] */ ptr = uup[j]; for (i = 0; i < m; i++) { q[i] = t[i] / alpha[j]; *ptr++ = q[i]; } if (j == nn - 1) return(CONTINUE); /* compute Z[j] := A^T * Q[J] - alpha[j] * P[j] */ opat(n, q, z); daxpy(n, -alpha[j], p, 1, z, 1); if (!j) { if ((znm = enorm(n, z)) <= tol) { ptr = &u0[iconv * m]; for (i = 0; i < m; i++) *ptr++ = q[i]; ptr = &v0[iconv * n]; for (i = 0; i < n; i++) *ptr++ = p[i]; sing[iconv] = alpha[0]; res[iconv] = znm; iconv += 1; *k -= 1; if (!*k) { iter -= 1; return(DONE); } } } convpj = iconv + j; ptr = v0; /* orthogonalize Z w.r.t. converged right S-vectors and * previous VV's */ for (jj = 0; jj < iconv; jj++) for (i = 0; i < n; i++) uvtmpp[jj][i] = *ptr++; ptr = vv; for (jj = iconv; jj <= convpj; jj++) for (i = 0; i < n; i++) uvtmpp[jj][i] = *ptr++; if (convpj) { ptr = uvtmpp[convpj + 1]; for (i = 0; i < n; i++) *ptr++ = z[i]; orthg(1, convpj + 1, n, wp, uvtmpp, temp); ptr = uvtmpp[convpj + 1]; for (i = 0; i < n; i++) z[i] = *ptr++; } else { dum = -ddot(n, uvtmp, 1, z, 1); daxpy(n, dum, uvtmp, 1, z, 1); } beta[j] = enorm(n,z); if (beta[j] != ZERO) { /* compute P[j+1] := Z[j] / beta[j] */ ptr = vvp[j + 1]; for (i = 0; i < n; i++) { p[i] = z[i] /beta[j]; *ptr++ = p[i]; } /* compute T[j+1] := A * P[j+1] - beta[j] * Q[j] */ opa(m, n, p, t); daxpy(m, -beta[j], q, 1, t, 1); /* orthogonalize T w.r.t. converged left S-vectors and * previous UU's */ convpj = iconv + j; ptr = u0; for (jj = 0; jj < iconv; jj++) for (i = 0; i < m; i++) uvtmpp[jj][i] = *ptr++; ptr = uu; for (jj = iconv; jj <= convpj; jj++) for (i = 0; i < m; i++) uvtmpp[jj][i] = *ptr++; if (convpj) { ptr = uvtmpp[convpj + 1]; for (i = 0; i < m; i++) *ptr++ = t[i]; orthg(1, convpj + 1, m, wp, uvtmpp, temp); ptr = uvtmpp[convpj + 1]; for (i = 0; i < m; i++) t[i] = *ptr++; } else { dum = -ddot(m, uvtmp, 1, t, 1); daxpy(m, dum, uvtmp, 1, t, 1); } alpha[j + 1] = enorm(m, t); } else return(CONTINUE); } else return(CONTINUE); }}/*********************************************************************** * * * block1() * * * ***********************************************************************//*********************************************************************** Description ----------- This function implements the first two phases of the hybrid block Lanczos procedure. In the first phase, which is also known as the block Lanczos outer iteration, a block upper-bidiagonal matrix S is formed. The singular values of the matrix S approximate those of the original sparse matrix A. Total (or complete) re- orthogonalization is used. In the second phase, single-vector Lanczos bidiagonalization is used to reduce (preserving singular values of S) the block matrix S to a bi-diagonal matrix B. Arguments --------- (input) w, wp work space sp diagonal blocks (upper-triangular submatrices) of the block upper-bidiagonal matrix S rp super-diagonal blocks (upper-triangular submatrices) of the block upper-bidiagonal matrix S bigsp block upper-bidiagonal matrix S m row dimension of sparse matrix A n column dimension of sparse matrix A nb current block size ns number of blocks in current iteration irand seed for random number generator (output - globally defined) alpha diagonal elements of bidiagonal matrix B (reduced from matrix S) beta off-diagonal elements of bidiagonal matrix B (reduced from matrix S) tres residuals of approximate singular triplets determined from a previous set of block Lanczos outer iterations. ppp array of left singular vectors of S qqp array of right singular vectors of S External parameters ------------------- Defined and documented in bls1.h Functions called -------------- BLAS ddot, daxpy, enorm, dgemm, orthg, dtbmv USER opat MISC random BLS1 formbigs ***********************************************************************/ void s_bls1::block1(double *w, double **wp, double **sp, double **rp, double **bigsp, long m, long n, long nb, long ns, long *irand){ long jinc, nk, nj, i, j, k, blkptr; double *ptr, dum, pnm; /*------------------------------------------------------------------* * PHASE 1 * *------------------------------------------------------------------*/ /* ns (number of blocks) is assumed to be at least 2 */ nk = nn - nb; /* initialize S and R arrays */ for (i = 0; i < nn; i++) for (j = 0; j < nb; j++) sp[i][j] = ZERO; for (i = 0; i < nk; i++) for (j = 0; j < nb; j++) rp[i][j] = ZERO; for (i = 0; i < nb; i++) opa(m, n, vvp[i], wp[i]); ptr = uvtmp; nk = iconv * m; if (iconv) for (i = 0; i < nk; i++) *ptr++ = u0[i]; nj = nb * m; for (i = 0; i < nj; i++) { *ptr++ = w[i]; w[i] = ZERO; } orthg(nb, iconv, m, wp, uvtmpp, temp); for (i = 0; i < nb; i++) for (j = i; j < nb; j++) sp[i][j] = wp[j][i]; ptr = uvtmp + nk; for (i = 0; i < nj; i++) uu[i] = *ptr++; blkptr = 0; /* iterate for j = 1 to (ns-1) */ for (j = 1; j < ns; j++) { /* compute Y[j] = A^T * U[j] - V[j] * S[j]^T */ for (i = 0; i < nb; i++) opat(n, uup[blkptr + i], yp[i]); /* compute -S * VVP + Y, Y is [nb,n] */ dgemm(NTRANSP, NTRANSP, nb, n, nb, -ONE, &sp[blkptr], vvp[blkptr], ONE, y); /* store residuals in Y for convergence test in Phase 4 */ if (j == 1 && iter > 1) for (i = 0; i < nb; i++) tres[i] = enorm(n, yp[i]); if (iconv) { /* orthogonalize Y[j] w.r.t. [V[0], ... V[j] | V0] */ nk = nb * j; nj = nk + iconv; ptr = vv; for (k = 0; k < nk; k++) for (i = 0; i < n; i++) uvtmpp[k][i] = *ptr++; ptr = v0; for (k = nk; k < nj; k++) for (i = 0; i < n; i++) uvtmpp[k][i] = *ptr++; } else { /* orthogonalize Y[j] w.r.t. [V[0], ... V[j]] */ nj = nb * j; ptr = vv; for (k = 0; k < nj; k++) for (i = 0; i < n; i++) uvtmpp[k][i] = *ptr++; } ptr = y; for (k = 0; k < nb; k++) for (i = 0; i < n; i++) { uvtmpp[nj + k][i] = *ptr; *ptr++ = ZERO; } orthg(nb, nj, n, yp, uvtmpp, temp); /* get QR factorization: Y[j] = V[j] * R[j], [R[j] := upper tri. ] */ /* load R[j] submatrix */ for (k = 0; k < nb; k++) for (i = k; i < nb; i++) rp[blkptr + k][i] = yp[i][k]; /* increment pointer for V[j] */ jinc = blkptr + nb; /* load V[j] submatrix */ ptr = vvp[jinc]; for (k = nj; k < nj + nb; k++) for (i = 0; i < n; i++) *ptr++ = uvtmpp[k][i]; /* compute W[j] = A * V[j] - U[j] * R[j]^T */ for (k = 0; k < nb; k++) opa(m, n, vvp[jinc + k], wp[k]); dgemm(NTRANSP, NTRANSP, nb, m, nb, -ONE, &rp[blkptr], uup[blkptr], ONE, w); if (iconv) { /* orthogonalize W[j] w.r.t. [U[0], ... U[j] | U0] */ nk = nb * j; nj = nk + iconv; ptr = uu; for (k = 0; k < nk; k++) for (i = 0; i < m; i++) uvtmpp[k][i] = *ptr++; ptr = u0; for (k = nk; k < nj; k++) for (i = 0; i < m; i++) uvtmpp[k][i] = *ptr++; } else { /* orthogonalize W[j] w.r.t. [U[0], ... U[j]] */ nj = nb * j; ptr = uu; for (k = 0; k < nj; k++) for (i = 0; i < m; i++) uvtmpp[k][i] = *ptr++; } ptr = w; for (k = 0; k < nb; k++) for (i = 0; i < m; i++) { uvtmpp[nj + k][i] = *ptr; *ptr++ = ZERO; } orthg(nb, nj, m, wp, uvtmpp, temp); /* get QR factorization: W[j] = U[j] * S[j], [S[j] := upper tri. ] */ /* load S[j] submatrix */ for (k = 0; k < nb; k++) for (i = k; i < nb; i++) sp[k + jinc][i] = wp[i][k]; ptr = uup[jinc]; for (k = 0; k < nb; k++) for (i = 0; i < m; i++) *ptr++ = uvtmpp[nj + k][i]; /* compute pointer to next block */ blkptr += nb; } /* form the block upper-triangular S matrix in BIGS array; * array ppp will store left S-vectors of BIGS * array qqp will store right S-vectors of BIGS */ formbigs(nn, nb, rp, sp, bigsp); /*------------------------------------------------------------------* * PHASE 2 (block size > 1) * *------------------------------------------------------------------*/ /* choose starting vector P[0] having unity 2-norm */ for (i = 0; i < nn; i++) p[i] = mrandom(irand); pnm = enorm(nn,p); ptr = pp; for (i = 0; i < nn; i++) { p[i] /= pnm; t[i] = p[i]; *ptr++ = p[i]; } /* compute T[0] := S * P[0] */ dtbmv(NTRANSP, nn, nb, bigsp, t); /* compute alpha[0] */ alpha[0] = enorm(nn,t); for (j = 0; j < nn; j++) { if (alpha[j] != ZERO) { /* compute Q[j] := T[j] / alpha[j] */ ptr = qqp[j]; for (i = 0; i < nn; i++) { q[i] = t[i] / alpha[j]; z[i] = q[i]; *ptr++ = q[i]; } if (j == nn - 1) return; /* compute Z[j] := S^T * Q[J] - alpha[j] * P[j] */ dtbmv(TRANSP, nn, nb, bigsp, z); daxpy(nn, -alpha[j], p, 1, z, 1); /* orthogonalize Z w.r.t. previous PP's */ for (k = 0; k <= j; k++) for (i = 0; i < nn; i++) uvtmpp[k][i] = ppp[k][i]; if (j) { ptr = uvtmpp[j + 1]; for (i = 0; i < nn; i++) *ptr++ = z[i]; orthg(1, j+1, nn, wp, uvtmpp, temp); ptr = uvtmpp[j + 1]; for (i = 0; i < nn; i++) z[i] = *ptr++; } else { dum = -ddot(nn, uvtmp, 1, z, 1); daxpy(nn, dum, uvtmp, 1, z, 1); } beta[j] = enorm(nn,z); if (beta[j] != ZERO) { /* compute p[j+1] := z[j] / beta[j] */ ptr = ppp[j + 1]; for (i = 0; i < nn; i++) { p[i] = z[i] / beta[j]; t[i] = p[i]; *ptr++ = p[i]; } /* compute T[j+1] := S * P[j+1] - beta[j] * Q[j] */ dtbmv(NTRANSP, nn, nb, bigsp, t); daxpy(nn, -beta[j], q, 1, t, 1); /* orthogonalize Z w.r.t. previous QQ's */ for (k = 0; k <= j; k++) for (i = 0; i < nn; i++) uvtmpp[k][i] = qqp[k][i]; if (j) { ptr = uvtmpp[j + 1]; for (i = 0; i < nn; i++) *ptr++ = t[i]; orthg(1, j+1, nn, wp, uvtmpp, temp); ptr = uvtmpp[j + 1]; for (i = 0; i < nn; i++) t[i] = *ptr++; } else { dum = -ddot(nn, uvtmp, 1, t, 1); daxpy(nn, dum, uvtmp, 1, t, 1); } /* compute alpha[j+1] */ alpha[j + 1] = enorm(nn, t); } else return; } else return; } return;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -