📄 bls1.c
字号:
with the newly sorted elements of alpha (which are approximate singular values of the matrix A). Arguments --------- (input) fp pointer to output file nnzero number of nonzeros in matrix A m row dimension of the sparse matrix A whose SVD is sought n column dimension of the sparse matrix A whose SVD is sought ik number of singular triplets desired ib initial block size for outer iteration ic upper bound for dimension of Krylov subspace generated via outer iteration. It is the maximum dimension for the block upper-bidiagonal matrix S generated in Phase 1 above. tol user-specified tolerance for approximate singular triplets maxit maximum number of outer iterations allowed (output) iko number of singular triplets approximated ico last bound for dimension of Krylov subspace used within outer iteration ibo final block size used in outer iteration sing linear array containing the iko approximate singular values v two-dimensional array containing the iko approximate right singular vectors corresponding to the approximate singular values in array sing u two-dimensional array containing the iko approximate left singular vectors corresponding to the approximate singular values in array sing res linear array containing the iko residuals of the approximate singular triplets memory memory storage needed in bytes External parameters ------------------- Defined and documented in bls1.h Local parameters ---------------- k current # of desired triplets (exit when reduced to 0) k0 count of triplets found in current iteration nb current block size nc size of current subspace ns number of blocks in current iteration Functions called -------------- BLAS dgemv, dgemm, qriter2, orthg MISC validate, random, imin BLS1 point1, block1 NOTE: Unlike Fortran, C stores arrays in row-major order. For the sake of efficiency, most matrices and vectors in the algorithm are transposed, i.e., each vector and each column of a matrix are stored row-wise. ***********************************************************************/int blklan1(FILE *fp, int nnzero, int m, int n, int ik, float *v, float *u, float *sing, int ic, int ib, float tol, float *res, int maxit, int *iko, int *ico, int *ibo, int *memory){ short irand; int nbold, cont, ns, nb, nc, k, k0, length, memsize; int jj, ii, kk, ll, final, flag, i, j, *index; float **wp, **sp, **rp, **bigsp, **workptr2, **tempptr2; float *w, *s, *r, *bigs, *workptr1, *tempptr1; float *ptr1, *ptr2, *ptr3, *ptr4; nb = ib; nc = ic; k = ik; k0 = 0; ns = nc / nb; iconv = 0; irand = (short) SEED; if (ns < MINBLKS) { nb = nc / MINBLKS; ns = nc / nb; } /* return upon error on input */ if (validate(fp, nnzero, m, n, *ico, *ibo, ik, tol)) return(-1); memsize = sizeof(float) * (nb * (m + nc + nc - nb + m + n + 1) + nc * (nb + m + n + m + 3 + nc + nc) + ik * (m + m + n) + n + n + m + m); *memory += memsize; /*********************************************************************** * Allocate work area and initialize pointers * * pointer size * * w (wp) nb by m * * s (sp) nc by nb * * r (rp) (nc-nb) by nb * * bigs (bigsp) nc by (nb+1) * * temp m by nb * * y (yp) nb by n * * uu (uup) nc by m * * vv (vvp) nc by n * * u0 ik by m * * v0 ik by n * * tres nb * * uvtmp (uvtmpp) (nc+ik) by m (assume max (m,n) = m) * * z n * * p n * * q m * * t m * * alpha nc * * beta nc * * pp (ppp) nc by nc * * qq (qqp) nc by nc * * u ik by m (allocated in bls1.c) * * v ik by n (allocated in bls1.c) * ***********************************************************************/ if (!(workptr1 = (float *)malloc(memsize)) || !(index = (int *)malloc(sizeof(int) * ib))){ perror("FIRST MALLOC FAILED in BLKLAN1()"); exit(errno); } *memory += sizeof(int) * ib; memsize = sizeof(float *) * (8 * nc + nb + ik); *memory += memsize; if (!(workptr2 = (float **)malloc(memsize))){ perror("SECOND MALLOC FAILED in BLKLAN1()"); exit(errno); } tempptr1 = workptr1; tempptr2 = workptr2; length = m * nb; w = tempptr1; tempptr1 += length; wp = tempptr2; tempptr2 += nb; j = 0; for (i = 0; i < length; i += m) wp[j++] = &w[i]; length = nc * nb; s = tempptr1; tempptr1 += length; sp = tempptr2; tempptr2 += nc; j = 0; for (i = 0; i < length; i += nb) sp[j++] = &s[i]; length = (nc - nb) * nb; r = tempptr1; tempptr1 += length; rp = tempptr2; tempptr2 += (nc - nb); j = 0; for (i = 0; i < length; i += nb) rp[j++] = &r[i]; length = nc * (nb + 1); bigs = tempptr1; tempptr1 += length; bigsp = tempptr2; tempptr2 += nc; j = 0; for (i = 0; i < length; i += nb + 1) bigsp[j++] = &bigs[i]; temp = tempptr1; tempptr1 += m * nb; length = n * nb; y = tempptr1; tempptr1 += length; yp = tempptr2; tempptr2 += nb; j = 0; for (i = 0; i < length; i += n) yp[j++] = &y[i]; length = m * nc; uu = tempptr1; tempptr1 += length; uup = tempptr2; tempptr2 += nc; j = 0; for (i = 0; i < length; i += m) uup[j++] = &uu[i]; length = n * nc; vv = tempptr1; tempptr1 += length; vvp = tempptr2; tempptr2 += nc; j = 0; for (i = 0; i < length; i += n) vvp[j++] = &vv[i]; u0 = tempptr1; tempptr1 += m * ik; v0 = tempptr1; tempptr1 += n * ik; tres = tempptr1; tempptr1 += nb; length = m * (nc + ik); uvtmp = tempptr1; tempptr1 += length; uvtmpp = tempptr2; tempptr2 += nc + ik; j = 0; for (i = 0; i < length; i += m) uvtmpp[j++] = &uvtmp[i]; z = tempptr1; tempptr1 += n; p = tempptr1; tempptr1 += n; q = tempptr1; tempptr1 += m; t = tempptr1; tempptr1 += m; alpha = tempptr1; tempptr1 += nc; beta = tempptr1; tempptr1 += nc; length = nc * nc; pp = tempptr1; tempptr1 += length; qq = tempptr1; tempptr1 += length; ppp = tempptr2; tempptr2 += nc; qqp = tempptr2; tempptr2 += nc; j = 0; for (i = 0; i < length; i += nc) { ppp[j] = &pp[i]; qqp[j++] = &qq[i]; } /* choose an initial random V[1] matrix */ length = n * nb; for (i = 0; i < length; i++) vv[i] = random(&irand); orthg(nb, 0, n, yp, vvp, temp); /* initialize iteration counter */ iter = 0; while (iter < maxit) { nn = nb * ns; cont = TRUE; iter += 1; /*------------------------------------------------------------------* * PHASE 1 and PHASE 2 (block algorithm) * *------------------------------------------------------------------*/ if (nb > 1) block1(w, wp, sp, rp, bigsp, m, n, nb, ns, &irand); else { if (nbold != 1) k0 = 0; /*------------------------------------------------------------------* * PHASE 2A (point algorithm) * *------------------------------------------------------------------*/ cont = point1(&k, m, n, wp, res, sing, tol); if (cont) { for (i = 0; i < nn; i++) { for (j = 0; j < nn; j++) { ppp[i][j] = ZERO; qqp[i][j] = ZERO; } } for (i = 0; i < nn; i++) { ppp[i][i] = ONE; qqp[i][i] = ONE; } } } /*------------------------------------------------------------------* * PHASE 3 * *------------------------------------------------------------------*/ if (!cont) break; qriter2(nn, alpha, beta, qqp, ppp); /*------------------------------------------------------------------* * PHASE 4 * *------------------------------------------------------------------*/ nbold = nb; if (iter > 1 && nb > 1) { k0 = 0; final = imin(nb, k); for (i = 0; i < final; i++) { if (fabs(tres[i]) <= tol) { index[k0] = i; sing[iconv + k0] = alpha[i]; res[iconv + k0] = tres[i]; k0 += 1; } } if (nb >= k) nb -= k0; else nb = imin(nb, k - k0); nc -= k0; k -= k0; if (k) ns = nc / nb; if (ns < MINBLKS) { nb = nc / MINBLKS; ns = nc / nb; } } /*------------------------------------------------------------------* * PHASE 5 (back transformation) * *------------------------------------------------------------------*/ if (nbold > 1) { /* uu[nn x m], qq[(nb+k0) x nn] */ dgemm(NTRANSP, NTRANSP, nb + k0, m, nn, ONE, qqp, uu, ZERO, u); dgemm(NTRANSP, NTRANSP, nb + k0, n, nn, ONE, ppp, vv, ZERO, v); } else { dgemv(TRANSP, nn, m, ONE, uup, qq, ZERO, u); dgemv(TRANSP, nn, n, ONE, vvp, pp, ZERO, v); } if (k0) { final = iconv + k0; ii = 0; for (jj = iconv; jj < final; jj++) { ptr1 = &u[m * index[ii]]; ptr2 = &u0[m * jj]; for (i = 0; i < m; i++) *ptr2++ = *ptr1++; ptr1 = &v[n * index[ii++]]; ptr2 = &v0[n * jj]; for (i = 0; i < n; i++) *ptr2++ = *ptr1++; } iconv = final; if (k <= 0) { iter -= 1; cont = FALSE; } /* reload unconverged right S-vector */ if (cont) { kk = 0; final = nb + k0; for (jj = 0; jj < final; jj++) { flag = FALSE; ll = 0; while (ll < k0 && !flag) { if (index[ll] != jj) ll++; else flag = TRUE; } if (!flag) { ptr1 = &vv[n * kk]; ptr2 = &v[n * jj]; for (i = 0; i < n; i++) *ptr1++ = *ptr2++; kk++; } } } } else { ptr1 = v; for (jj = 0; jj < nb; jj++) for (i = 0; i < n; i++) vvp[jj][i] = *ptr1++; } if (!cont) break; } /* return all converged S-vectors (right and left) */ ptr1 = u; ptr2 = u0; ptr3 = v; ptr4 = v0; for (j = 0; j < iconv; j++) { for (i = 0; i < m; i++) *ptr1++ = *ptr2++; for (i = 0; i < n; i++) *ptr3++ = *ptr4++; } *iko = ik - k; *ibo = nb; *ico = nc; free(workptr1); free(workptr2); free(index); return(0);}#define ZERO 0.0#define ONE 1.0#define TRANSP 1#define NTRANSP 0extern float *tres, *temp, *uu, *vv, *u0, *v0, *y, *uvtmp, *pp, *qq;extern float **uup, **vvp, **uvtmpp, **yp, **ppp, **qqp;extern float *p, *q, *t, *z, *alpha, *beta;extern int iconv, nn, iter;float ddot(int, float *,int, float *, int);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -