📄 zsp_blas2.c
字号:
/* * -- SuperLU routine (version 3.0) -- * Univ. of California Berkeley, Xerox Palo Alto Research Center, * and Lawrence Berkeley National Lab. * October 15, 2003 * *//* * File name: zsp_blas2.c * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations. */#include "slu_zdefs.h"/* * Function prototypes */void zusolve(int, int, doublecomplex*, doublecomplex*);void zlsolve(int, int, doublecomplex*, doublecomplex*);void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*);intsp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, SuperMatrix *U, doublecomplex *x, SuperLUStat_t *stat, int *info){/* * Purpose * ======= * * sp_ztrsv() solves one of the systems of equations * A*x = b, or A'*x = b, * where b and x are n element vectors and A is a sparse unit , or * non-unit, upper or lower triangular matrix. * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. * * Parameters * ========== * * uplo - (input) char* * On entry, uplo specifies whether the matrix is an upper or * lower triangular matrix as follows: * uplo = 'U' or 'u' A is an upper triangular matrix. * uplo = 'L' or 'l' A is a lower triangular matrix. * * trans - (input) char* * On entry, trans specifies the equations to be solved as * follows: * trans = 'N' or 'n' A*x = b. * trans = 'T' or 't' A'*x = b. * trans = 'C' or 'c' A^H*x = b. * * diag - (input) char* * On entry, diag specifies whether or not A is unit * triangular as follows: * diag = 'U' or 'u' A is assumed to be unit triangular. * diag = 'N' or 'n' A is not assumed to be unit * triangular. * * L - (input) SuperMatrix* * The factor L from the factorization Pr*A*Pc=L*U. Use * compressed row subscripts storage for supernodes, * i.e., L has types: Stype = SC, Dtype = SLU_Z, Mtype = TRLU. * * U - (input) SuperMatrix* * The factor U from the factorization Pr*A*Pc=L*U. * U has types: Stype = NC, Dtype = SLU_Z, Mtype = TRU. * * x - (input/output) doublecomplex* * Before entry, the incremented array X must contain the n * element right-hand side vector b. On exit, X is overwritten * with the solution vector x. * * info - (output) int* * If *info = -i, the i-th argument had an illegal value. * */#ifdef _CRAY _fcd ftcs1 = _cptofcd("L", strlen("L")), ftcs2 = _cptofcd("N", strlen("N")), ftcs3 = _cptofcd("U", strlen("U"));#endif SCformat *Lstore; NCformat *Ustore; doublecomplex *Lval, *Uval; int incx = 1, incy = 1; doublecomplex temp; doublecomplex alpha = {1.0, 0.0}, beta = {1.0, 0.0}; doublecomplex comp_zero = {0.0, 0.0}; int nrow; int fsupc, nsupr, nsupc, luptr, istart, irow; int i, k, iptr, jcol; doublecomplex *work; flops_t solve_ops; /* Test the input parameters */ *info = 0; if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1; else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) *info = -2; else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3; else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4; else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5; if ( *info ) { i = -(*info); xerbla_("sp_ztrsv", &i); return 0; } Lstore = L->Store; Lval = Lstore->nzval; Ustore = U->Store; Uval = Ustore->nzval; solve_ops = 0; if ( !(work = doublecomplexCalloc(L->nrow)) ) ABORT("Malloc fails for work in sp_ztrsv()."); if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */ if ( lsame_(uplo, "L") ) { /* Form x := inv(L)*x */ if ( L->nrow == 0 ) return 0; /* Quick return */ for (k = 0; k <= Lstore->nsuper; k++) { fsupc = L_FST_SUPC(k); istart = L_SUB_START(fsupc); nsupr = L_SUB_START(fsupc+1) - istart; nsupc = L_FST_SUPC(k+1) - fsupc; luptr = L_NZ_START(fsupc); nrow = nsupr - nsupc; /* 1 z_div costs 10 flops */ solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc; solve_ops += 8 * nrow * nsupc; if ( nsupc == 1 ) { for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) { irow = L_SUB(iptr); ++luptr; zz_mult(&comp_zero, &x[fsupc], &Lval[luptr]); z_sub(&x[irow], &x[irow], &comp_zero); } } else {#ifdef USE_VENDOR_BLAS#ifdef _CRAY CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx); CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);#else ztrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx); zgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);#endif#else zlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]); zmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc], &x[fsupc], &work[0] );#endif iptr = istart + nsupc; for (i = 0; i < nrow; ++i, ++iptr) { irow = L_SUB(iptr); z_sub(&x[irow], &x[irow], &work[i]); /* Scatter */ work[i] = comp_zero; } } } /* for k ... */ } else { /* Form x := inv(U)*x */ if ( U->nrow == 0 ) return 0; /* Quick return */ for (k = Lstore->nsuper; k >= 0; k--) { fsupc = L_FST_SUPC(k); nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc); nsupc = L_FST_SUPC(k+1) - fsupc; luptr = L_NZ_START(fsupc); /* 1 z_div costs 10 flops */ solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc; if ( nsupc == 1 ) { z_div(&x[fsupc], &x[fsupc], &Lval[luptr]); for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) { irow = U_SUB(i); zz_mult(&comp_zero, &x[fsupc], &Uval[i]); z_sub(&x[irow], &x[irow], &comp_zero); } } else {#ifdef USE_VENDOR_BLAS#ifdef _CRAY CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx);#else ztrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx);#endif#else zusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );#endif for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) { irow = U_SUB(i); zz_mult(&comp_zero, &x[jcol], &Uval[i]); z_sub(&x[irow], &x[irow], &comp_zero); } } } } /* for k ... */ } } else if ( lsame_(trans, "T") ) { /* Form x := inv(A')*x */ if ( lsame_(uplo, "L") ) { /* Form x := inv(L')*x */ if ( L->nrow == 0 ) return 0; /* Quick return */ for (k = Lstore->nsuper; k >= 0; --k) { fsupc = L_FST_SUPC(k); istart = L_SUB_START(fsupc); nsupr = L_SUB_START(fsupc+1) - istart; nsupc = L_FST_SUPC(k+1) - fsupc; luptr = L_NZ_START(fsupc); solve_ops += 8 * (nsupr - nsupc) * nsupc; for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { iptr = istart + nsupc; for (i = L_NZ_START(jcol) + nsupc; i < L_NZ_START(jcol+1); i++) { irow = L_SUB(iptr); zz_mult(&comp_zero, &x[irow], &Lval[i]); z_sub(&x[jcol], &x[jcol], &comp_zero); iptr++; } } if ( nsupc > 1 ) { solve_ops += 4 * nsupc * (nsupc - 1);#ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd("T", strlen("T")); ftcs3 = _cptofcd("U", strlen("U")); CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx);#else ztrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx);#endif } } } else { /* Form x := inv(U')*x */ if ( U->nrow == 0 ) return 0; /* Quick return */ for (k = 0; k <= Lstore->nsuper; k++) { fsupc = L_FST_SUPC(k); nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc); nsupc = L_FST_SUPC(k+1) - fsupc; luptr = L_NZ_START(fsupc); for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) { irow = U_SUB(i); zz_mult(&comp_zero, &x[irow], &Uval[i]); z_sub(&x[jcol], &x[jcol], &comp_zero); } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -