📄 csp_blas2.c
字号:
/* 1 c_div costs 10 flops */ solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc; if ( nsupc == 1 ) { c_div(&x[fsupc], &x[fsupc], &Lval[luptr]); } else {#ifdef _CRAY ftcs1 = _cptofcd("U", strlen("U")); ftcs2 = _cptofcd("T", strlen("T")); ftcs3 = _cptofcd("N", strlen("N")); CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx);#else ctrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx);#endif } } /* for k ... */ } } else { /* Form x := conj(inv(A'))*x */ if ( lsame_(uplo, "L") ) { /* Form x := conj(inv(L'))*x */ if ( L->nrow == 0 ) return 0; /* Quick return */ for (k = Lstore->nsuper; k >= 0; --k) { fsupc = L_FST_SUPC(k); istart = L_SUB_START(fsupc); nsupr = L_SUB_START(fsupc+1) - istart; nsupc = L_FST_SUPC(k+1) - fsupc; luptr = L_NZ_START(fsupc); solve_ops += 8 * (nsupr - nsupc) * nsupc; for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { iptr = istart + nsupc; for (i = L_NZ_START(jcol) + nsupc; i < L_NZ_START(jcol+1); i++) { irow = L_SUB(iptr); cc_conj(&temp, &Lval[i]); cc_mult(&comp_zero, &x[irow], &temp); c_sub(&x[jcol], &x[jcol], &comp_zero); iptr++; } } if ( nsupc > 1 ) { solve_ops += 4 * nsupc * (nsupc - 1);#ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd(trans, strlen("T")); ftcs3 = _cptofcd("U", strlen("U")); CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx);#else ctrsv_("L", trans, "U", &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx);#endif } } } else { /* Form x := conj(inv(U'))*x */ if ( U->nrow == 0 ) return 0; /* Quick return */ for (k = 0; k <= Lstore->nsuper; k++) { fsupc = L_FST_SUPC(k); nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc); nsupc = L_FST_SUPC(k+1) - fsupc; luptr = L_NZ_START(fsupc); for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) { irow = U_SUB(i); cc_conj(&temp, &Uval[i]); cc_mult(&comp_zero, &x[irow], &temp); c_sub(&x[jcol], &x[jcol], &comp_zero); } } /* 1 c_div costs 10 flops */ solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc; if ( nsupc == 1 ) { cc_conj(&temp, &Lval[luptr]); c_div(&x[fsupc], &x[fsupc], &temp); } else {#ifdef _CRAY ftcs1 = _cptofcd("U", strlen("U")); ftcs2 = _cptofcd(trans, strlen("T")); ftcs3 = _cptofcd("N", strlen("N")); CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx);#else ctrsv_("U", trans, "N", &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx);#endif } } /* for k ... */ } } stat->ops[SOLVE] += solve_ops; SUPERLU_FREE(work); return 0;}intsp_cgemv(char *trans, complex alpha, SuperMatrix *A, complex *x, int incx, complex beta, complex *y, int incy){/* Purpose ======= sp_cgemv() performs one of the matrix-vector operations y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, where alpha and beta are scalars, x and y are vectors and A is a sparse A->nrow by A->ncol matrix. Parameters ========== TRANS - (input) char* On entry, TRANS specifies the operation to be performed as follows: TRANS = 'N' or 'n' y := alpha*A*x + beta*y. TRANS = 'T' or 't' y := alpha*A'*x + beta*y. TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. ALPHA - (input) complex On entry, ALPHA specifies the scalar alpha. A - (input) SuperMatrix* Before entry, the leading m by n part of the array A must contain the matrix of coefficients. X - (input) complex*, array of DIMENSION at least ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' and at least ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. Before entry, the incremented array X must contain the vector x. INCX - (input) int On entry, INCX specifies the increment for the elements of X. INCX must not be zero. BETA - (input) complex On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. Y - (output) complex*, array of DIMENSION at least ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' and at least ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. Before entry with BETA non-zero, the incremented array Y must contain the vector y. On exit, Y is overwritten by the updated vector y. INCY - (input) int On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. ==== Sparse Level 2 Blas routine. */ /* Local variables */ NCformat *Astore; complex *Aval; int info; complex temp, temp1; int lenx, leny, i, j, irow; int iy, jx, jy, kx, ky; int notran; complex comp_zero = {0.0, 0.0}; complex comp_one = {1.0, 0.0}; notran = lsame_(trans, "N"); Astore = A->Store; Aval = Astore->nzval; /* Test the input parameters */ info = 0; if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1; else if ( A->nrow < 0 || A->ncol < 0 ) info = 3; else if (incx == 0) info = 5; else if (incy == 0) info = 8; if (info != 0) { xerbla_("sp_cgemv ", &info); return 0; } /* Quick return if possible. */ if (A->nrow == 0 || A->ncol == 0 || c_eq(&alpha, &comp_zero) && c_eq(&beta, &comp_one)) return 0; /* Set LENX and LENY, the lengths of the vectors x and y, and set up the start points in X and Y. */ if (lsame_(trans, "N")) { lenx = A->ncol; leny = A->nrow; } else { lenx = A->nrow; leny = A->ncol; } if (incx > 0) kx = 0; else kx = - (lenx - 1) * incx; if (incy > 0) ky = 0; else ky = - (leny - 1) * incy; /* Start the operations. In this version the elements of A are accessed sequentially with one pass through A. */ /* First form y := beta*y. */ if ( !c_eq(&beta, &comp_one) ) { if (incy == 1) { if ( c_eq(&beta, &comp_zero) ) for (i = 0; i < leny; ++i) y[i] = comp_zero; else for (i = 0; i < leny; ++i) cc_mult(&y[i], &beta, &y[i]); } else { iy = ky; if ( c_eq(&beta, &comp_zero) ) for (i = 0; i < leny; ++i) { y[iy] = comp_zero; iy += incy; } else for (i = 0; i < leny; ++i) { cc_mult(&y[iy], &beta, &y[iy]); iy += incy; } } } if ( c_eq(&alpha, &comp_zero) ) return 0; if ( notran ) { /* Form y := alpha*A*x + y. */ jx = kx; if (incy == 1) { for (j = 0; j < A->ncol; ++j) { if ( !c_eq(&x[jx], &comp_zero) ) { cc_mult(&temp, &alpha, &x[jx]); for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; cc_mult(&temp1, &temp, &Aval[i]); c_add(&y[irow], &y[irow], &temp1); } } jx += incx; } } else { ABORT("Not implemented."); } } else { /* Form y := alpha*A'*x + y. */ jy = ky; if (incx == 1) { for (j = 0; j < A->ncol; ++j) { temp = comp_zero; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; cc_mult(&temp1, &Aval[i], &x[irow]); c_add(&temp, &temp, &temp1); } cc_mult(&temp1, &alpha, &temp); c_add(&y[jy], &y[jy], &temp1); jy += incy; } } else { ABORT("Not implemented."); } } return 0; } /* sp_cgemv */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -