csp_blas2.c

来自「SuperLU is a general purpose library for」· C语言 代码 · 共 574 行 · 第 1/2 页

C
574
字号
                /* 1 c_div costs 10 flops */		solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;		if ( nsupc == 1 ) {		    c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);		} else {#ifdef _CRAY                    ftcs1 = _cptofcd("U", strlen("U"));                    ftcs2 = _cptofcd("T", strlen("T"));                    ftcs3 = _cptofcd("N", strlen("N"));		    CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,			    &x[fsupc], &incx);#else		    ctrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,			    &x[fsupc], &incx);#endif		}	    } /* for k ... */	}    } else { /* Form x := conj(inv(A'))*x */		if ( lsame_(uplo, "L") ) {	    /* Form x := conj(inv(L'))*x */    	    if ( L->nrow == 0 ) return 0; /* Quick return */	    	    for (k = Lstore->nsuper; k >= 0; --k) {	    	fsupc = L_FST_SUPC(k);	    	istart = L_SUB_START(fsupc);	    	nsupr = L_SUB_START(fsupc+1) - istart;	    	nsupc = L_FST_SUPC(k+1) - fsupc;	    	luptr = L_NZ_START(fsupc);		solve_ops += 8 * (nsupr - nsupc) * nsupc;		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {		    iptr = istart + nsupc;		    for (i = L_NZ_START(jcol) + nsupc; 				i < L_NZ_START(jcol+1); i++) {			irow = L_SUB(iptr);                        cc_conj(&temp, &Lval[i]);			cc_mult(&comp_zero, &x[irow], &temp);		    	c_sub(&x[jcol], &x[jcol], &comp_zero);			iptr++;		    } 		} 		 		if ( nsupc > 1 ) {		    solve_ops += 4 * nsupc * (nsupc - 1);#ifdef _CRAY                    ftcs1 = _cptofcd("L", strlen("L"));                    ftcs2 = _cptofcd(trans, strlen("T"));                    ftcs3 = _cptofcd("U", strlen("U"));		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,			&x[fsupc], &incx);#else                    ctrsv_("L", trans, "U", &nsupc, &Lval[luptr], &nsupr,                           &x[fsupc], &incx);#endif		}	    }	} else {	    /* Form x := conj(inv(U'))*x */	    if ( U->nrow == 0 ) return 0; /* Quick return */	    	    for (k = 0; k <= Lstore->nsuper; k++) {	    	fsupc = L_FST_SUPC(k);	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);	    	nsupc = L_FST_SUPC(k+1) - fsupc;	    	luptr = L_NZ_START(fsupc);		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {			irow = U_SUB(i);                        cc_conj(&temp, &Uval[i]);			cc_mult(&comp_zero, &x[irow], &temp);		    	c_sub(&x[jcol], &x[jcol], &comp_zero);		    }		}                /* 1 c_div costs 10 flops */		solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc; 		if ( nsupc == 1 ) {                    cc_conj(&temp, &Lval[luptr]);		    c_div(&x[fsupc], &x[fsupc], &temp);		} else {#ifdef _CRAY                    ftcs1 = _cptofcd("U", strlen("U"));                    ftcs2 = _cptofcd(trans, strlen("T"));                    ftcs3 = _cptofcd("N", strlen("N"));		    CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,			    &x[fsupc], &incx);#else                    ctrsv_("U", trans, "N", &nsupc, &Lval[luptr], &nsupr,                               &x[fsupc], &incx);#endif  		}  	    } /* for k ... */  	}    }    stat->ops[SOLVE] += solve_ops;    SUPERLU_FREE(work);    return 0;}/*! \brief Performs one of the matrix-vector operations y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y * * <pre>   *   Purpose    *   =======    * *   sp_cgemv()  performs one of the matrix-vector operations    *      y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,    *   where alpha and beta are scalars, x and y are vectors and A is a *   sparse A->nrow by A->ncol matrix.    * *   Parameters    *   ==========    * *   TRANS  - (input) char* *            On entry, TRANS specifies the operation to be performed as    *            follows:    *               TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.    *               TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.    *               TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.    * *   ALPHA  - (input) complex *            On entry, ALPHA specifies the scalar alpha.    * *   A      - (input) SuperMatrix* *            Before entry, the leading m by n part of the array A must    *            contain the matrix of coefficients.    * *   X      - (input) complex*, array of DIMENSION at least    *            ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'    *           and at least    *            ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.    *            Before entry, the incremented array X must contain the    *            vector x.    *  *   INCX   - (input) int *            On entry, INCX specifies the increment for the elements of    *            X. INCX must not be zero.    * *   BETA   - (input) complex *            On entry, BETA specifies the scalar beta. When BETA is    *            supplied as zero then Y need not be set on input.    * *   Y      - (output) complex*,  array of DIMENSION at least    *            ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'    *            and at least    *            ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.    *            Before entry with BETA non-zero, the incremented array Y    *            must contain the vector y. On exit, Y is overwritten by the  *            updated vector y. *	       *   INCY   - (input) int *            On entry, INCY specifies the increment for the elements of    *            Y. INCY must not be zero.    * *    ==== Sparse Level 2 Blas routine.    * </pre>*/intsp_cgemv(char *trans, complex alpha, SuperMatrix *A, complex *x, 	 int incx, complex beta, complex *y, int incy){    /* Local variables */    NCformat *Astore;    complex   *Aval;    int info;    complex temp, temp1;    int lenx, leny, i, j, irow;    int iy, jx, jy, kx, ky;    int notran;    complex comp_zero = {0.0, 0.0};    complex comp_one = {1.0, 0.0};    notran = lsame_(trans, "N");    Astore = A->Store;    Aval = Astore->nzval;        /* Test the input parameters */    info = 0;    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;    else if (incx == 0) info = 5;    else if (incy == 0)	info = 8;    if (info != 0) {	xerbla_("sp_cgemv ", &info);	return 0;    }    /* Quick return if possible. */    if (A->nrow == 0 || A->ncol == 0 || 	c_eq(&alpha, &comp_zero) && 	c_eq(&beta, &comp_one))	return 0;    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set        up the start points in  X  and  Y. */    if (lsame_(trans, "N")) {	lenx = A->ncol;	leny = A->nrow;    } else {	lenx = A->nrow;	leny = A->ncol;    }    if (incx > 0) kx = 0;    else kx =  - (lenx - 1) * incx;    if (incy > 0) ky = 0;    else ky =  - (leny - 1) * incy;    /* Start the operations. In this version the elements of A are          accessed sequentially with one pass through A. */    /* First form  y := beta*y. */    if ( !c_eq(&beta, &comp_one) ) {	if (incy == 1) {	    if ( c_eq(&beta, &comp_zero) )		for (i = 0; i < leny; ++i) y[i] = comp_zero;	    else		for (i = 0; i < leny; ++i) 		  cc_mult(&y[i], &beta, &y[i]);	} else {	    iy = ky;	    if ( c_eq(&beta, &comp_zero) )		for (i = 0; i < leny; ++i) {		    y[iy] = comp_zero;		    iy += incy;		}	    else		for (i = 0; i < leny; ++i) {		    cc_mult(&y[iy], &beta, &y[iy]);		    iy += incy;		}	}    }        if ( c_eq(&alpha, &comp_zero) ) return 0;    if ( notran ) {	/* Form  y := alpha*A*x + y. */	jx = kx;	if (incy == 1) {	    for (j = 0; j < A->ncol; ++j) {		if ( !c_eq(&x[jx], &comp_zero) ) {		    cc_mult(&temp, &alpha, &x[jx]);		    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {			irow = Astore->rowind[i];			cc_mult(&temp1, &temp,  &Aval[i]);			c_add(&y[irow], &y[irow], &temp1);		    }		}		jx += incx;	    }	} else {	    ABORT("Not implemented.");	}    } else {	/* Form  y := alpha*A'*x + y. */	jy = ky;	if (incx == 1) {	    for (j = 0; j < A->ncol; ++j) {		temp = comp_zero;		for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {		    irow = Astore->rowind[i];		    cc_mult(&temp1, &Aval[i], &x[irow]);		    c_add(&temp, &temp, &temp1);		}		cc_mult(&temp1, &alpha, &temp);		c_add(&y[jy], &y[jy], &temp1);		jy += incy;	    }	} else {	    ABORT("Not implemented.");	}    }    return 0;    } /* sp_cgemv */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?