zgsrfs.c

来自「SuperLU is a general purpose library for」· C语言 代码 · 共 461 行 · 第 1/2 页

C
461
字号
    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */    nz     = A->ncol + 1;    eps    = dlamch_("Epsilon");    safmin = dlamch_("Safe minimum");    /* Set SAFE1 essentially to be the underflow threshold times the       number of additions in each row. */    safe1  = nz * safmin;    safe2  = safe1 / eps;    /* Compute the number of nonzeros in each row (or column) of A */    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;    if ( notran ) {	for (k = 0; k < A->ncol; ++k)	    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 		++iwork[Astore->rowind[i]];    } else {	for (k = 0; k < A->ncol; ++k)	    iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];    }	    /* Copy one column of RHS B into Bjcol. */    Bjcol.Stype = B->Stype;    Bjcol.Dtype = B->Dtype;    Bjcol.Mtype = B->Mtype;    Bjcol.nrow  = B->nrow;    Bjcol.ncol  = 1;    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );    if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");    Bjcol_store = Bjcol.Store;    Bjcol_store->lda = ldb;    Bjcol_store->nzval = work; /* address aliasing */	    /* Do for each right hand side ... */    for (j = 0; j < nrhs; ++j) {	count = 0;	lstres = 3.;	Bptr = &Bmat[j*ldb];	Xptr = &Xmat[j*ldx];	while (1) { /* Loop until stopping criterion is satisfied. */	    /* Compute residual R = B - op(A) * X,   	       where op(A) = A, A**T, or A**H, depending on TRANS. */	    #ifdef _CRAY	    CCOPY(&A->nrow, Bptr, &ione, work, &ione);#else	    zcopy_(&A->nrow, Bptr, &ione, work, &ione);#endif	    sp_zgemv(transc, ndone, A, Xptr, ione, done, work, ione);	    /* Compute componentwise relative backward error from formula 	       max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   	       where abs(Z) is the componentwise absolute value of the matrix	       or vector Z.  If the i-th component of the denominator is less	       than SAFE2, then SAFE1 is added to the i-th component of the   	       numerator before dividing. */	    for (i = 0; i < A->nrow; ++i) rwork[i] = z_abs1( &Bptr[i] );	    	    /* Compute abs(op(A))*abs(X) + abs(B). */	    if (notran) {		for (k = 0; k < A->ncol; ++k) {		    xk = z_abs1( &Xptr[k] );		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)			rwork[Astore->rowind[i]] += z_abs1(&Aval[i]) * xk;		}	    } else {		for (k = 0; k < A->ncol; ++k) {		    s = 0.;		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {			irow = Astore->rowind[i];			s += z_abs1(&Aval[i]) * z_abs1(&Xptr[irow]);		    }		    rwork[k] += s;		}	    }	    s = 0.;	    for (i = 0; i < A->nrow; ++i) {		if (rwork[i] > safe2) {		    s = SUPERLU_MAX( s, z_abs1(&work[i]) / rwork[i] );                } else if ( rwork[i] != 0.0 ) {		    s = SUPERLU_MAX( s, (z_abs1(&work[i]) + safe1) / rwork[i] );                }                /* If rwork[i] is exactly 0.0, then we know the true                    residual also must be exactly 0.0. */	    }	    berr[j] = s;	    /* Test stopping criterion. Continue iterating if   	       1) The residual BERR(J) is larger than machine epsilon, and   	       2) BERR(J) decreased by at least a factor of 2 during the   	          last iteration, and   	       3) At most ITMAX iterations tried. */	    if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {		/* Update solution and try again. */		zgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);		#ifdef _CRAY		CAXPY(&A->nrow, &done, work, &ione,		       &Xmat[j*ldx], &ione);#else		zaxpy_(&A->nrow, &done, work, &ione,		       &Xmat[j*ldx], &ione);#endif		lstres = berr[j];		++count;	    } else {		break;	    }        	} /* end while */	stat->RefineSteps = count;	/* Bound error from formula:	   norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   	   ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)             where               norm(Z) is the magnitude of the largest component of Z               inv(op(A)) is the inverse of op(A)               abs(Z) is the componentwise absolute value of the matrix or	       vector Z               NZ is the maximum number of nonzeros in any row of A, plus 1               EPS is machine epsilon             The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))             is incremented by SAFE1 if the i-th component of             abs(op(A))*abs(X) + abs(B) is less than SAFE2.             Use ZLACON to estimate the infinity-norm of the matrix                inv(op(A)) * diag(W),             where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */		for (i = 0; i < A->nrow; ++i) rwork[i] = z_abs1( &Bptr[i] );		/* Compute abs(op(A))*abs(X) + abs(B). */	if ( notran ) {	    for (k = 0; k < A->ncol; ++k) {		xk = z_abs1( &Xptr[k] );		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)		    rwork[Astore->rowind[i]] += z_abs1(&Aval[i]) * xk;	    }	} else {	    for (k = 0; k < A->ncol; ++k) {		s = 0.;		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {		    irow = Astore->rowind[i];		    xk = z_abs1( &Xptr[irow] );		    s += z_abs1(&Aval[i]) * xk;		}		rwork[k] += s;	    }	}		for (i = 0; i < A->nrow; ++i)	    if (rwork[i] > safe2)		rwork[i] = z_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i];	    else		rwork[i] = z_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;	kase = 0;	do {	    zlacon_(&A->nrow, &work[A->nrow], work,		    &ferr[j], &kase);	    if (kase == 0) break;	    if (kase == 1) {		/* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */		if ( notran && colequ )		    for (i = 0; i < A->ncol; ++i) {		        zd_mult(&work[i], &work[i], C[i]);	            }		else if ( !notran && rowequ )		    for (i = 0; i < A->nrow; ++i) {		        zd_mult(&work[i], &work[i], R[i]);                    }		zgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info);				for (i = 0; i < A->nrow; ++i) {		    zd_mult(&work[i], &work[i], rwork[i]);	 	}	    } else {		/* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */		for (i = 0; i < A->nrow; ++i) {		    zd_mult(&work[i], &work[i], rwork[i]);		}				zgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);				if ( notran && colequ )		    for (i = 0; i < A->ncol; ++i) {		        zd_mult(&work[i], &work[i], C[i]);		    }		else if ( !notran && rowequ )		    for (i = 0; i < A->ncol; ++i) {		        zd_mult(&work[i], &work[i], R[i]);  		    }	    }	    	} while ( kase != 0 );	/* Normalize error. */	lstres = 0.; 	if ( notran && colequ ) {	    for (i = 0; i < A->nrow; ++i)	    	lstres = SUPERLU_MAX( lstres, C[i] * z_abs1( &Xptr[i]) );  	} else if ( !notran && rowequ ) {	    for (i = 0; i < A->nrow; ++i)	    	lstres = SUPERLU_MAX( lstres, R[i] * z_abs1( &Xptr[i]) );	} else {	    for (i = 0; i < A->nrow; ++i)	    	lstres = SUPERLU_MAX( lstres, z_abs1( &Xptr[i]) );	}	if ( lstres != 0. )	    ferr[j] /= lstres;    } /* for each RHS j ... */        SUPERLU_FREE(work);    SUPERLU_FREE(rwork);    SUPERLU_FREE(iwork);    SUPERLU_FREE(Bjcol.Store);    return;} /* zgsrfs */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?