📄 pdgssvx.c

📁 LU分解求解矩阵方程组的解
💻 C
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
	    stat->utime[SYMBFAC] = SuperLU_timer_() - t;	    if ( iinfo < 0 ) { /* Successful return */		QuerySpace_dist(n, -iinfo, Glu_freeable, &symb_mem_usage);#if ( PRNTlevel>=1 )		if ( !iam ) {		    printf("\tNo of supers %ld\n", Glu_persist->supno[n-1]+1);		    printf("\tSize of G(L) %ld\n", Glu_freeable->xlsub[n]);		    printf("\tSize of G(U) %ld\n", Glu_freeable->xusub[n]);		    printf("\tint %d, short %d, float %d, double %d\n", 			   sizeof(int_t), sizeof(short), sizeof(float),			   sizeof(double));		    printf("\tSYMBfact (MB):\tL\\U %.2f\ttotal %.2f\texpansions %d\n",			   symb_mem_usage.for_lu*1e-6, 			   symb_mem_usage.total*1e-6,			   symb_mem_usage.expansions);		}#endif	    } else {		if ( !iam ) {		    fprintf(stderr, "symbfact() error returns %d\n", iinfo);		    exit(-1);		}	    }	} /* end if Fact ... */	/* Apply column permutation to the original distributed A */	for (j = 0; j < nnz_loc; ++j) colind[j] = perm_c[colind[j]];	/* Distribute Pc*Pr*diag(R)*A*diag(C)*Pc' into L and U storage. 	   NOTE: the row permutation Pc*Pr is applied internally in the	   distribution routine. */	t = SuperLU_timer_();	dist_mem_use = pddistribute(Fact, n, A, ScalePermstruct,                                  Glu_freeable, LUstruct, grid);	stat->utime[DIST] = SuperLU_timer_() - t;	/* Deallocate storage used in symbolic factorization. */	if ( Fact != SamePattern_SameRowPerm ) {	    iinfo = symbfact_SubFree(Glu_freeable);	    SUPERLU_FREE(Glu_freeable);	}	/* Perform numerical factorization in parallel. */	t = SuperLU_timer_();	pdgstrf(options, m, n, anorm, LUstruct, grid, stat, info);	stat->utime[FACT] = SuperLU_timer_() - t;#if ( PRNTlevel>=1 )	{	    int_t TinyPivots;	    float for_lu, total, max, avg, temp;	    dQuerySpace_dist(n, LUstruct, grid, &num_mem_usage);	    MPI_Reduce( &num_mem_usage.for_lu, &for_lu,		       1, MPI_FLOAT, MPI_SUM, 0, grid->comm );	    MPI_Reduce( &num_mem_usage.total, &total,		       1, MPI_FLOAT, MPI_SUM, 0, grid->comm );	    temp = SUPERLU_MAX(symb_mem_usage.total,			       symb_mem_usage.for_lu +			       (float)dist_mem_use + num_mem_usage.for_lu);	    temp = SUPERLU_MAX(temp, num_mem_usage.total);	    MPI_Reduce( &temp, &max,		       1, MPI_FLOAT, MPI_MAX, 0, grid->comm );	    MPI_Reduce( &temp, &avg,		       1, MPI_FLOAT, MPI_SUM, 0, grid->comm );	    MPI_Allreduce( &stat->TinyPivots, &TinyPivots, 1, mpi_int_t,			  MPI_SUM, grid->comm );	    stat->TinyPivots = TinyPivots;	    if ( !iam ) {		printf("\tNUMfact (MB) all PEs:\tL\\U\t%.2f\tall\t%.2f\n",		       for_lu*1e-6, total*1e-6);		printf("\tAll space (MB):"		       "\t\ttotal\t%.2f\tAvg\t%.2f\tMax\t%.2f\n",		       avg*1e-6, avg/grid->nprow/grid->npcol*1e-6, max*1e-6);		printf("\tNumber of tiny pivots: %10d\n", stat->TinyPivots);	    }	}#endif            /* Destroy GA */        if ( Fact != SamePattern_SameRowPerm )            Destroy_CompCol_Matrix_dist(&GA);    } /* end if (!factored) */	    /* ------------------------------------------------------------       Compute the solution matrix X.       ------------------------------------------------------------*/    if ( nrhs ) {	if ( !(b_work = doubleMalloc_dist(n)) )	    ABORT("Malloc fails for b_work[]");	/* ------------------------------------------------------------	   Scale the right-hand side if equilibration was performed. 	   ------------------------------------------------------------*/	if ( notran ) {	    if ( rowequ ) {		b_col = B;		for (j = 0; j < nrhs; ++j) {		    irow = fst_row;		    for (i = 0; i < m_loc; ++i) {		        b_col[i] *= R[irow];		        ++irow;		    }		    b_col += ldb;		}	    }	} else if ( colequ ) {	    b_col = B;	    for (j = 0; j < nrhs; ++j) {	        irow = fst_row;		for (i = 0; i < m_loc; ++i) {		    b_col[i] *= C[irow];		    ++irow;		}		b_col += ldb;	    }	}	/* Save a copy of the right-hand side. */	ldx = ldb;	if ( !(X = doubleMalloc_dist(((size_t)ldx) * nrhs)) )	    ABORT("Malloc fails for X[]");	x_col = X;  b_col = B;	for (j = 0; j < nrhs; ++j) {	    for (i = 0; i < m_loc; ++i) x_col[i] = b_col[i];	    x_col += ldx;  b_col += ldb;	}	/* ------------------------------------------------------------	   Solve the linear system.	   ------------------------------------------------------------*/	if ( options->SolveInitialized == NO ) {	    dSolveInit(options, A, perm_r, perm_c, nrhs, LUstruct, grid,		       SOLVEstruct);	}	pdgstrs(n, LUstruct, ScalePermstruct, grid, X, m_loc, 		fst_row, ldb, nrhs, SOLVEstruct, stat, info);	/* ------------------------------------------------------------	   Use iterative refinement to improve the computed solution and	   compute error bounds and backward error estimates for it.	   ------------------------------------------------------------*/	if ( options->IterRefine ) {	    /* Improve the solution by iterative refinement. */	    int_t *it, *colind_gsmv = SOLVEstruct->A_colind_gsmv;	    SOLVEstruct_t *SOLVEstruct1;  /* Used by refinement. */	    t = SuperLU_timer_();	    if ( options->RefineInitialized == NO || Fact == DOFACT ) {	        /* All these cases need to re-initialize gsmv structure */	        if ( options->RefineInitialized )		    pdgsmv_finalize(SOLVEstruct->gsmv_comm);	        pdgsmv_init(A, SOLVEstruct->row_to_proc, grid,			    SOLVEstruct->gsmv_comm);	                       /* Save a copy of the transformed local col indices		   in colind_gsmv[]. */	        if ( colind_gsmv ) SUPERLU_FREE(colind_gsmv);	        if ( !(it = intMalloc_dist(nnz_loc)) )		    ABORT("Malloc fails for colind_gsmv[]");	        colind_gsmv = SOLVEstruct->A_colind_gsmv = it;	        for (i = 0; i < nnz_loc; ++i) colind_gsmv[i] = colind[i];	        options->RefineInitialized = YES;	    } else if ( Fact == SamePattern ||			Fact == SamePattern_SameRowPerm ) {	        double at;	        int_t k, jcol, p;	        /* Swap to beginning the part of A corresponding to the		   local part of X, as was done in pdgsmv_init() */	        for (i = 0; i < m_loc; ++i) { /* Loop through each row */		    k = rowptr[i];		    for (j = rowptr[i]; j < rowptr[i+1]; ++j) {		        jcol = colind[j];		        p = SOLVEstruct->row_to_proc[jcol];		        if ( p == iam ) { /* Local */		            at = a[k]; a[k] = a[j]; a[j] = at;		            ++k;		        }		    }	        }	      	        /* Re-use the local col indices of A obtained from the		   previous call to pdgsmv_init() */	        for (i = 0; i < nnz_loc; ++i) colind[i] = colind_gsmv[i];	    }	    if ( nrhs == 1 ) { /* Use the existing solve structure */	        SOLVEstruct1 = SOLVEstruct;	    } else { /* For nrhs > 1, since refinement is performed for RHS			one at a time, the communication structure for pdgstrs			is different than the solve with nrhs RHS. 			So we use SOLVEstruct1 for the refinement step.		     */	        if ( !(SOLVEstruct1 = (SOLVEstruct_t *) 		                       SUPERLU_MALLOC(sizeof(SOLVEstruct_t))) )		    ABORT("Malloc fails for SOLVEstruct1");	        /* Copy the same stuff */	        SOLVEstruct1->row_to_proc = SOLVEstruct->row_to_proc;	        SOLVEstruct1->inv_perm_c = SOLVEstruct->inv_perm_c;	        SOLVEstruct1->num_diag_procs = SOLVEstruct->num_diag_procs;	        SOLVEstruct1->diag_procs = SOLVEstruct->diag_procs;	        SOLVEstruct1->diag_len = SOLVEstruct->diag_len;	        SOLVEstruct1->gsmv_comm = SOLVEstruct->gsmv_comm;	        SOLVEstruct1->A_colind_gsmv = SOLVEstruct->A_colind_gsmv;				/* Initialize the *gstrs_comm for 1 RHS. */		if ( !(SOLVEstruct1->gstrs_comm = (pxgstrs_comm_t *)		       SUPERLU_MALLOC(sizeof(pxgstrs_comm_t))) )		    ABORT("Malloc fails for gstrs_comm[]");		pxgstrs_init(n, m_loc, 1, fst_row, perm_r, perm_c, grid, 			     Glu_persist, SOLVEstruct1);	    }	    pdgsrfs(n, A, anorm, LUstruct, ScalePermstruct, grid,		    B, ldb, X, ldx, nrhs, SOLVEstruct1, berr, stat, info);            /* Deallocate the storage associated with SOLVEstruct1 */	    if ( nrhs > 1 ) {	        pxgstrs_finalize(SOLVEstruct1->gstrs_comm);	        SUPERLU_FREE(SOLVEstruct1);	    }	    stat->utime[REFINE] = SuperLU_timer_() - t;	}	/* Permute the solution matrix B <= Pc'*X. */	pdPermute_Dense_Matrix(fst_row, m_loc, SOLVEstruct->row_to_proc,			       SOLVEstruct->inv_perm_c,			       X, ldx, B, ldb, nrhs, grid);#if ( DEBUGlevel>=2 )	printf("\n (%d) .. After pdPermute_Dense_Matrix(): b =\n", iam);	for (i = 0; i < m_loc; ++i)	  printf("\t(%d)\t%4d\t%.10f\n", iam, i+fst_row, B[i]);#endif		/* Transform the solution matrix X to a solution of the original	   system before the equilibration. */	if ( notran ) {	    if ( colequ ) {		b_col = B;		for (j = 0; j < nrhs; ++j) {		    irow = fst_row;		    for (i = 0; i < m_loc; ++i) {		        b_col[i] *= C[irow];		        ++irow;		    }		    b_col += ldb;		}	    }	} else if ( rowequ ) {	    b_col = B;	    for (j = 0; j < nrhs; ++j) {	        irow = fst_row;		for (i = 0; i < m_loc; ++i) {		    b_col[i] *= R[irow];		    ++irow;		}		b_col += ldb;	    }	}	SUPERLU_FREE(b_work);	SUPERLU_FREE(X);    } /* end if nrhs != 0 */#if ( PRNTlevel>=1 )    if ( !iam ) printf(".. DiagScale = %d\n", ScalePermstruct->DiagScale);#endif    /* Deallocate R and/or C if it was not used. */    if ( Equil && Fact != SamePattern_SameRowPerm ) {	switch ( ScalePermstruct->DiagScale ) {	    case NOEQUIL:	        SUPERLU_FREE(R);		SUPERLU_FREE(C);		break;	    case ROW: 		SUPERLU_FREE(C);		break;	    case COL: 		SUPERLU_FREE(R);		break;	}    }    if ( !factored && Fact != SamePattern_SameRowPerm ) 	Destroy_CompCol_Permuted_dist(&GAC);#if ( DEBUGlevel>=1 )    CHECK_MALLOC(iam, "Exit pdgssvx()");#endif}
上一页 1 2 34
💿 文件大小 1005 K
👤 上传用户 fpeisheng
📂 所属分类数值算法/人工智能
🏷️ 相关标签

#分解 #矩阵 #方程
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -