pzgssvx.c

来自「SuperLU 2.2版本。对大型、稀疏、非对称的线性系统的直接求解」· C语言代码 · 共 1,281 行 · 第 1/4 页
1,281 行
		    if ( !iam ) {		        fprintf(stderr,"symbfact() error returns %d\n",iinfo);		    	exit(-1);		    }	        }	    } /* end if serial symbolic factorization */	    else {  /* parallel symbolic factorization */	    	t = SuperLU_timer_();	    	flinfo = symbfact_dist(nprocs_num, noDomains, A, perm_c, perm_r,				       sizes, fstVtxSep, &Pslu_freeable, 				       &(grid->comm), &symb_comm,				       &symb_mem_usage); 	    	stat->utime[SYMBFAC] = SuperLU_timer_() - t;	    	if (flinfo > 0) 	      	    ABORT("Insufficient memory for parallel symbolic factorization.");	    }	} /* end if Fact ... */	if (!iam) printf("\tSYMBfact time: %.2f\n", stat->utime[SYMBFAC]);        if (sizes) SUPERLU_FREE (sizes);        if (fstVtxSep) SUPERLU_FREE (fstVtxSep);	if (symb_comm != MPI_COMM_NULL)	  MPI_Comm_free (&symb_comm); 	if (parSymbFact == NO || Fact == SamePattern_SameRowPerm) {  	    /* Apply column permutation to the original distributed A */	    for (j = 0; j < nnz_loc; ++j) colind[j] = perm_c[colind[j]];	    /* Distribute Pc*Pr*diag(R)*A*diag(C)*Pc' into L and U storage. 	       NOTE: the row permutation Pc*Pr is applied internally in the  	       distribution routine. */	    t = SuperLU_timer_();	    dist_mem_use = pzdistribute(Fact, n, A, ScalePermstruct,                                      Glu_freeable, LUstruct, grid);	    stat->utime[DIST] = SuperLU_timer_() - t;  	    /* Deallocate storage used in symbolic factorization. */	    if ( Fact != SamePattern_SameRowPerm ) {	        iinfo = symbfact_SubFree(Glu_freeable);	        SUPERLU_FREE(Glu_freeable);	    }	} else {	    /* Distribute Pc*Pr*diag(R)*A*diag(C)*Pc' into L and U storage. 	       NOTE: the row permutation Pc*Pr is applied internally in the	       distribution routine. */	    /* Apply column permutation to the original distributed A */	    for (j = 0; j < nnz_loc; ++j) colind[j] = perm_c[colind[j]];    	    t = SuperLU_timer_();	    dist_mem_use = zdist_psymbtonum(Fact, n, A, ScalePermstruct,		  			   &Pslu_freeable, LUstruct, grid);	    if (dist_mem_use > 0)	        ABORT ("Not enough memory available for dist_psymbtonum\n");	    stat->utime[DIST] = SuperLU_timer_() - t;	}	if (!iam) printf ("\tDISTRIBUTE time    %8.2f\n", stat->utime[DIST]);	/* Perform numerical factorization in parallel. */	t = SuperLU_timer_();	pzgstrf(options, m, n, anorm, LUstruct, grid, stat, info);	stat->utime[FACT] = SuperLU_timer_() - t;#if ( PRNTlevel>=1 )	{	    int_t TinyPivots;	    float for_lu, total, max, avg, temp;	    zQuerySpace_dist(n, LUstruct, grid, &num_mem_usage);	    MPI_Reduce( &num_mem_usage.for_lu, &for_lu,		       1, MPI_FLOAT, MPI_SUM, 0, grid->comm );	    MPI_Reduce( &num_mem_usage.total, &total,		       1, MPI_FLOAT, MPI_SUM, 0, grid->comm );	    temp = SUPERLU_MAX(symb_mem_usage.total,			       symb_mem_usage.for_lu +			       (float)dist_mem_use + num_mem_usage.for_lu);	    if (parSymbFact == TRUE)	      /* The memory used in the redistribution routine		 includes the memory used for storing the symbolic		 structure and the memory allocated for numerical		 factorization */	      temp = SUPERLU_MAX(symb_mem_usage.total,				 (float)dist_mem_use);	    temp = SUPERLU_MAX(temp, num_mem_usage.total);	    MPI_Reduce( &temp, &max,		       1, MPI_FLOAT, MPI_MAX, 0, grid->comm );	    MPI_Reduce( &temp, &avg,		       1, MPI_FLOAT, MPI_SUM, 0, grid->comm );	    MPI_Allreduce( &stat->TinyPivots, &TinyPivots, 1, mpi_int_t,			  MPI_SUM, grid->comm );	    stat->TinyPivots = TinyPivots;	    if ( !iam ) {		printf("\tNUMfact (MB) all PEs:\tL\\U\t%.2f\tall\t%.2f\n",		       for_lu*1e-6, total*1e-6);		printf("\tAll space (MB):"		       "\t\ttotal\t%.2f\tAvg\t%.2f\tMax\t%.2f\n",		       avg*1e-6, avg/grid->nprow/grid->npcol*1e-6, max*1e-6);		printf("\tNumber of tiny pivots: %10d\n", stat->TinyPivots);	    }	}#endif            /* Destroy GA */        if ( Fact != SamePattern_SameRowPerm )            Destroy_CompCol_Matrix_dist(&GA);    } /* end if (!factored) */	    /* ------------------------------------------------------------       Compute the solution matrix X.       ------------------------------------------------------------*/    if ( nrhs ) {	if ( !(b_work = doublecomplexMalloc_dist(n)) )	    ABORT("Malloc fails for b_work[]");	/* ------------------------------------------------------------	   Scale the right-hand side if equilibration was performed. 	   ------------------------------------------------------------*/	if ( notran ) {	    if ( rowequ ) {		b_col = B;		for (j = 0; j < nrhs; ++j) {		    irow = fst_row;		    for (i = 0; i < m_loc; ++i) {                        zd_mult(&b_col[i], &b_col[i], R[irow]);		        ++irow;		    }		    b_col += ldb;		}	    }	} else if ( colequ ) {	    b_col = B;	    for (j = 0; j < nrhs; ++j) {	        irow = fst_row;		for (i = 0; i < m_loc; ++i) {		    zd_mult(&b_col[i], &b_col[i], C[irow]);		    ++irow;		}		b_col += ldb;	    }	}	/* Save a copy of the right-hand side. */	ldx = ldb;	if ( !(X = doublecomplexMalloc_dist(((size_t)ldx) * nrhs)) )	    ABORT("Malloc fails for X[]");	x_col = X;  b_col = B;	for (j = 0; j < nrhs; ++j) {	    for (i = 0; i < m_loc; ++i) x_col[i] = b_col[i];	    x_col += ldx;  b_col += ldb;	}	/* ------------------------------------------------------------	   Solve the linear system.	   ------------------------------------------------------------*/	if ( options->SolveInitialized == NO ) {	    zSolveInit(options, A, perm_r, perm_c, nrhs, LUstruct, grid,		       SOLVEstruct);	}	pzgstrs(n, LUstruct, ScalePermstruct, grid, X, m_loc, 		fst_row, ldb, nrhs, SOLVEstruct, stat, info);	/* ------------------------------------------------------------	   Use iterative refinement to improve the computed solution and	   compute error bounds and backward error estimates for it.	   ------------------------------------------------------------*/	if ( options->IterRefine ) {	    /* Improve the solution by iterative refinement. */	    int_t *it, *colind_gsmv = SOLVEstruct->A_colind_gsmv;	    SOLVEstruct_t *SOLVEstruct1;  /* Used by refinement. */	    t = SuperLU_timer_();	    if ( options->RefineInitialized == NO || Fact == DOFACT ) {	        /* All these cases need to re-initialize gsmv structure */	        if ( options->RefineInitialized )		    pzgsmv_finalize(SOLVEstruct->gsmv_comm);	        pzgsmv_init(A, SOLVEstruct->row_to_proc, grid,			    SOLVEstruct->gsmv_comm);	                       /* Save a copy of the transformed local col indices		   in colind_gsmv[]. */	        if ( colind_gsmv ) SUPERLU_FREE(colind_gsmv);	        if ( !(it = intMalloc_dist(nnz_loc)) )		    ABORT("Malloc fails for colind_gsmv[]");	        colind_gsmv = SOLVEstruct->A_colind_gsmv = it;	        for (i = 0; i < nnz_loc; ++i) colind_gsmv[i] = colind[i];	        options->RefineInitialized = YES;	    } else if ( Fact == SamePattern ||			Fact == SamePattern_SameRowPerm ) {	        doublecomplex at;	        int_t k, jcol, p;	        /* Swap to beginning the part of A corresponding to the		   local part of X, as was done in pzgsmv_init() */	        for (i = 0; i < m_loc; ++i) { /* Loop through each row */		    k = rowptr[i];		    for (j = rowptr[i]; j < rowptr[i+1]; ++j) {		        jcol = colind[j];		        p = SOLVEstruct->row_to_proc[jcol];		        if ( p == iam ) { /* Local */		            at = a[k]; a[k] = a[j]; a[j] = at;		            ++k;		        }		    }	        }	      	        /* Re-use the local col indices of A obtained from the		   previous call to pzgsmv_init() */	        for (i = 0; i < nnz_loc; ++i) colind[i] = colind_gsmv[i];	    }	    if ( nrhs == 1 ) { /* Use the existing solve structure */	        SOLVEstruct1 = SOLVEstruct;	    } else { /* For nrhs > 1, since refinement is performed for RHS			one at a time, the communication structure for pdgstrs			is different than the solve with nrhs RHS. 			So we use SOLVEstruct1 for the refinement step.		     */	        if ( !(SOLVEstruct1 = (SOLVEstruct_t *) 		                       SUPERLU_MALLOC(sizeof(SOLVEstruct_t))) )		    ABORT("Malloc fails for SOLVEstruct1");	        /* Copy the same stuff */	        SOLVEstruct1->row_to_proc = SOLVEstruct->row_to_proc;	        SOLVEstruct1->inv_perm_c = SOLVEstruct->inv_perm_c;	        SOLVEstruct1->num_diag_procs = SOLVEstruct->num_diag_procs;	        SOLVEstruct1->diag_procs = SOLVEstruct->diag_procs;	        SOLVEstruct1->diag_len = SOLVEstruct->diag_len;	        SOLVEstruct1->gsmv_comm = SOLVEstruct->gsmv_comm;	        SOLVEstruct1->A_colind_gsmv = SOLVEstruct->A_colind_gsmv;				/* Initialize the *gstrs_comm for 1 RHS. */		if ( !(SOLVEstruct1->gstrs_comm = (pxgstrs_comm_t *)		       SUPERLU_MALLOC(sizeof(pxgstrs_comm_t))) )		    ABORT("Malloc fails for gstrs_comm[]");		pxgstrs_init(n, m_loc, 1, fst_row, perm_r, perm_c, grid, 			     Glu_persist, SOLVEstruct1);	    }	    pzgsrfs(n, A, anorm, LUstruct, ScalePermstruct, grid,		    B, ldb, X, ldx, nrhs, SOLVEstruct1, berr, stat, info);            /* Deallocate the storage associated with SOLVEstruct1 */	    if ( nrhs > 1 ) {	        pxgstrs_finalize(SOLVEstruct1->gstrs_comm);	        SUPERLU_FREE(SOLVEstruct1);	    }	    stat->utime[REFINE] = SuperLU_timer_() - t;	}	/* Permute the solution matrix B <= Pc'*X. */	pzPermute_Dense_Matrix(fst_row, m_loc, SOLVEstruct->row_to_proc,			       SOLVEstruct->inv_perm_c,			       X, ldx, B, ldb, nrhs, grid);#if ( DEBUGlevel>=2 )	printf("\n (%d) .. After pzPermute_Dense_Matrix(): b =\n", iam);	for (i = 0; i < m_loc; ++i)	  printf("\t(%d)\t%4d\t%.10f\n", iam, i+fst_row, B[i]);#endif		/* Transform the solution matrix X to a solution of the original	   system before the equilibration. */	if ( notran ) {	    if ( colequ ) {		b_col = B;		for (j = 0; j < nrhs; ++j) {		    irow = fst_row;		    for (i = 0; i < m_loc; ++i) {                        zd_mult(&b_col[i], &b_col[i], C[irow]);		        ++irow;		    }		    b_col += ldb;		}	    }	} else if ( rowequ ) {	    b_col = B;	    for (j = 0; j < nrhs; ++j) {	        irow = fst_row;		for (i = 0; i < m_loc; ++i) {		    zd_mult(&b_col[i], &b_col[i], R[irow]);		    ++irow;		}		b_col += ldb;	    }	}	SUPERLU_FREE(b_work);	SUPERLU_FREE(X);    } /* end if nrhs != 0 */#if ( PRNTlevel>=1 )    if ( !iam ) printf(".. DiagScale = %d\n", ScalePermstruct->DiagScale);#endif    /* Deallocate R and/or C if it was not used. */    if ( Equil && Fact != SamePattern_SameRowPerm ) {	switch ( ScalePermstruct->DiagScale ) {	    case NOEQUIL:	        SUPERLU_FREE(R);		SUPERLU_FREE(C);		break;	    case ROW: 		SUPERLU_FREE(C);		break;	    case COL: 		SUPERLU_FREE(R);		break;	}    }    if ( !factored && Fact != SamePattern_SameRowPerm && !parSymbFact) 	Destroy_CompCol_Permuted_dist(&GAC);#if ( DEBUGlevel>=1 )    CHECK_MALLOC(iam, "Exit pzgssvx()");#endif}
pzgssvx.c - 源码说明

本页面展示了「SuperLU 2.2版本。对大型、稀疏、非对称的线性系统的直接求解」中的 pzgssvx.c 源码文件，采用 C语言编程语言编写，共 1,281 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与SuperLU相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?