📄 util.c
字号:
/* * -- Distributed SuperLU routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley. * February 1, 2003 * */#include <math.h>#include "superlu_ddefs.h"/* Deallocate the structure pointing to the actual storage of the matrix. */voidDestroy_SuperMatrix_Store_dist(SuperMatrix *A){ SUPERLU_FREE ( A->Store );}voidDestroy_CompCol_Matrix_dist(SuperMatrix *A){ NCformat *Astore = A->Store; SUPERLU_FREE( Astore->rowind ); SUPERLU_FREE( Astore->colptr ); if ( Astore->nzval ) SUPERLU_FREE( Astore->nzval ); SUPERLU_FREE( Astore );}voidDestroy_CompRowLoc_Matrix_dist(SuperMatrix *A){ NRformat_loc *Astore = A->Store; SUPERLU_FREE( Astore->rowptr ); SUPERLU_FREE( Astore->colind ); SUPERLU_FREE( Astore->nzval ); SUPERLU_FREE( Astore );}voidDestroy_CompRow_Matrix_dist(SuperMatrix *A){ SUPERLU_FREE( ((NRformat *)A->Store)->rowptr ); SUPERLU_FREE( ((NRformat *)A->Store)->colind ); SUPERLU_FREE( ((NRformat *)A->Store)->nzval ); SUPERLU_FREE( A->Store );}voidDestroy_SuperNode_Matrix_dist(SuperMatrix *A){ SUPERLU_FREE ( ((SCformat *)A->Store)->rowind ); SUPERLU_FREE ( ((SCformat *)A->Store)->rowind_colptr ); SUPERLU_FREE ( ((SCformat *)A->Store)->nzval ); SUPERLU_FREE ( ((SCformat *)A->Store)->nzval_colptr ); SUPERLU_FREE ( ((SCformat *)A->Store)->col_to_sup ); SUPERLU_FREE ( ((SCformat *)A->Store)->sup_to_col ); SUPERLU_FREE ( A->Store );}/* A is of type Stype==NCP */voidDestroy_CompCol_Permuted_dist(SuperMatrix *A){ SUPERLU_FREE ( ((NCPformat *)A->Store)->colbeg ); SUPERLU_FREE ( ((NCPformat *)A->Store)->colend ); SUPERLU_FREE ( A->Store );}/* A is of type Stype==DN */voidDestroy_Dense_Matrix_dist(SuperMatrix *A){ DNformat* Astore = A->Store; SUPERLU_FREE (Astore->nzval); SUPERLU_FREE ( A->Store );}/* Destroy distributed L & U matrices. */voidDestroy_LU(int_t n, gridinfo_t *grid, LUstruct_t *LUstruct){ int_t i, nb, nsupers; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu;#if ( DEBUGlevel>=1 ) int iam; MPI_Comm_rank( MPI_COMM_WORLD, &iam ); CHECK_MALLOC(iam, "Enter Destroy_LU()");#endif nsupers = Glu_persist->supno[n-1] + 1; nb = CEILING(nsupers, grid->npcol); for (i = 0; i < nb; ++i) if ( Llu->Lrowind_bc_ptr[i] ) { SUPERLU_FREE (Llu->Lrowind_bc_ptr[i]); SUPERLU_FREE (Llu->Lnzval_bc_ptr[i]); } SUPERLU_FREE (Llu->Lrowind_bc_ptr); SUPERLU_FREE (Llu->Lnzval_bc_ptr); nb = CEILING(nsupers, grid->nprow); for (i = 0; i < nb; ++i) if ( Llu->Ufstnz_br_ptr[i] ) { SUPERLU_FREE (Llu->Ufstnz_br_ptr[i]); SUPERLU_FREE (Llu->Unzval_br_ptr[i]); } SUPERLU_FREE (Llu->Ufstnz_br_ptr); SUPERLU_FREE (Llu->Unzval_br_ptr); /* The following can be freed after factorization. */ SUPERLU_FREE(Llu->ToRecv); SUPERLU_FREE(Llu->ToSendD); SUPERLU_FREE(Llu->ToSendR[0]); SUPERLU_FREE(Llu->ToSendR); /* The following can be freed only after iterative refinement. */ SUPERLU_FREE(Llu->ilsum); SUPERLU_FREE(Llu->fmod); SUPERLU_FREE(Llu->fsendx_plist[0]); SUPERLU_FREE(Llu->fsendx_plist); SUPERLU_FREE(Llu->bmod); SUPERLU_FREE(Llu->bsendx_plist[0]); SUPERLU_FREE(Llu->bsendx_plist); SUPERLU_FREE(Glu_persist->xsup); SUPERLU_FREE(Glu_persist->supno);#if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit Destroy_LU()");#endif}/* Allocate storage in ScalePermstruct */void ScalePermstructInit(const int_t m, const int_t n, ScalePermstruct_t *ScalePermstruct){ ScalePermstruct->DiagScale = NOEQUIL; if ( !(ScalePermstruct->perm_r = intMalloc_dist(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(ScalePermstruct->perm_c = intMalloc_dist(n)) ) ABORT("Malloc fails for perm_c[].");}/* Deallocate ScalePermstruct */void ScalePermstructFree(ScalePermstruct_t *ScalePermstruct){ SUPERLU_FREE(ScalePermstruct->perm_r); SUPERLU_FREE(ScalePermstruct->perm_c); switch ( ScalePermstruct->DiagScale ) { case ROW: SUPERLU_FREE(ScalePermstruct->R); break; case COL: SUPERLU_FREE(ScalePermstruct->C); break; case BOTH: SUPERLU_FREE(ScalePermstruct->R); SUPERLU_FREE(ScalePermstruct->C); break; }}/* Allocate storage in LUstruct */void LUstructInit(const int_t m, const int_t n, LUstruct_t *LUstruct){ if ( !(LUstruct->etree = intMalloc_dist(n)) ) ABORT("Malloc fails for etree[]."); if ( !(LUstruct->Glu_persist = (Glu_persist_t *) SUPERLU_MALLOC(sizeof(Glu_persist_t))) ) ABORT("Malloc fails for Glu_persist_t."); if ( !(LUstruct->Llu = (LocalLU_t *) SUPERLU_MALLOC(sizeof(LocalLU_t))) ) ABORT("Malloc fails for LocalLU_t.");}/* Deallocate LUstruct */void LUstructFree(LUstruct_t *LUstruct){ SUPERLU_FREE(LUstruct->etree); SUPERLU_FREE(LUstruct->Glu_persist); SUPERLU_FREE(LUstruct->Llu);}/* * Count the total number of nonzeros in factors L and U, and in the * symmetrically reduced L. */voidcountnz_dist(const int_t n, int_t *xprune, int_t *nnzL, int_t *nnzU, Glu_persist_t *Glu_persist, Glu_freeable_t *Glu_freeable){ int_t fnz, fsupc, i, j, nsuper; int_t nnzL0, jlen, irep; int_t *supno, *xsup, *xlsub, *xusub, *usub; supno = Glu_persist->supno; xsup = Glu_persist->xsup; xlsub = Glu_freeable->xlsub; xusub = Glu_freeable->xusub; usub = Glu_freeable->usub; *nnzL = 0; *nnzU = 0; nnzL0 = 0; nsuper = supno[n]; if ( n <= 0 ) return; /* * For each supernode in L. */ for (i = 0; i <= nsuper; i++) { fsupc = xsup[i]; jlen = xlsub[fsupc+1] - xlsub[fsupc]; for (j = fsupc; j < xsup[i+1]; j++) { *nnzL += jlen; *nnzU += j - fsupc + 1; jlen--; } irep = xsup[i+1] - 1; nnzL0 += xprune[irep] - xlsub[irep]; } /* printf("\tNo of nonzeros in symm-reduced L = %d\n", nnzL0);*/ /* For each column in U. */ for (j = 0; j < n; ++j) { for (i = xusub[j]; i < xusub[j+1]; ++i) { fnz = usub[i]; fsupc = xsup[supno[fnz]+1]; *nnzU += fsupc - fnz; } }}/* * Fix up the data storage lsub for L-subscripts. It removes the subscript * sets for structural pruning, and applies permuation to the remaining * subscripts. */int_tfixupL_dist(const int_t n, const int_t *perm_r, Glu_persist_t *Glu_persist, Glu_freeable_t *Glu_freeable){ register int_t nsuper, fsupc, nextl, i, j, k, jstrt, lsub_size; int_t *xsup, *lsub, *xlsub; if ( n <= 1 ) return 0; xsup = Glu_persist->xsup; lsub = Glu_freeable->lsub; xlsub = Glu_freeable->xlsub; nextl = 0; nsuper = (Glu_persist->supno)[n]; lsub_size = xlsub[n]; /* * For each supernode ... */ for (i = 0; i <= nsuper; i++) { fsupc = xsup[i]; jstrt = xlsub[fsupc]; xlsub[fsupc] = nextl; for (j = jstrt; j < xlsub[fsupc+1]; j++) { lsub[nextl] = perm_r[lsub[j]]; /* Now indexed into P*A */ nextl++; } for (k = fsupc+1; k < xsup[i+1]; k++) xlsub[k] = nextl; /* Other columns in supernode i */ } xlsub[n] = nextl; return lsub_size;}/* * Set the default values for the options argument. */void set_default_options_dist(superlu_options_t *options){ options->Fact = DOFACT; options->Equil = YES; options->ParSymbFact = NO; options->ColPerm = MMD_AT_PLUS_A; options->RowPerm = LargeDiag; options->ReplaceTinyPivot = YES; options->IterRefine = DOUBLE; options->Trans = NOTRANS; options->SolveInitialized = NO; options->RefineInitialized = NO; options->PrintStat = YES;}/* * Print the options setting. */void print_options_dist(superlu_options_t *options){ printf(".. options:\n"); printf("\tFact\t %8d\n", options->Fact); printf("\tEquil\t %8d\n", options->Equil); printf("\tColPerm\t %8d\n", options->ColPerm); printf("\tRowPerm\t %8d\n", options->RowPerm); printf("\tReplaceTinyPivot %4d\n", options->ReplaceTinyPivot); printf("\tTrans\t %8d\n", options->Trans); printf("\tIterRefine\t%4d\n", options->IterRefine); printf("..\n");}int_tpxgstrs_init(int_t n, int_t m_loc, int_t nrhs, int_t fst_row, int_t perm_r[], int_t perm_c[], gridinfo_t *grid, Glu_persist_t *Glu_persist, SOLVEstruct_t *SOLVEstruct){/* * Purpose * ======= * Set up the communication pattern for the triangular solution. * * Arguments * ========= * * n (input) int (global) * The dimension of the linear system. * * m_loc (input) int (local) * The local row dimension of the distributed input matrix. * * nrhs (input) int (global) * Number of right-hand sides. * * fst_row (input) int (global) * The row number of matrix B's first row in the global matrix. * * perm_r (input) int* (global) * The row permutation vector. * * perm_c (input) int* (global) * The column permutation vector. * * grid (input) gridinfo_t* * The 2D process mesh. * */ int *SendCnt, *SendCnt_nrhs, *RecvCnt, *RecvCnt_nrhs; int *sdispls, *sdispls_nrhs, *rdispls, *rdispls_nrhs; int *itemp, *ptr_to_ibuf, *ptr_to_dbuf; int_t *row_to_proc; int_t i, gbi, k, l, num_diag_procs, *diag_procs; int_t irow, lk, q, knsupc, nsupers, *xsup, *supno; int iam, p, pkk, procs; pxgstrs_comm_t *gstrs_comm; procs = grid->nprow * grid->npcol; iam = grid->iam; gstrs_comm = SOLVEstruct->gstrs_comm; xsup = Glu_persist->xsup; supno = Glu_persist->supno; nsupers = Glu_persist->supno[n-1] + 1; row_to_proc = SOLVEstruct->row_to_proc; /* ------------------------------------------------------------ SET UP COMMUNICATION PATTERN FOR ReDistribute_B_to_X. ------------------------------------------------------------*/ if ( !(itemp = SUPERLU_MALLOC(8*procs * sizeof(int))) ) ABORT("Malloc fails for B_to_X_itemp[]."); SendCnt = itemp; SendCnt_nrhs = itemp + procs; RecvCnt = itemp + 2*procs; RecvCnt_nrhs = itemp + 3*procs; sdispls = itemp + 4*procs; sdispls_nrhs = itemp + 5*procs; rdispls = itemp + 6*procs; rdispls_nrhs = itemp + 7*procs; /* Count the number of elements to be sent to each diagonal process.*/ for (p = 0; p < procs; ++p) SendCnt[p] = 0; for (i = 0, l = fst_row; i < m_loc; ++i, ++l) { irow = perm_c[perm_r[l]]; /* Row number in Pc*Pr*B */ gbi = BlockNum( irow );
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -