⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 psymbfact.c

📁 SuperLU 2.2版本。对大型、稀疏、非对称的线性系统的直接求解
💻 C
📖 第 1 页 / 共 5 页
字号:
/* * -- Parallel symbolic factorization routine  (version 2.2) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley - July 2003 * INRIA France - January 2004 * Laura Grigori * * November 1, 2007 * Feburary 20, 2008 *//*  * The function symbfact_dist implements the parallel symbolic factorization * algorithm described in the paper: * * Parallel Symbolic Factorization for Sparse LU with Static Pivoting, * Laura Grigori, James W. Demmel and Xiaoye S. Li, * Pages 1289-1314, SIAM Journal on Scientific Computing, Volume 29, Issue 3. * *//* limits.h:  the largest positive integer (INT_MAX) */#include <limits.h>#include <math.h>#include "superlu_ddefs.h"#include "psymbfact.h"/* * Internal protypes */static int_t *intMalloc_symbfact(int_t );static int_t *intCalloc_symbfact(int_t );static int_tinitParmsAndStats(psymbfact_stat_t *PS);static voidestimate_memUsage(int_t, int, mem_usage_t *, float *, float *, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, comm_symbfact_t *, psymbfact_stat_t *);static voidsymbfact_free (int, int, Llu_symbfact_t *, vtcsInfo_symbfact_t *, comm_symbfact_t *);static int_tdenseSep_symbfact (int , int_t, int, int, int, int_t *, int_t *, int,  int,  int, int_t, int_t, int_t *, int_t *, int_t *, int_t *, int_t *, MPI_Comm, MPI_Comm *, Llu_symbfact_t *, Pslu_freeable_t *_freeable, vtcsInfo_symbfact_t *,  comm_symbfact_t *, psymbfact_stat_t * );static int_tdnsUpSeps_symbfact(int_t, int, int, int, int, int_t *, int_t *, int_t, Llu_symbfact_t *, Pslu_freeable_t *, vtcsInfo_symbfact_t *,  comm_symbfact_t *, psymbfact_stat_t *, int_t *, int_t *, int_t *);static voidintraLvl_symbfact (SuperMatrix *, int, int, int, int, int, int_t *, int_t *, int,  int, int_t, int_t,  Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *,  comm_symbfact_t *, psymbfact_stat_t *, int_t *, int_t *, int_t *, int_t *,  int_t *, int_t *, int_t *, MPI_Comm, MPI_Comm *);static voidinitLvl_symbfact(int_t, int, int_t, int_t, Pslu_freeable_t *,  Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *, MPI_Comm,  int_t *, int_t, int_t);static voidcreateComm (int, int, MPI_Comm *, MPI_Comm *);static voidfreeComm (int, int, MPI_Comm *, MPI_Comm *);static voiddomain_symbfact(SuperMatrix *, int, int, int,  int, int, int_t *, int_t *, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, comm_symbfact_t *, psymbfact_stat_t *, int_t *, int_t *, int_t *, int_t *,  int_t *, int_t *, int_t *);static floatallocPrune_domain(int_t, int_t, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *);static floatallocPrune_lvl(Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *);static intsymbfact_alloc(int_t, int, Pslu_freeable_t *, Llu_symbfact_t *,  vtcsInfo_symbfact_t *, comm_symbfact_t *, psymbfact_stat_t *);static float symbfact_mapVtcs(int, int, int, SuperMatrix *, int_t *, int_t *,  Pslu_freeable_t *, vtcsInfo_symbfact_t *, int_t *, int_t, psymbfact_stat_t *);static void symbfact_distributeMatrix (int, int, int, SuperMatrix *, int_t *, int_t *, matrix_symbfact_t *,  Pslu_freeable_t *, vtcsInfo_symbfact_t *, int_t *, MPI_Comm *);static int_tinterLvl_symbfact(SuperMatrix *, int, int, int, int, int, int, int,  int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, Llu_symbfact_t *, Pslu_freeable_t*, comm_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *, MPI_Comm, MPI_Comm *);static floatcntsVtcs (int_t, int, int, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *,  int_t *, int_t *, int_t *, psymbfact_stat_t *, MPI_Comm *);/************************************************************************/float symbfact_dist/************************************************************************/( int         nprocs_num,  /* Input - no of processors */ int         nprocs_symb, /* Input - no of processors for the symbolic			     factorization */ SuperMatrix *A,          /* Input - distributed input matrix */ int_t       *perm_c,     /* Input - column permutation */ int_t       *perm_r,     /* Input - row permutation */ int_t       *sizes,      /* Input - sizes of each node in the separator tree */ int_t       *fstVtxSep,  /* Input - first vertex of each node in the tree */ Pslu_freeable_t *Pslu_freeable, /* Output - local L and U structure, 				    global to local indexing information */ MPI_Comm    *num_comm,   /* Input - communicator for numerical factorization */ MPI_Comm    *symb_comm,  /* Input - communicator for symbolic factorization */ mem_usage_t *symb_mem_usage ){/*  * Purpose * ======= *   symbfact_dist() performs symbolic factorization of matrix A suitable *   for performing the supernodal Gaussian elimination with no pivoting (GEPP).  *   This routine computes the structure of one column of L and one row of U  *   at a time.  It uses: *        o distributed input matrix *        o supernodes *        o symmetric structure pruning * * * Arguments * ========= * * nprocs_num (input) int *         Number of processors SuperLU_DIST is executed on, and the input  *         matrix is distributed on. * * nprocs_symb (input) int *         Number of processors on which the symbolic factorization is *         performed.  It is equal to the number of independent domains *         idenfied in the graph partitioning algorithm executed *         previously and has to be a power of 2.  It corresponds to *         number of leaves in the separator tree. * * A       (input) SuperMatrix* *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The *         number of the linear equations is A->nrow.  Matrix A is *         distributed in NRformat_loc format. *         Matrix A is not yet permuted by perm_c. * * perm_c  (input) int_t* *	   Column permutation vector of size A->ncol, which defines the  *         permutation matrix Pc; perm_c[i] = j means column i of A is  *         in position j in A*Pc. * * perm_r  (input) int_t* *	   Row permutation vector of size A->nrow, which defines the  *         permutation matrix Pr; perm_r[i] = j means column i of A is  *         in position j in Pr*A. * * sizes   (input) int_t* *         Contains the number of vertices in each separator. * * fstVtxSep (input) int_t* *         Contains first vertex for each separator. * * Pslu_freeable (output) Pslu_freeable_t* *         Returns the local L and U structure, and global to local *         information on the indexing of the vertices.  Contains all *         the information necessary for performing the data *         distribution towards the numeric factorization. *				     * num_comm (input) MPI_Comm* *         Communicator for numerical factorization  * * symb_comm (input) MPI_Comm* *         Communicator for symbolic factorization  * * symb_mem_usage (input) mem_usage_t * *         Statistics on memory usage. * * Return value * ============ *   < 0, number of bytes allocated on return from the symbolic factorization. *   > 0, number of bytes allocated when out of memory. * * Sketch of the algorithm * ======================= * *  Distrbute the vertices on the processors using a subtree to *  subcube algorithm. * *  Redistribute the structure of the input matrix A according to the *  subtree to subcube computed previously for the symbolic *  factorization routine.  This implies in particular a distribution *  from nprocs_num processors to nprocs_symb processors. * *  Perform symbolic factorization guided by the separator tree provided by *  a graph partitioning algorithm.  The symbolic factorization uses a  *  combined left-looking, right-looking approach.  * */  NRformat_loc *Astore;  int iam, szSep, fstP, lstP, npNode, nlvls, lvl, p, iSep, jSep;  int iinfo; /* return code */  int_t m, n;  int_t nextl, nextu, neltsZr, neltsTotal, nsuper_loc, szLGr, szUGr;  int_t ind_blk, nsuper, vtx, min_mn, nnzL, nnzU, szsn;  float stat_loc[23], stat_glob[23], mem_glob[15];    Llu_symbfact_t Llu_symbfact; /* local L and U and pruned L and U data structures */  vtcsInfo_symbfact_t VInfo; /* local information on number of blocks,				number of vertices in a block etc */  matrix_symbfact_t   AS; /* temporary storage for the input matrix after redistribution */  comm_symbfact_t CS;  /* information on communication */  /* relaxation parameters (for future release) and      statistics collected during the symbolic factorization */  psymbfact_stat_t PS;   /* temp array of size n, used as a marker by the subroutines */  int_t *tempArray;   int_t i, j, k;  int_t fstVtx, lstVtx, mark, fstVtx_lid, vtx_lid, maxNvtcsPProc;  int_t nnz_asup_loc, nnz_ainf_loc, fill_rcmd;  float totalMemLU, overestimMem;  MPI_Comm *commLvls;    /* maximum block size */  int_t  maxSzBlk;  float flinfo;#if ( PRNTlevel >= 1)  float stat_msgs_l[10], stat_msgs_g[10]; #endif  #if ( PROFlevel>=1 )  double t, t_symbFact[3], t_symbFact_loc[3];  double *time_lvlsT, *time_lvls, t1, t2, time_lvlsg[9];#endif    /* Initialization */  MPI_Comm_rank ((*num_comm), &iam);  commLvls = NULL;#if ( DEBUGlevel>=1 )  CHECK_MALLOC(iam, "Enter psymbfact()");#endif  initParmsAndStats (&PS);  if (nprocs_symb != 1) {    if (!(commLvls = (MPI_Comm *) SUPERLU_MALLOC(2*nprocs_symb*sizeof(MPI_Comm)))) {      fprintf (stderr, "Malloc fails for commLvls[].");        return (PS.allocMem);    }    PS.allocMem += 2 * nprocs_symb * sizeof(MPI_Comm);  }    nlvls = (int) LOG2( nprocs_num ) + 1;#if ( PROFlevel>=1 )  time_lvlsT = (double *) SUPERLU_MALLOC(3*nprocs_symb*(nlvls+1) 					 * sizeof(double));  time_lvls  = (double *) SUPERLU_MALLOC(3*(nlvls+1) * sizeof(double));  if (!time_lvls || !time_lvlsT) {    fprintf (stderr, "Malloc fails for time_lvls[].");      return (PS.allocMem);  }  PS.allocMem += (3*nprocs_symb*(nlvls+1) + 3*(nlvls+1)) * sizeof(double);#endif    VInfo.xlsub_nextLvl  = 0;  VInfo.xusub_nextLvl  = 0;  VInfo.maxSzBlk = sp_ienv_dist(3);  maxSzBlk = VInfo.maxSzBlk;    mark = EMPTY;  nsuper_loc = 0;  nextl   = 0; nextu      = 0;  neltsZr = 0; neltsTotal = 0;    m = A->nrow;  n = A->ncol;  min_mn = SUPERLU_MIN( m, n );    if (!(tempArray = intMalloc_symbfact(n))) {    fprintf (stderr, "Malloc fails for tempArray[].\n");      return (PS.allocMem);  }  PS.allocMem += n * sizeof(int_t);  #if ( PROFlevel>=1 )    t = SuperLU_timer_();#endif    /* Distribute vertices on processors */  if ((flinfo =        symbfact_mapVtcs (iam, nprocs_num, nprocs_symb, A, fstVtxSep, sizes, 			 Pslu_freeable, &VInfo, tempArray, maxSzBlk, &PS)) > 0)     return (flinfo);  maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc;    /* Redistribute matrix A on processors following the distribution found     in symbfact_mapVtcs.  Store the redistributed A temporarily into AS */  symbfact_distributeMatrix (iam, nprocs_num, nprocs_symb,  A, 			     perm_c, perm_r, &AS, 			     Pslu_freeable, &VInfo, tempArray, num_comm);    /* THE REST OF THE SYMBOLIC FACTORIZATION IS EXECUTED ONLY BY NPROCS_SYMB     PROCESSORS */  if ( iam < nprocs_symb ) {    #if ( PROFlevel>=1 )    t_symbFact_loc[0] = SuperLU_timer_() - t;    t = SuperLU_timer_();    t_symbFact_loc[1] = t;#endif    /* Allocate storage common to the symbolic factor routines */    if (iinfo = symbfact_alloc (n, nprocs_symb, Pslu_freeable, 				&Llu_symbfact, &VInfo, &CS, &PS))       return (PS.allocMem);      /* Copy the redistributed input matrix AS at the end of the memory buffer       allocated to store L and U.  That is, copy (AS.x_ainf, AS.ind_ainf) in       (xlsub, lsub), (AS.x_asup, AS.ind_asup) in (xusub, usub).  Free the       memory used to store the input matrix */    nnz_ainf_loc = VInfo.nnz_ainf_loc;    nnz_asup_loc = VInfo.nnz_asup_loc;    j = Llu_symbfact.szUsub - VInfo.nnz_asup_loc;    k = Llu_symbfact.szLsub - VInfo.nnz_ainf_loc;    for (i = 0; i <= VInfo.nvtcs_loc; i++) {      Llu_symbfact.xusub[i] = AS.x_asup[i] + j;      Llu_symbfact.xlsub[i] = AS.x_ainf[i] + k;    }        for (i = 0; i < VInfo.nnz_asup_loc; i++, j++)       Llu_symbfact.usub[j] = AS.ind_asup[i];    for (i = 0; i < VInfo.nnz_ainf_loc; i++, k++)         Llu_symbfact.lsub[k] = AS.ind_ainf[i];    SUPERLU_FREE( AS.x_ainf );    SUPERLU_FREE( AS.x_asup );      SUPERLU_FREE( AS.ind_ainf );      SUPERLU_FREE( AS.ind_asup );      if (nprocs_symb != 1) {      createComm (iam, nprocs_symb, commLvls, symb_comm);    #if ( PROFlevel>=1 )      t_symbFact_loc[2] = SuperLU_timer_();#endif      if ((flinfo = cntsVtcs (n, iam, nprocs_symb, Pslu_freeable, &Llu_symbfact, 			      &VInfo, tempArray, fstVtxSep, sizes, &PS, commLvls)) > 0) 	return (flinfo);			     #if ( PROFlevel>=1 )      t_symbFact_loc[2] = SuperLU_timer_() - t_symbFact_loc[2];#endif    }        /* set to EMPTY marker[] array */    for (i = 0; i < n; i++)      tempArray[i] = EMPTY;        szSep = nprocs_symb;    iSep = 0;    lvl = 0;    while (szSep >= 1) {      /* for each level in the separator tree */      npNode = nprocs_symb / szSep;       fstP = 0;       /* for each node in the level */      for (jSep = iSep; jSep < iSep + szSep; jSep++) {	fstVtx = fstVtxSep[jSep];	lstVtx  = fstVtx + sizes[jSep];	/* if this is the first level */	if (szSep == nprocs_symb) {	  /* compute symbolic factorization for my domain */	  if (fstP == iam) {	    /* allocate storage for the pruned structures */#if ( PROFlevel>=1 )	    t1 = SuperLU_timer_();#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -