📄 symbfact.c
字号:
/* * -- Distributed SuperLU routine (version 1.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley. * September 1, 1999 * *//* Copyright (c) 1994 by Xerox Corporation. All rights reserved. THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. Permission is hereby granted to use or copy this program for any purpose, provided the above notices are retained on all copies. Permission to modify the code and to distribute modified code is granted, provided the above notices are retained, and a notice that the code was modified is included with the above copyright notice.*//* * Modified by X. S. Li. */#include "superlu_ddefs.h"/* What type of supernodes we want */#define T2_SUPER/* * Internal protypes */static void relax_snode(int_t, int_t *, int_t, int_t *, int_t *);static int_t snode_dfs(SuperMatrix *, const int_t, const int_t, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *);static int_t column_dfs(SuperMatrix *, const int_t, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *);static int_t pivotL(const int_t, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *);static int_t set_usub(const int_t, const int_t, const int_t, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *);static void pruneL(const int_t, const int_t *, const int_t, const int_t, const int_t *, const int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *);/************************************************************************/int_t symbfact/************************************************************************/( superlu_options_t *options, /* input options */ int pnum, /* process number */ SuperMatrix *A, /* original matrix A permuted by columns (input) */ int_t *perm_c, /* column permutation vector (input) */ int_t *etree, /* column elimination tree (input) */ Glu_persist_t *Glu_persist, /* output */ Glu_freeable_t *Glu_freeable /* output */ ){/* * Purpose * ======= * symbfact() performs a symbolic factorization on matrix A and sets up * the nonzero data structures which are suitable for supernodal Gaussian * elimination with no pivoting (GENP). This routine features: * o depth-first search (DFS) * o supernodes * o symmetric structure pruning * * Return value * ============ * < 0, number of bytes needed for LSUB. * = 0, matrix dimension is 1. * > 0, number of bytes allocated when out of memory. * */ int_t m, n, min_mn, j, i, k, irep, nseg, pivrow, info; int_t *iwork, *perm_r, *segrep, *repfnz; int_t *xprune, *marker, *parent, *xplore; int_t relax, *desc, *relax_end; int_t nnzL, nnzU;#if ( DEBUGlevel>=1 ) CHECK_MALLOC(pnum, "Enter symbfact()");#endif m = A->nrow; n = A->ncol; min_mn = SUPERLU_MIN(m, n); /* Allocate storage common to the symbolic factor routines */ info = symbfact_SubInit(DOFACT, NULL, 0, m, n, ((NCPformat*)A->Store)->nnz, Glu_persist, Glu_freeable); iwork = (int_t *) intMalloc_dist(6*m+2*n); perm_r = iwork; segrep = iwork + m; repfnz = segrep + m; marker = repfnz + m; parent = marker + m; xplore = parent + m; xprune = xplore + m; relax_end = xprune + n; relax = sp_ienv_dist(2); ifill_dist(perm_r, m, EMPTY); ifill_dist(repfnz, m, EMPTY); ifill_dist(marker, m, EMPTY); Glu_persist->supno[0] = -1; Glu_persist->xsup[0] = 0; Glu_freeable->xlsub[0] = 0; Glu_freeable->xusub[0] = 0; /*for (j = 0; j < n; ++j) iperm_c[perm_c[j]] = j;*/ /* Identify relaxed supernodes. */ if ( !(desc = intMalloc_dist(n+1)) ) ABORT("Malloc fails for desc[]");; relax_snode(n, etree, relax, desc, relax_end); SUPERLU_FREE(desc); for (j = 0; j < min_mn; ) { if ( relax_end[j] != EMPTY ) { /* beginning of a relaxed snode */ k = relax_end[j]; /* end of the relaxed snode */ /* Determine union of the row structure of supernode (j:k). */ if ( (info = snode_dfs(A, j, k, xprune, marker, Glu_persist, Glu_freeable)) != 0 ) return info; for (i = j; i <= k; ++i) pivotL(i, perm_r, &pivrow, Glu_persist, Glu_freeable); j = k+1; } else { /* Perform a symbolic factorization on column j, and detects whether column j starts a new supernode. */ if ((info = column_dfs(A, j, perm_r, &nseg, segrep, repfnz, xprune, marker, parent, xplore, Glu_persist, Glu_freeable)) != 0) return info; /* Copy the U-segments to usub[*]. */ if ((info = set_usub(min_mn, j, nseg, segrep, repfnz, Glu_persist, Glu_freeable)) != 0) return info; pivotL(j, perm_r, &pivrow, Glu_persist, Glu_freeable); /* Prune columns [0:j-1] using column j. */ pruneL(j, perm_r, pivrow, nseg, segrep, repfnz, xprune, Glu_persist, Glu_freeable); /* Reset repfnz[*] to prepare for the next column. */ for (i = 0; i < nseg; i++) { irep = segrep[i]; repfnz[irep] = EMPTY; } ++j; } /* else */ } /* for j ... */ countnz_dist(min_mn, xprune, &nnzL, &nnzU, Glu_persist, Glu_freeable); /* Apply perm_r to L; Compress LSUB array. */ i = fixupL_dist(min_mn, perm_r, Glu_persist, Glu_freeable); if ( !pnum && (options->PrintStat == YES)) { printf("\tNonzeros in L %ld\n", nnzL); printf("\tNonzeros in U %ld\n", nnzU); printf("\tnonzeros in L+U %ld\n", nnzL + nnzU - min_mn); printf("\tnonzeros in LSUB %ld\n", i); } SUPERLU_FREE(iwork);#if ( PRNTlevel>=3 ) PrintInt10("lsub", Glu_freeable->xlsub[n], Glu_freeable->lsub); PrintInt10("xlsub", n+1, Glu_freeable->xlsub); PrintInt10("xprune", n, xprune); PrintInt10("usub", Glu_freeable->xusub[n], Glu_freeable->usub); PrintInt10("xusub", n+1, Glu_freeable->xusub); PrintInt10("supno", n, Glu_persist->supno); PrintInt10("xsup", (Glu_persist->supno[n])+2, Glu_persist->xsup);#endif#if ( DEBUGlevel>=1 ) CHECK_MALLOC(pnum, "Exit symbfact()");#endif return (-i);} /* SYMBFACT *//************************************************************************/static void relax_snode/************************************************************************/( const int_t n, /* number of columns in the matrix (input) */ int_t *et, /* column elimination tree (input) */ const int_t relax, /* max no of columns allowed in a relaxed snode (input) */ int_t *desc, /* number of descendants of each etree node. */ int_t *relax_end /* last column in a supernode (output) */ ){/* * Purpose * ======= * relax_snode() identifies the initial relaxed supernodes, assuming that * the matrix has been reordered according to an postorder of the etree. * */ register int_t j, parent, nsuper; register int_t fsupc; /* beginning of a snode */ ifill_dist(relax_end, n, EMPTY); ifill_dist(desc, n+1, 0); nsuper = 0; /* Compute the number of descendants of each node in the etree. */ for (j = 0; j < n; j++) { parent = et[j]; if ( parent != n ) /* not the dummy root */ desc[parent] += desc[j] + 1; } /* Identify the relaxed supernodes by postorder traversal of the etree. */ for (j = 0; j < n; ) { parent = et[j]; fsupc = j; while ( parent != n && desc[parent] < relax ) { j = parent; parent = et[j]; } /* Found a supernode with j being the last column. */ relax_end[fsupc] = j; /* Last column is recorded. */ ++nsuper; ++j; /* Search for a new leaf. */ while ( desc[j] != 0 && j < n ) ++j; }#if ( DEBUGlevel>=1 ) printf(".. No of relaxed snodes: %d\trelax: %d\n", nsuper, relax);#endif} /* RELAX_SNODE *//************************************************************************/static int_t snode_dfs/************************************************************************/( SuperMatrix *A, /* original matrix A permuted by columns (input) */ const int_t jcol, /* beginning of the supernode (input) */ const int_t kcol, /* end of the supernode (input) */ int_t *xprune, /* pruned location in each adjacency list (output) */ int_t *marker, /* working array of size m */ Glu_persist_t *Glu_persist, /* global LU data structures (modified) */ Glu_freeable_t *Glu_freeable ){/* * Purpose * ======= * snode_dfs() determines the union of the row structures of those * columns within the relaxed snode. * Note: The relaxed snodes are leaves of the supernodal etree, therefore, * the part outside the rectangular supernode must be zero. * * Return value * ============ * 0 success; * >0 number of bytes allocated when run out of memory. * */ NCPformat *Astore; int_t *asub, *xa_begin, *xa_end; register int_t i, k, ifrom, ito, nextl, new_next; int_t nsuper, krow, kmark, mem_error; int_t *xsup, *supno; int_t *lsub, *xlsub; int_t nzlmax, nextu; Astore = A->Store; asub = Astore->rowind; xa_begin = Astore->colbeg; xa_end = Astore->colend; xsup = Glu_persist->xsup; supno = Glu_persist->supno; lsub = Glu_freeable->lsub; xlsub = Glu_freeable->xlsub; nzlmax = Glu_freeable->nzlmax; nsuper = ++supno[jcol]; /* Next available supernode number */ nextl = xlsub[jcol]; nextu = Glu_freeable->xusub[jcol]; for (i = jcol; i <= kcol; i++) { /* For each nonzero in A[*,i] */ for (k = xa_begin[i]; k < xa_end[i]; ++k) { krow = asub[k]; kmark = marker[krow]; if ( kmark != kcol ) { /* First time visit krow */ marker[krow] = kcol; lsub[nextl++] = krow; if ( nextl >= nzlmax ) { if (mem_error = symbfact_SubXpand(A->ncol, jcol, nextl, LSUB, &nzlmax, Glu_freeable)) return (mem_error); lsub = Glu_freeable->lsub; } } } supno[i] = nsuper; Glu_freeable->xusub[i+1] = nextu; /* Tidy up the pointers in usub[*]. */ } /* Supernode > 1, then make a copy of the subscripts for pruning */ if ( jcol < kcol ) { new_next = nextl + (nextl - xlsub[jcol]); while ( new_next > nzlmax ) { if (mem_error = symbfact_SubXpand(A->ncol, jcol, nextl, LSUB, &nzlmax, Glu_freeable)) return (mem_error); lsub = Glu_freeable->lsub; } ito = nextl; for (ifrom = xlsub[jcol]; ifrom < nextl; ) lsub[ito++] = lsub[ifrom++]; for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl; nextl = ito; } xsup[nsuper+1] = kcol + 1; supno[kcol+1] = nsuper; xprune[kcol] = nextl; xlsub[kcol+1] = nextl;#if ( PRNTlevel>=3 ) printf(".. snode_dfs(): (%8d:%8d) nextl %d\n", jcol, kcol, nextl);#endif return 0;} /* SNODE_DFS *//************************************************************************/static int_t column_dfs/************************************************************************/( SuperMatrix *A, /* original matrix A permuted by columns (input) */ const int_t jcol, /* current column number (input) */ int_t *perm_r, /* row permutation vector (input) */ int_t *nseg, /* number of U-segments in column jcol (output) */ int_t *segrep, /* list of U-segment representatives (output) */ int_t *repfnz, /* list of first nonzeros in the U-segments (output) */ int_t *xprune, /* pruned location in each adjacency list (output) */ int_t *marker, /* working array of size m */ int_t *parent, /* working array of size m */ int_t *xplore, /* working array of size m */ Glu_persist_t *Glu_persist, /* global LU data structures (modified) */ Glu_freeable_t *Glu_freeable ){/* * Purpose * ======= * column_dfs() performs a symbolic factorization on column jcol, and * detects the supernode boundary. This routine uses the row indices of * A[*,jcol] to start the depth-first search (DFS). * * Output * ====== * A supernode representative is the last column of a supernode. * The nonzeros in U[*,j] are segments that end at supernodal * representatives. The routine returns a list of such supernodal * representatives ( segrep[*] ) in topological order of the DFS that * generates them. The location of the first nonzero in each such * supernodal segment is also returned ( repfnz[*] ). * * Data structure * ============== * (lsub, xlsub): * lsub[*] contains the compressed subscripts of the supernodes; * xlsub[j] points to the starting location of the j-th column in * lsub[*]; * Storage: original row subscripts in A. * * During the course of symbolic factorization, we also use * (lsub, xlsub, xprune) for the purpose of symmetric pruning. * For each supernode {s,s+1,...,t=s+r} with first column s and last * column t, there are two subscript sets, the last column * structures (for pruning) will be removed in the end. * o lsub[j], j = xlsub[s], ..., xlsub[s+1]-1 * is the structure of column s (i.e. structure of this supernode). * It is used for the storage of numerical values. * o lsub[j], j = xlsub[t], ..., xlsub[t+1]-1 * is the structure of the last column t of this supernode. * It is for the purpose of symmetric pruning. Therefore, the * structural subscripts can be rearranged without making physical * interchanges among the numerical values. * * (1) if t > s, only the subscript sets for column s and column t * are stored. Column t represents pruned adjacency structure. * * -------------------------------------------- * lsub[*] ... | col s | col t | ... * -------------------------------------------- * ^ ^ ^ * xlsub[s] xlsub[s+1] xlsub[t+1] * : : * : xprune[t] * xlsub[t] * xprune[s] * * (2) if t == s, i.e., a singleton supernode, the same subscript set * is used for both G(L) and pruned graph: * * -------------------------------------- * lsub[*] ... | s | ... * -------------------------------------- * ^ ^ * xlsub[s] xlsub[s+1] * xprune[s] * * DFS will traverse the second subscript list, i.e., the part of the * pruned graph. * * Local parameters * ================ * nseg: no of segments in current U[*,j] * jsuper: jsuper=EMPTY if column j does not belong to the same * supernode as j-1. Otherwise, jsuper=nsuper. *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -