📄 zmemory.c
字号:
/* * -- SuperLU routine (version 3.0) -- * Univ. of California Berkeley, Xerox Palo Alto Research Center, * and Lawrence Berkeley National Lab. * October 15, 2003 * */#include "slu_zdefs.h"/* Constants */#define NO_MEMTYPE 4 /* 0: lusup; 1: ucol; 2: lsub; 3: usub */#define GluIntArray(n) (5 * (n) + 5)/* Internal prototypes */void *zexpand (int *, MemType,int, int, GlobalLU_t *);int zLUWorkInit (int, int, int, int **, doublecomplex **, LU_space_t);void copy_mem_doublecomplex (int, void *, void *);void zStackCompress (GlobalLU_t *);void zSetupSpace (void *, int, LU_space_t *);void *zuser_malloc (int, int);void zuser_free (int, int);/* External prototypes (in memory.c - prec-indep) */extern void copy_mem_int (int, void *, void *);extern void user_bcopy (char *, char *, int);/* Headers for 4 types of dynamatically managed memory */typedef struct e_node { int size; /* length of the memory that has been used */ void *mem; /* pointer to the new malloc'd store */} ExpHeader;typedef struct { int size; int used; int top1; /* grow upward, relative to &array[0] */ int top2; /* grow downward */ void *array;} LU_stack_t;/* Variables local to this file */static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */static LU_stack_t stack;static int no_expand;/* Macros to manipulate stack */#define StackFull(x) ( x + stack.used >= stack.size )#define NotDoubleAlign(addr) ( (long int)addr & 7 )#define DoubleAlign(addr) ( ((long int)addr + 7) & ~7L )#define TempSpace(m, w) ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \ (w + 1) * m * sizeof(doublecomplex) )#define Reduce(alpha) ((alpha + 1) / 2) /* i.e. (alpha-1)/2 + 1 *//* * Setup the memory model to be used for factorization. * lwork = 0: use system malloc; * lwork > 0: use user-supplied work[] space. */void zSetupSpace(void *work, int lwork, LU_space_t *MemModel){ if ( lwork == 0 ) { *MemModel = SYSTEM; /* malloc/free */ } else if ( lwork > 0 ) { *MemModel = USER; /* user provided space */ stack.used = 0; stack.top1 = 0; stack.top2 = (lwork/4)*4; /* must be word addressable */ stack.size = stack.top2; stack.array = (void *) work; }}void *zuser_malloc(int bytes, int which_end){ void *buf; if ( StackFull(bytes) ) return (NULL); if ( which_end == HEAD ) { buf = (char*) stack.array + stack.top1; stack.top1 += bytes; } else { stack.top2 -= bytes; buf = (char*) stack.array + stack.top2; } stack.used += bytes; return buf;}void zuser_free(int bytes, int which_end){ if ( which_end == HEAD ) { stack.top1 -= bytes; } else { stack.top2 += bytes; } stack.used -= bytes;}/* * mem_usage consists of the following fields: * - for_lu (float) * The amount of space used in bytes for the L\U data structures. * - total_needed (float) * The amount of space needed in bytes to perform factorization. * - expansions (int) * Number of memory expansions during the LU factorization. */int zQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage){ SCformat *Lstore; NCformat *Ustore; register int n, iword, dword, panel_size = sp_ienv(1); Lstore = L->Store; Ustore = U->Store; n = L->ncol; iword = sizeof(int); dword = sizeof(doublecomplex); /* For LU factors */ mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] * dword + Lstore->rowind_colptr[n] * iword ); mem_usage->for_lu += (float)( (n + 1) * iword + Ustore->colptr[n] * (dword + iword) ); /* Working storage to support factorization */ mem_usage->total_needed = mem_usage->for_lu + (float)( (2 * panel_size + 4 + NO_MARKER) * n * iword + (panel_size + 1) * n * dword ); mem_usage->expansions = --no_expand; return 0;} /* zQuerySpace *//* * Allocate storage for the data structures common to all factor routines. * For those unpredictable size, make a guess as FILL * nnz(A). * Return value: * If lwork = -1, return the estimated amount of space required, plus n; * otherwise, return the amount of space actually allocated when * memory allocation failure occurred. */intzLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz, int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu, int **iwork, doublecomplex **dwork){ int info, iword, dword; SCformat *Lstore; NCformat *Ustore; int *xsup, *supno; int *lsub, *xlsub; doublecomplex *lusup; int *xlusup; doublecomplex *ucol; int *usub, *xusub; int nzlmax, nzumax, nzlumax; int FILL = sp_ienv(6); Glu->n = n; no_expand = 0; iword = sizeof(int); dword = sizeof(doublecomplex); if ( !expanders ) expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader)); if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders"); if ( fact != SamePattern_SameRowPerm ) { /* Guess for L\U factors */ nzumax = nzlumax = FILL * annz; nzlmax = SUPERLU_MAX(1, FILL/4.) * annz; if ( lwork == -1 ) { return ( GluIntArray(n) * iword + TempSpace(m, panel_size) + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n ); } else { zSetupSpace(work, lwork, &Glu->MemModel); } #if ( PRNTlevel >= 1 ) printf("zLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n", FILL, nzlmax, nzumax); fflush(stdout);#endif /* Integer pointers for L\U factors */ if ( Glu->MemModel == SYSTEM ) { xsup = intMalloc(n+1); supno = intMalloc(n+1); xlsub = intMalloc(n+1); xlusup = intMalloc(n+1); xusub = intMalloc(n+1); } else { xsup = (int *)zuser_malloc((n+1) * iword, HEAD); supno = (int *)zuser_malloc((n+1) * iword, HEAD); xlsub = (int *)zuser_malloc((n+1) * iword, HEAD); xlusup = (int *)zuser_malloc((n+1) * iword, HEAD); xusub = (int *)zuser_malloc((n+1) * iword, HEAD); } lusup = (doublecomplex *) zexpand( &nzlumax, LUSUP, 0, 0, Glu ); ucol = (doublecomplex *) zexpand( &nzumax, UCOL, 0, 0, Glu ); lsub = (int *) zexpand( &nzlmax, LSUB, 0, 0, Glu ); usub = (int *) zexpand( &nzumax, USUB, 0, 1, Glu ); while ( !lusup || !ucol || !lsub || !usub ) { if ( Glu->MemModel == SYSTEM ) { SUPERLU_FREE(lusup); SUPERLU_FREE(ucol); SUPERLU_FREE(lsub); SUPERLU_FREE(usub); } else { zuser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD); } nzlumax /= 2; nzumax /= 2; nzlmax /= 2; if ( nzlumax < annz ) { printf("Not enough memory to perform factorization.\n"); return (zmemory_usage(nzlmax, nzumax, nzlumax, n) + n); }#if ( PRNTlevel >= 1) printf("zLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", nzlmax, nzumax); fflush(stdout);#endif lusup = (doublecomplex *) zexpand( &nzlumax, LUSUP, 0, 0, Glu ); ucol = (doublecomplex *) zexpand( &nzumax, UCOL, 0, 0, Glu ); lsub = (int *) zexpand( &nzlmax, LSUB, 0, 0, Glu ); usub = (int *) zexpand( &nzumax, USUB, 0, 1, Glu ); } } else { /* fact == SamePattern_SameRowPerm */ Lstore = L->Store; Ustore = U->Store; xsup = Lstore->sup_to_col; supno = Lstore->col_to_sup; xlsub = Lstore->rowind_colptr; xlusup = Lstore->nzval_colptr; xusub = Ustore->colptr; nzlmax = Glu->nzlmax; /* max from previous factorization */ nzumax = Glu->nzumax; nzlumax = Glu->nzlumax; if ( lwork == -1 ) { return ( GluIntArray(n) * iword + TempSpace(m, panel_size) + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n ); } else if ( lwork == 0 ) { Glu->MemModel = SYSTEM; } else { Glu->MemModel = USER; stack.top2 = (lwork/4)*4; /* must be word-addressable */ stack.size = stack.top2; } lsub = expanders[LSUB].mem = Lstore->rowind; lusup = expanders[LUSUP].mem = Lstore->nzval; usub = expanders[USUB].mem = Ustore->rowind; ucol = expanders[UCOL].mem = Ustore->nzval;; expanders[LSUB].size = nzlmax; expanders[LUSUP].size = nzlumax; expanders[USUB].size = nzumax; expanders[UCOL].size = nzumax; } Glu->xsup = xsup; Glu->supno = supno; Glu->lsub = lsub; Glu->xlsub = xlsub; Glu->lusup = lusup; Glu->xlusup = xlusup; Glu->ucol = ucol; Glu->usub = usub; Glu->xusub = xusub; Glu->nzlmax = nzlmax; Glu->nzumax = nzumax; Glu->nzlumax = nzlumax; info = zLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel); if ( info ) return ( info + zmemory_usage(nzlmax, nzumax, nzlumax, n) + n); ++no_expand; return 0; } /* zLUMemInit *//* Allocate known working storage. Returns 0 if success, otherwise returns the number of bytes allocated so far when failure occurred. */intzLUWorkInit(int m, int n, int panel_size, int **iworkptr, doublecomplex **dworkptr, LU_space_t MemModel){ int isize, dsize, extra; doublecomplex *old_ptr; int maxsuper = sp_ienv(3), rowblk = sp_ienv(4); isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int); dsize = (m * panel_size + NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(doublecomplex); if ( MemModel == SYSTEM ) *iworkptr = (int *) intCalloc(isize/sizeof(int)); else *iworkptr = (int *) zuser_malloc(isize, TAIL); if ( ! *iworkptr ) { fprintf(stderr, "zLUWorkInit: malloc fails for local iworkptr[]\n"); return (isize + n); } if ( MemModel == SYSTEM ) *dworkptr = (doublecomplex *) SUPERLU_MALLOC(dsize); else { *dworkptr = (doublecomplex *) zuser_malloc(dsize, TAIL); if ( NotDoubleAlign(*dworkptr) ) { old_ptr = *dworkptr; *dworkptr = (doublecomplex*) DoubleAlign(*dworkptr); *dworkptr = (doublecomplex*) ((double*)*dworkptr - 1); extra = (char*)old_ptr - (char*)*dworkptr;#ifdef DEBUG printf("zLUWorkInit: not aligned, extra %d\n", extra);#endif stack.top2 -= extra;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -