📄 superlu_defs.h.bak
字号:
/* * -- Distributed SuperLU routine (version 2.1) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley. * October 1, 2007 * */#ifndef __SUPERLU_DEFS /* allow multiple inclusions */#define __SUPERLU_DEFS/* * File name: superlu_defs.h * Purpose: Definitions which are precision-neutral */#ifdef _CRAY#include <fortran.h>#include <string.h>#endif#include <mpi.h>#include <stdlib.h>#include <stdio.h>/* Define my integer size int_t */#ifdef _CRAYtypedef short int_t;/*#undef int Revert back to int of default size. */#define mpi_int_t MPI_SHORT#elif defined (_LONGINT)typedef long int int_t;#define mpi_int_t MPI_LONG#else /* Default */typedef int int_t;#define mpi_int_t MPI_INT#endif#define SuperLU_timer_ SuperLU_timer_dist_/*********************************************************************** * Enumerated types ***********************************************************************//*typedef enum {FALSE, TRUE} boolean_t;*/typedef enum {NO, YES} yes_no_t;typedef enum {DOFACT, SamePattern, SamePattern_SameRowPerm, FACTORED} fact_t;typedef enum {NOROWPERM, LargeDiag, MY_PERMR} rowperm_t;typedef enum {NATURAL, MMD_AT_PLUS_A, MMD_ATA, METIS_AT_PLUS_A, PARMETIS, MY_PERMC} colperm_t;typedef enum {NOTRANS, TRANS, CONJ} trans_t;typedef enum {NOEQUIL, ROW, COL, BOTH} DiagScale_t;typedef enum {NOREFINE, SINGLE=1, DOUBLE, EXTRA} IterRefine_t;typedef enum {LUSUP, UCOL, LSUB, USUB} MemType;typedef enum {HEAD, TAIL} stack_end_t;typedef enum {SYSTEM, USER} LU_space_t;#include "Cnames.h"#include "supermatrix.h"#include "util_dist.h"#include "psymbfact.h"/*********************************************************************** * Constants ***********************************************************************//* * For each block column of L, the index[] array contains both the row * subscripts and the integers describing the size of the blocks. 
* The organization of index[] looks like: * * [ BLOCK COLUMN HEADER (size BC_HEADER) * number of blocks * number of row subscripts, i.e., LDA of nzval[] * BLOCK 0 <---- * BLOCK DESCRIPTOR (of size LB_DESCRIPTOR) | * block number (global) | * number of full rows in the block | * actual row subscripts | * BLOCK 1 | Repeat ... * BLOCK DESCRIPTOR | number of blocks * block number (global) | * number of full rows in the block | * actual row subscripts | * . | * . | * . <---- * ] * * For each block row of U, the organization of index[] looks like: * * [ BLOCK ROW HEADER (of size BR_HEADER) * number of blocks * number of entries in nzval[] * number of entries in index[] * BLOCK 0 <---- * BLOCK DESCRIPTOR (of size UB_DESCRIPTOR) | * block number (global) | * number of nonzeros in the block | * actual fstnz subscripts | * BLOCK 1 | Repeat ... * BLOCK DESCRIPTOR | number of blocks * block number (global) | * number of nonzeros in the block | * actual fstnz subscripts | * . | * . | * . <---- * ] * */#define BC_HEADER 2#define LB_DESCRIPTOR 2#define BR_HEADER 3#define UB_DESCRIPTOR 2#define NBUFFERS 5/* * Communication tags */ /* For numeric factorization. */#define NTAGS 10000#define UjROW 10#define UkSUB 11#define UkVAL 12#define LkSUB 13#define LkVAL 14#define LkkDIAG 15 /* For triangular solves. */#define XK_H 1 /* The header preceeding each X block. */#define LSUM_H 1 /* The header preceeding each MOD block. 
*/#define GSUM 20 #define Xk 21#define Yk 22#define LSUM 23/* * Communication scopes */#define COMM_ALL 100#define COMM_COLUMN 101#define COMM_ROW 102/* * Matrix distribution for sparse matrix-vector multiplication */#define SUPER_LINEAR 11#define SUPER_BLOCK 12/* * No of marker arrays used in the symbolic factorization, each of size n */#define NO_MARKER 3/*********************************************************************** * Macros ***********************************************************************/#define IAM(comm) { int rank; MPI_Comm_rank ( comm, &rank ); rank};#define MYROW(iam,grid) ( (iam) / grid->npcol )#define MYCOL(iam,grid) ( (iam) % grid->npcol )#define BlockNum(i) ( supno[i] )#define FstBlockC(bnum) ( xsup[bnum] )#define SuperSize(bnum) ( xsup[bnum+1]-xsup[bnum] )#define LBi(bnum,grid) ( (bnum)/grid->nprow )/* Global to local block rowwise */#define LBj(bnum,grid) ( (bnum)/grid->npcol )/* Global to local block columnwise*/#define PROW(bnum,grid) ( (bnum) % grid->nprow )#define PCOL(bnum,grid) ( (bnum) % grid->npcol )#define PNUM(i,j,grid) ( (i)*grid->npcol + j ) /* Process number at coord(i,j) */#define CEILING(a,b) ( ((a)%(b)) ? ((a)/(b) + 1) : ((a)/(b)) ) /* For triangular solves */#define RHS_ITERATE(i) \ for (i = 0; i < nrhs; ++i)#define X_BLK(i) \ ilsum[i] * nrhs + (i+1) * XK_H#define LSUM_BLK(i) \ ilsum[i] * nrhs + (i+1) * LSUM_H#if ( VAMPIR>=1 ) #define VT_TRACEON VT_traceon()#define VT_TRACEOFF VT_traceoff()#else#define VT_TRACEON #define VT_TRACEOFF#endif/*********************************************************************** * New data types ***********************************************************************//* * Define the 2D mapping of matrix blocks to process grid. * * Process grid: * Processes are numbered (0 : P-1). * P = Pr x Pc, where Pr, Pc are the number of process rows and columns. * (pr,pc) is the coordinate of IAM; 0 <= pr < Pr, 0 <= pc < Pc. 
* * Matrix blocks: * Matrix is partitioned according to supernode partitions, both * column and row-wise. * The k-th block columns (rows) contains columns (rows) (s:t), where * s=xsup[k], t=xsup[k+1]-1. * Block A(I,J) contains * rows from (xsup[I]:xsup[I+1]-1) and * columns from (xsup[J]:xsup[J+1]-1) * * Mapping of matrix entry (i,j) to matrix block (I,J): * (I,J) = ( supno[i], supno[j] ) * * Mapping of matrix block (I,J) to process grid (pr,pc): * (pr,pc) = ( MOD(I,NPROW), MOD(J,NPCOL) ) * * (xsup[nsupers],supno[n]) are replicated on all processors. * *//*-- Communication subgroup */typedef struct { MPI_Comm comm; /* MPI communicator */ int Np; /* number of processes */ int Iam; /* my process number */} superlu_scope_t;/*-- Process grid definition */typedef struct { MPI_Comm comm; /* MPI communicator */ superlu_scope_t rscp; /* row scope */ superlu_scope_t cscp; /* column scope */ int iam; /* my process number in this scope */ int_t nprow; /* number of process rows */ int_t npcol; /* number of process columns */} gridinfo_t;/* *-- The structures are determined by SYMBFACT and used thereafter. * * (xsup,supno) describes mapping between supernode and column: * xsup[s] is the leading column of the s-th supernode. * supno[i] is the supernode no to which column i belongs; * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12) * xsup 0 1 2 4 7 12 * Note: dfs will be performed on supernode rep. relative to the new * row pivoting ordering * * This is allocated during symbolic factorization SYMBFACT. */typedef struct { int_t *xsup; int_t *supno;} Glu_persist_t;/* *-- The structures are determined by SYMBFACT and used by DDISTRIBUTE. * * (xlsub,lsub): lsub[*] contains the compressed subscript of * rectangular supernodes; xlsub[j] points to the starting * location of the j-th column in lsub[*]. Note that xlsub * is indexed by column. * Storage: original row subscripts * * During the course of sparse LU factorization, we also use * (xlsub,lsub) for the purpose of symmetric pruning. 
For each
 *    supernode {s,s+1,...,t=s+r} with first column s and last
 *    column t, the subscript set
 *        lsub[j], j=xlsub[s], .., xlsub[s+1]-1
 *    is the structure of column s (i.e. structure of this supernode).
 *    It is used for the storage of numerical values.
 *    Furthermore,
 *        lsub[j], j=xlsub[t], .., xlsub[t+1]-1
 *    is the structure of the last column t of this supernode.
 *    It is for the purpose of symmetric pruning. Therefore, the
 *    structural subscripts can be rearranged without making physical
 *    interchanges among the numerical values.
 *
 *    However, if the supernode has only one column, then we
 *    only keep one set of subscripts. For any subscript interchange
 *    performed, similar interchange must be done on the numerical
 *    values.
 *
 *    The last column structures (for pruning) will be removed
 *    after the numerical LU factorization phase.
 *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -