⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zdistribute.c

📁 LU分解求解矩阵方程组的解
💻 C
📖 第 1 页 / 共 2 页
字号:
#include "superlu_zdefs.h"int_tzdistribute(fact_t fact, int_t n, SuperMatrix *A,             Glu_freeable_t *Glu_freeable,	    LUstruct_t *LUstruct, gridinfo_t *grid)/* * -- Distributed SuperLU routine (version 1.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley. * September 1, 1999 * * * Purpose * ======= *   Distribute the matrix onto the 2D process mesh. *  * Arguments * ========= *  * fact (input) fact_t *        Specifies whether or not the L and U structures will be re-used. *        = SamePattern_SameRowPerm: L and U structures are input, and *                                   unchanged on exit. *        = DOFACT or SamePattern: L and U structures are computed and output. * * n      (input) int *        Dimension of the matrix. * * A      (input) SuperMatrix* *	  The original matrix A, permuted by columns, of dimension *        (A->nrow, A->ncol). The type of A can be: *        Stype = SLU_NCP; Dtype = SLU_Z; Mtype = SLU_GE. * * LUstruct (input) LUstruct_t* *        Data structures for L and U factors. * * grid   (input) gridinfo_t* *        The 2D process mesh. * * Return value * ============ *   > 0, working storage required (in bytes). * */{    Glu_persist_t *Glu_persist = LUstruct->Glu_persist;    LocalLU_t *Llu = LUstruct->Llu;    int_t bnnz, fsupc, fsupc1, i, ii, irow, istart, j, jb, jj, k,           len, len1, nsupc;    int_t ljb;  /* local block column number */    int_t nrbl; /* number of L blocks in current block column */    int_t nrbu; /* number of U blocks in current block column */    int_t gb;   /* global block number; 0 < gb <= nsuper */    int_t lb;   /* local block number; 0 < lb <= ceil(NSUPERS/Pr) */    int iam, jbrow, kcol, mycol, myrow, pc, pr;    int_t mybufmax[NBUFFERS];    NCPformat *Astore;    doublecomplex *a;    int_t *asub;    int_t *xa_begin, *xa_end;    int_t *xsup = Glu_persist->xsup;    /* supernode and column mapping */    int_t *supno = Glu_persist->supno;       int_t *lsub, *xlsub, *usub, *xusub;    int_t nsupers;    int_t next_lind;      /* next available position in index[*] */    int_t next_lval;      /* next available position in nzval[*] */    int_t *index;         /* indices consist of headers and row subscripts */    doublecomplex *lusup, *uval; /* nonzero values in L and U */    doublecomplex **Lnzval_bc_ptr;  /* size ceil(NSUPERS/Pc) */    int_t  **Lrowind_bc_ptr; /* size ceil(NSUPERS/Pc) */    doublecomplex **Unzval_br_ptr;  /* size ceil(NSUPERS/Pr) */    int_t  **Ufstnz_br_ptr;  /* size ceil(NSUPERS/Pr) */    /*-- Counts to be used in factorization. --*/    int_t  *ToRecv, *ToSendD, **ToSendR;    /*-- Counts to be used in lower triangular solve. --*/    int_t  *fmod;          /* Modification count for L-solve.        */    int_t  **fsendx_plist; /* Column process list to send down Xk.   */    int_t  nfrecvx = 0;    /* Number of Xk I will receive.           */    int_t  nfsendx = 0;    /* Number of Xk I will send               */    int_t  kseen;    /*-- Counts to be used in upper triangular solve. --*/    int_t  *bmod;          /* Modification count for U-solve.        */    int_t  **bsendx_plist; /* Column process list to send down Xk.   */    int_t  nbrecvx = 0;    /* Number of Xk I will receive.           */    int_t  nbsendx = 0;    /* Number of Xk I will send               */    int_t  *ilsum;         /* starting position of each supernode in 			      the full array (local)                 */    /*-- Auxiliary arrays; freed on return --*/    int_t *rb_marker;  /* block hit marker; size ceil(NSUPERS/Pr)           */    int_t *Urb_length; /* U block length; size ceil(NSUPERS/Pr)             */    int_t *Urb_indptr; /* pointers to U index[]; size ceil(NSUPERS/Pr)      */    int_t *Urb_fstnz;  /* # of fstnz in a block row; size ceil(NSUPERS/Pr)  */    int_t *Ucbs;       /* number of column blocks in a block row            */    int_t *Lrb_length; /* L block length; size ceil(NSUPERS/Pr)             */    int_t *Lrb_number; /* global block number; size ceil(NSUPERS/Pr)        */    int_t *Lrb_indptr; /* pointers to L index[]; size ceil(NSUPERS/Pr)      */    int_t *Lrb_valptr; /* pointers to L nzval[]; size ceil(NSUPERS/Pr)      */    doublecomplex *dense, *dense_col; /* SPA */    doublecomplex zero = {0.0, 0.0};    int_t  ldaspa;     /* LDA of SPA */    int_t mem_use = 0, iword, zword;#if ( PRNTlevel>=1 )    int_t nLblocks = 0, nUblocks = 0;#endif#if ( PROFlevel>=1 )     double t, t_u, t_l;    int_t u_blks;#endif    /* Initialization. */    iam = grid->iam;    myrow = MYROW( iam, grid );    mycol = MYCOL( iam, grid );    for (i = 0; i < NBUFFERS; ++i) mybufmax[i] = 0;    nsupers  = supno[n-1] + 1;    Astore   = A->Store;    a        = Astore->nzval;    asub     = Astore->rowind;    xa_begin = Astore->colbeg;    xa_end   = Astore->colend;#if ( PRNTlevel>=1 )    iword = sizeof(int_t);    zword = sizeof(doublecomplex);#endif#if ( DEBUGlevel>=1 )    CHECK_MALLOC(iam, "Enter zdistribute()");#endif    if ( fact == SamePattern_SameRowPerm ) {#if ( PROFlevel>=1 )	t_l = t_u = 0; u_blks = 0;#endif	/* We can propagate the new values of A into the existing	   L and U data structures.            */	ilsum = Llu->ilsum;	ldaspa = Llu->ldalsum;	if ( !(dense = doublecomplexCalloc_dist(((size_t)ldaspa) * sp_ienv_dist(3))) )	    ABORT("Calloc fails for SPA dense[].");	nrbu = CEILING( nsupers, grid->nprow ); /* No. of local block rows */	if ( !(Urb_length = intCalloc_dist(nrbu)) )	    ABORT("Calloc fails for Urb_length[].");	if ( !(Urb_indptr = intMalloc_dist(nrbu)) )	    ABORT("Malloc fails for Urb_indptr[].");	Lrowind_bc_ptr = Llu->Lrowind_bc_ptr;	Lnzval_bc_ptr = Llu->Lnzval_bc_ptr;	Ufstnz_br_ptr = Llu->Ufstnz_br_ptr;	Unzval_br_ptr = Llu->Unzval_br_ptr;#if ( PRNTlevel>=1 )	mem_use += 2*nrbu*iword + ldaspa*sp_ienv_dist(3)*zword;#endif#if ( PROFlevel>=1 )	t = SuperLU_timer_();#endif	/* Initialize Uval to zero. */	for (lb = 0; lb < nrbu; ++lb) {	    Urb_indptr[lb] = BR_HEADER; /* Skip header in U index[]. */	    index = Ufstnz_br_ptr[lb];	    if ( index ) {		uval = Unzval_br_ptr[lb];		len = index[1];		for (i = 0; i < len; ++i) uval[i] = zero;	    } /* if index != NULL */	} /* for lb ... */	for (jb = 0; jb < nsupers; ++jb) { /* Loop through each block column */	    pc = PCOL( jb, grid );	    if ( mycol == pc ) { /* Block column jb in my process column */		fsupc = FstBlockC( jb );		nsupc = SuperSize( jb ); 		/* Scatter A into SPA (for L), or into U directly. */		for (j = fsupc, dense_col = dense; j < FstBlockC(jb+1); ++j) {		    for (i = xa_begin[j]; i < xa_end[j]; ++i) {			irow = asub[i];			gb = BlockNum( irow );			if ( myrow == PROW( gb, grid ) ) {			    lb = LBi( gb, grid ); 			    if ( gb < jb ) { /* in U */ 				index = Ufstnz_br_ptr[lb]; 				uval = Unzval_br_ptr[lb]; 				while (  (k = index[Urb_indptr[lb]]) < jb ) { 				    /* Skip nonzero values in this block */ 				    Urb_length[lb] += index[Urb_indptr[lb]+1]; 				    /* Move pointer to the next block */ 				    Urb_indptr[lb] += UB_DESCRIPTOR 					+ SuperSize( k ); 				} 				/*assert(k == jb);*/ 				/* start fstnz */ 				istart = Urb_indptr[lb] + UB_DESCRIPTOR; 				len = Urb_length[lb]; 				fsupc1 = FstBlockC( gb+1 ); 				k = j - fsupc; 				/* Sum the lengths of the leading columns */ 				for (jj = 0; jj < k; ++jj)				    len += fsupc1 - index[istart++];				/*assert(irow>=index[istart]);*/				uval[len + irow - index[istart]] = a[i];			    } else { /* in L; put in SPA first */  				irow = ilsum[lb] + irow - FstBlockC( gb );  				dense_col[irow] = a[i];  			    }  			}		    } /* for i ... */  		    dense_col += ldaspa;		} /* for j ... */#if ( PROFlevel>=1 )		t_u += SuperLU_timer_() - t;		t = SuperLU_timer_();#endif		/* Gather the values of A from SPA into Lnzval[]. */		ljb = LBj( jb, grid ); /* Local block number */		index = Lrowind_bc_ptr[ljb];		if ( index ) {		    nrbl = index[0];   /* Number of row blocks. */		    len = index[1];    /* LDA of lusup[]. */		    lusup = Lnzval_bc_ptr[ljb];		    next_lind = BC_HEADER;		    next_lval = 0;		    for (jj = 0; jj < nrbl; ++jj) {			gb = index[next_lind++];			len1 = index[next_lind++]; /* Rows in the block. */			lb = LBi( gb, grid );			for (bnnz = 0; bnnz < len1; ++bnnz) {			    irow = index[next_lind++]; /* Global index. */			    irow = ilsum[lb] + irow - FstBlockC( gb );			    k = next_lval++;			    for (j = 0, dense_col = dense; j < nsupc; ++j) {				lusup[k] = dense_col[irow];				dense_col[irow] = zero;				k += len;				dense_col += ldaspa;			    }			} /* for bnnz ... */		    } /* for jj ... */		} /* if index ... */#if ( PROFlevel>=1 )		t_l += SuperLU_timer_() - t;#endif	    } /* if mycol == pc */	} /* for jb ... */	SUPERLU_FREE(dense);	SUPERLU_FREE(Urb_length);	SUPERLU_FREE(Urb_indptr);#if ( PROFlevel>=1 )	if ( !iam ) printf(".. 2nd distribute time: L %.2f\tU %.2f\tu_blks %d\tnrbu %d\n",			   t_l, t_u, u_blks, nrbu);#endif    } else { /* First time creating the L and U data structure. */#if ( PROFlevel>=1 )	t_l = t_u = 0; u_blks = 0;#endif	/* No L and U data structures are available yet.	   We need to set up the L and U data structures and propagate	   the values of A into them.          */	lsub = Glu_freeable->lsub;    /* compressed L subscripts */	xlsub = Glu_freeable->xlsub;	usub = Glu_freeable->usub;    /* compressed U subscripts */	xusub = Glu_freeable->xusub;    	if ( !(ToRecv = intCalloc_dist(nsupers)) )	    ABORT("Calloc fails for ToRecv[].");	k = CEILING( nsupers, grid->npcol );/* Number of local column blocks */	if ( !(ToSendR = (int_t **) SUPERLU_MALLOC(k*sizeof(int_t*))) )	    ABORT("Malloc fails for ToSendR[].");	j = k * grid->npcol;	if ( !(index = intMalloc_dist(j)) )	    ABORT("Malloc fails for index[].");#if ( PRNTlevel>=1 )	mem_use += k*sizeof(int_t*) + (j + nsupers)*iword;#endif	for (i = 0; i < j; ++i) index[i] = EMPTY;	for (i = 0,j = 0; i < k; ++i, j += grid->npcol) ToSendR[i] = &index[j];	k = CEILING( nsupers, grid->nprow ); /* Number of local block rows */	/* Pointers to the beginning of each block row of U. */	if ( !(Unzval_br_ptr =                (doublecomplex**)SUPERLU_MALLOC(k * sizeof(doublecomplex*))) )	    ABORT("Malloc fails for Unzval_br_ptr[].");	if ( !(Ufstnz_br_ptr = (int_t**)SUPERLU_MALLOC(k * sizeof(int_t*))) )	    ABORT("Malloc fails for Ufstnz_br_ptr[].");		if ( !(ToSendD = intCalloc_dist(k)) )	    ABORT("Malloc fails for ToSendD[].");	if ( !(ilsum = intMalloc_dist(k+1)) )	    ABORT("Malloc fails for ilsum[].");	/* Auxiliary arrays used to set up U block data structures.	   They are freed on return. */	if ( !(rb_marker = intCalloc_dist(k)) )	    ABORT("Calloc fails for rb_marker[].");	if ( !(Urb_length = intCalloc_dist(k)) )	    ABORT("Calloc fails for Urb_length[].");	if ( !(Urb_indptr = intMalloc_dist(k)) )	    ABORT("Malloc fails for Urb_indptr[].");	if ( !(Urb_fstnz = intCalloc_dist(k)) )	    ABORT("Calloc fails for Urb_fstnz[].");	if ( !(Ucbs = intCalloc_dist(k)) )	    ABORT("Calloc fails for Ucbs[].");#if ( PRNTlevel>=1 )		mem_use += 2*k*sizeof(int_t*) + (7*k+1)*iword;#endif	/* Compute ldaspa and ilsum[]. */	ldaspa = 0;	ilsum[0] = 0;	for (gb = 0; gb < nsupers; ++gb) {	    if ( myrow == PROW( gb, grid ) ) {		i = SuperSize( gb );		ldaspa += i;		lb = LBi( gb, grid );		ilsum[lb + 1] = ilsum[lb] + i;	    }	}	            	/* ------------------------------------------------------------	   COUNT NUMBER OF ROW BLOCKS AND THE LENGTH OF EACH BLOCK IN U.	   THIS ACCOUNTS FOR ONE-PASS PROCESSING OF G(U).	   ------------------------------------------------------------*/		/* Loop through each supernode column. */	for (jb = 0; jb < nsupers; ++jb) {	    pc = PCOL( jb, grid );	    fsupc = FstBlockC( jb );	    nsupc = SuperSize( jb );	    /* Loop through each column in the block. */	    for (j = fsupc; j < fsupc + nsupc; ++j) {		/* usub[*] contains only "first nonzero" in each segment. */		for (i = xusub[j]; i < xusub[j+1]; ++i) {		    irow = usub[i]; /* First nonzero of the segment. */		    gb = BlockNum( irow );		    kcol = PCOL( gb, grid );		    ljb = LBj( gb, grid );		    if ( mycol == kcol && mycol != pc ) ToSendR[ljb][pc] = YES;		    pr = PROW( gb, grid );		    lb = LBi( gb, grid );		    if ( mycol == pc ) {			if  ( myrow == pr ) {			    ToSendD[lb] = YES;			    /* Count nonzeros in entire block row. */			    Urb_length[lb] += FstBlockC( gb+1 ) - irow;			    if (rb_marker[lb] <= jb) {/* First see the block */				rb_marker[lb] = jb + 1;				Urb_fstnz[lb] += nsupc;				++Ucbs[lb]; /* Number of column blocks

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -