⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zdistribute.c

📁 LU分解求解矩阵方程组的解
💻 C
📖 第 1 页 / 共 2 页
字号:
					       in block row lb. */#if ( PRNTlevel>=1 )				++nUblocks;#endif			    }			    ToRecv[gb] = 1;			} else ToRecv[gb] = 2; /* Do I need 0, 1, 2 ? */		    }		} /* for i ... */	    } /* for j ... */	} /* for jb ... */		/* Set up the initial pointers for each block row in U. */	nrbu = CEILING( nsupers, grid->nprow );/* Number of local block rows */	for (lb = 0; lb < nrbu; ++lb) {	    len = Urb_length[lb];	    rb_marker[lb] = 0; /* Reset block marker. */	    if ( len ) {		/* Add room for descriptors */		len1 = Urb_fstnz[lb] + BR_HEADER + Ucbs[lb] * UB_DESCRIPTOR;		if ( !(index = intMalloc_dist(len1+1)) )		    ABORT("Malloc fails for Uindex[].");		Ufstnz_br_ptr[lb] = index;		if ( !(Unzval_br_ptr[lb] = doublecomplexMalloc_dist(len)) )		    ABORT("Malloc fails for Unzval_br_ptr[*][].");		mybufmax[2] = SUPERLU_MAX( mybufmax[2], len1 );		mybufmax[3] = SUPERLU_MAX( mybufmax[3], len );		index[0] = Ucbs[lb]; /* Number of column blocks */		index[1] = len;      /* Total length of nzval[] */		index[2] = len1;     /* Total length of index[] */		index[len1] = -1;    /* End marker */	    } else {		Ufstnz_br_ptr[lb] = NULL;		Unzval_br_ptr[lb] = NULL;	    }	    Urb_length[lb] = 0; /* Reset block length. */	    Urb_indptr[lb] = BR_HEADER; /* Skip header in U index[]. */ 	    Urb_fstnz[lb] = BR_HEADER;	} /* for lb ... */	SUPERLU_FREE(Ucbs);#if ( PROFlevel>=1 )	t = SuperLU_timer_() - t;	if ( !iam) printf(".. Phase 2 - setup U strut time: %.2f\t\n", t);#endif#if ( PRNTlevel>=1 )        mem_use -= 2*k * iword;#endif	/* Auxiliary arrays used to set up L block data structures.	   They are freed on return.	   k is the number of local row blocks.   */	if ( !(Lrb_length = intCalloc_dist(k)) )	    ABORT("Calloc fails for Lrb_length[].");	if ( !(Lrb_number = intMalloc_dist(k)) )	    ABORT("Malloc fails for Lrb_number[].");	if ( !(Lrb_indptr = intMalloc_dist(k)) )	    ABORT("Malloc fails for Lrb_indptr[].");	if ( !(Lrb_valptr = intMalloc_dist(k)) )	    ABORT("Malloc fails for Lrb_valptr[].");	if (!(dense=doublecomplexCalloc_dist(SUPERLU_MAX(1,((size_t)ldaspa)              *sp_ienv_dist(3)))))	    ABORT("Calloc fails for SPA dense[].");	/* These counts will be used for triangular solves. */	if ( !(fmod = intCalloc_dist(k)) )	    ABORT("Calloc fails for fmod[].");	if ( !(bmod = intCalloc_dist(k)) )	    ABORT("Calloc fails for bmod[].");#if ( PRNTlevel>=1 )		mem_use += 6*k*iword + ldaspa*sp_ienv_dist(3)*zword;#endif	k = CEILING( nsupers, grid->npcol );/* Number of local block columns */	/* Pointers to the beginning of each block column of L. */	if ( !(Lnzval_bc_ptr = (doublecomplex**)SUPERLU_MALLOC(k * sizeof(doublecomplex*))) )	    ABORT("Malloc fails for Lnzval_bc_ptr[].");	if ( !(Lrowind_bc_ptr = (int_t**)SUPERLU_MALLOC(k * sizeof(int_t*))) )	    ABORT("Malloc fails for Lrowind_bc_ptr[].");	Lrowind_bc_ptr[k-1] = NULL;	/* These lists of processes will be used for triangular solves. */	if ( !(fsendx_plist = (int_t **) SUPERLU_MALLOC(k*sizeof(int_t*))) )	    ABORT("Malloc fails for fsendx_plist[].");	len = k * grid->nprow;	if ( !(index = intMalloc_dist(len)) )	    ABORT("Malloc fails for fsendx_plist[0]");	for (i = 0; i < len; ++i) index[i] = EMPTY;	for (i = 0, j = 0; i < k; ++i, j += grid->nprow)	    fsendx_plist[i] = &index[j];	if ( !(bsendx_plist = (int_t **) SUPERLU_MALLOC(k*sizeof(int_t*))) )	    ABORT("Malloc fails for bsendx_plist[].");	if ( !(index = intMalloc_dist(len)) )	    ABORT("Malloc fails for bsendx_plist[0]");	for (i = 0; i < len; ++i) index[i] = EMPTY;	for (i = 0, j = 0; i < k; ++i, j += grid->nprow)	    bsendx_plist[i] = &index[j];#if ( PRNTlevel>=1 )	mem_use += 4*k*sizeof(int_t*) + 2*len*iword;#endif	/*------------------------------------------------------------	  PROPAGATE ROW SUBSCRIPTS AND VALUES OF A INTO L AND U BLOCKS.	  THIS ACCOUNTS FOR ONE-PASS PROCESSING OF A, L AND U.	  ------------------------------------------------------------*/	for (jb = 0; jb < nsupers; ++jb) {	    pc = PCOL( jb, grid );	    if ( mycol == pc ) { /* Block column jb in my process column */		fsupc = FstBlockC( jb );		nsupc = SuperSize( jb );		ljb = LBj( jb, grid ); /* Local block number */				/* Scatter A into SPA. */		for (j = fsupc, dense_col = dense; j < FstBlockC( jb+1 ); ++j){		    for (i = xa_begin[j]; i < xa_end[j]; ++i) {			irow = asub[i];			gb = BlockNum( irow );			if ( myrow == PROW( gb, grid ) ) {			    lb = LBi( gb, grid );			    irow = ilsum[lb] + irow - FstBlockC( gb );			    dense_col[irow] = a[i];			}		    }		    dense_col += ldaspa;		}		jbrow = PROW( jb, grid );#if ( PROFlevel>=1 )		t = SuperLU_timer_();#endif		/*------------------------------------------------		 * SET UP U BLOCKS.		 *------------------------------------------------*/		kseen = 0;		dense_col = dense;		/* Loop through each column in the block column. */		for (j = fsupc; j < FstBlockC( jb+1 ); ++j) {		    istart = xusub[j];		    /* NOTE: Only the first nonzero index of the segment		       is stored in usub[]. */		    for (i = istart; i < xusub[j+1]; ++i) {			irow = usub[i]; /* First nonzero in the segment. */			gb = BlockNum( irow );			pr = PROW( gb, grid );			if ( pr != jbrow &&			     myrow == jbrow &&  /* diag. proc. owning jb */			     bsendx_plist[ljb][pr] == EMPTY ) {			    bsendx_plist[ljb][pr] = YES;			    ++nbsendx;                        }			if ( myrow == pr ) {			    lb = LBi( gb, grid ); /* Local block number */			    index = Ufstnz_br_ptr[lb];			    uval = Unzval_br_ptr[lb];			    fsupc1 = FstBlockC( gb+1 );			    if (rb_marker[lb] <= jb) { /* First time see 							  the block       */				rb_marker[lb] = jb + 1;				Urb_indptr[lb] = Urb_fstnz[lb];;				index[Urb_indptr[lb]] = jb; /* Descriptor */				Urb_indptr[lb] += UB_DESCRIPTOR;				/* Record the first location in index[] of the				   next block */				Urb_fstnz[lb] = Urb_indptr[lb] + nsupc;				len = Urb_indptr[lb];/* Start fstnz in index */				index[len-1] = 0;				for (k = 0; k < nsupc; ++k)				    index[len+k] = fsupc1;				if ( gb != jb )/* Exclude diagonal block. */				    ++bmod[lb];/* Mod. count for back solve */				if ( kseen == 0 && myrow != jbrow ) {				    ++nbrecvx;				    kseen = 1;				}			    } else { /* Already saw the block */				len = Urb_indptr[lb];/* Start fstnz in index */			    }			    jj = j - fsupc;			    index[len+jj] = irow;			    /* Load the numerical values */			    k = fsupc1 - irow; /* No. of nonzeros in segment */			    index[len-1] += k; /* Increment block length in						  Descriptor */			    irow = ilsum[lb] + irow - FstBlockC( gb );			    for (ii = 0; ii < k; ++ii) {				uval[Urb_length[lb]++] = dense_col[irow + ii];				dense_col[irow + ii] = zero;			    }			} /* if myrow == pr ... */		    } /* for i ... */                    dense_col += ldaspa;		} /* for j ... */#if ( PROFlevel>=1 )		t_u += SuperLU_timer_() - t;		t = SuperLU_timer_();#endif		/*------------------------------------------------		 * SET UP L BLOCKS.		 *------------------------------------------------*/		/* Count number of blocks and length of each block. */		nrbl = 0;		len = 0; /* Number of row subscripts I own. */		kseen = 0;		istart = xlsub[fsupc];		for (i = istart; i < xlsub[fsupc+1]; ++i) {		    irow = lsub[i];		    gb = BlockNum( irow ); /* Global block number */		    pr = PROW( gb, grid ); /* Process row owning this block */		    if ( pr != jbrow &&			 myrow == jbrow &&  /* diag. proc. owning jb */			 fsendx_plist[ljb][pr] == EMPTY /* first time */ ) {			fsendx_plist[ljb][pr] = YES;			++nfsendx;                    }		    if ( myrow == pr ) {			lb = LBi( gb, grid );  /* Local block number */			if (rb_marker[lb] <= jb) { /* First see this block */			    rb_marker[lb] = jb + 1;			    Lrb_length[lb] = 1;			    Lrb_number[nrbl++] = gb;			    if ( gb != jb ) /* Exclude diagonal block. */				++fmod[lb]; /* Mod. count for forward solve */			    if ( kseen == 0 && myrow != jbrow ) {				++nfrecvx;				kseen = 1;			    }#if ( PRNTlevel>=1 )			    ++nLblocks;#endif			} else {			    ++Lrb_length[lb];			}			++len;		    }		} /* for i ... */		if ( nrbl ) { /* Do not ensure the blocks are sorted! */		    /* Set up the initial pointers for each block in 		       index[] and nzval[]. */		    /* Add room for descriptors */		    len1 = len + BC_HEADER + nrbl * LB_DESCRIPTOR;		    if ( !(index = intMalloc_dist(len1)) ) 			ABORT("Malloc fails for index[]");		    Lrowind_bc_ptr[ljb] = index;		    if (!(Lnzval_bc_ptr[ljb] = doublecomplexMalloc_dist(((size_t)len)*nsupc))) {			fprintf(stderr, "col block %d ", jb);			ABORT("Malloc fails for Lnzval_bc_ptr[*][]");		    }		    mybufmax[0] = SUPERLU_MAX( mybufmax[0], len1 );		    mybufmax[1] = SUPERLU_MAX( mybufmax[1], len*nsupc );		    mybufmax[4] = SUPERLU_MAX( mybufmax[4], len );		    index[0] = nrbl;  /* Number of row blocks */		    index[1] = len;   /* LDA of the nzval[] */		    next_lind = BC_HEADER;		    next_lval = 0;		    for (k = 0; k < nrbl; ++k) {			gb = Lrb_number[k];			lb = LBi( gb, grid );			len = Lrb_length[lb];			Lrb_length[lb] = 0;  /* Reset vector of block length */			index[next_lind++] = gb; /* Descriptor */			index[next_lind++] = len; 			Lrb_indptr[lb] = next_lind;			Lrb_valptr[lb] = next_lval;			next_lind += len;			next_lval += len;		    }		    /* Propagate the compressed row subscripts to Lindex[], and		       the initial values of A from SPA into Lnzval[]. */		    lusup = Lnzval_bc_ptr[ljb];		    len = index[1];  /* LDA of lusup[] */		    for (i = istart; i < xlsub[fsupc+1]; ++i) {			irow = lsub[i];			gb = BlockNum( irow );			if ( myrow == PROW( gb, grid ) ) {			    lb = LBi( gb, grid );			    k = Lrb_indptr[lb]++; /* Random access a block */			    index[k] = irow;			    k = Lrb_valptr[lb]++;			    irow = ilsum[lb] + irow - FstBlockC( gb );			    for (j = 0, dense_col = dense; j < nsupc; ++j) {				lusup[k] = dense_col[irow];				dense_col[irow] = zero;				k += len;				dense_col += ldaspa;			    }			}		    } /* for i ... */		} else {		    Lrowind_bc_ptr[ljb] = NULL;		    Lnzval_bc_ptr[ljb] = NULL;		} /* if nrbl ... */#if ( PROFlevel>=1 )		t_l += SuperLU_timer_() - t;#endif	    } /* if mycol == pc */	} /* for jb ... */	Llu->Lrowind_bc_ptr = Lrowind_bc_ptr;	Llu->Lnzval_bc_ptr = Lnzval_bc_ptr;	Llu->Ufstnz_br_ptr = Ufstnz_br_ptr;	Llu->Unzval_br_ptr = Unzval_br_ptr;	Llu->ToRecv = ToRecv;	Llu->ToSendD = ToSendD;	Llu->ToSendR = ToSendR;	Llu->fmod = fmod;	Llu->fsendx_plist = fsendx_plist;	Llu->nfrecvx = nfrecvx;	Llu->nfsendx = nfsendx;	Llu->bmod = bmod;	Llu->bsendx_plist = bsendx_plist;	Llu->nbrecvx = nbrecvx;	Llu->nbsendx = nbsendx;	Llu->ilsum = ilsum;	Llu->ldalsum = ldaspa;	#if ( PRNTlevel>=1 )	if ( !iam ) printf(".. # L blocks %d\t# U blocks %d\n",			   nLblocks, nUblocks);#endif	SUPERLU_FREE(rb_marker);	SUPERLU_FREE(Urb_fstnz);	SUPERLU_FREE(Urb_length);	SUPERLU_FREE(Urb_indptr);	SUPERLU_FREE(Lrb_length);	SUPERLU_FREE(Lrb_number);	SUPERLU_FREE(Lrb_indptr);	SUPERLU_FREE(Lrb_valptr);	SUPERLU_FREE(dense);	/* Find the maximum buffer size. */	MPI_Allreduce(mybufmax, Llu->bufmax, NBUFFERS, mpi_int_t, 		      MPI_MAX, grid->comm);#if ( PROFlevel>=1 )	if ( !iam ) printf(".. 1st distribute time:\n "			   "\tL\t%.2f\n\tU\t%.2f\n"			   "\tu_blks %d\tnrbu %d\n--------\n",  			   t_l, t_u, u_blks, nrbu);#endif    } /* if fact == SamePattern_SameRowPerm */#if ( DEBUGlevel>=1 )    /* Memory allocated but not freed:       ilsum, fmod, fsendx_plist, bmod, bsendx_plist  */    CHECK_MALLOC(iam, "Exit zdistribute()");#endif    return (mem_use);} /* ZDISTRIBUTE */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -