⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pdgstrf_x1.c

📁 LU分解求解矩阵方程组的解
💻 C
📖 第 1 页 / 共 3 页
字号:
			}			/* Skip descriptor.  Now point to fstnz index of 			   block U(i,j). */			iuip[lib] += UB_DESCRIPTOR;			tempv = tempv2d;			for (jj = 0; jj < nsupc; ++jj) {			    segsize = klst - usub[iukp + jj];			    fnz = index[iuip[lib]++];			    if ( segsize ) { /* Nonzero segment in U(k.j). */				ucol = &Unzval_br_ptr[lib][ruip[lib]];				for (i = 0 ; i < nbrow; ++i) {				    rel = lsub[lptr + i] - fnz;				    ucol[rel] -= tempv[i];				}				tempv += ldt;			    }			    ruip[lib] += ilst - fnz;			}		    } else { /* A(i,j) is in L. */			index = Lrowind_bc_ptr[ljb];			ldv = index[1];   /* LDA of the dest lusup. */			lptrj = BC_HEADER;			luptrj = 0;			ijb = index[lptrj];			while ( ijb != ib ) { /* Search for dest block -- 						 blocks are not ordered! */			    luptrj += index[lptrj+1];			    lptrj += LB_DESCRIPTOR + index[lptrj+1];			    ijb = index[lptrj];			}			/*			 * Build indirect table. This is needed because the			 * indices are not sorted for the L blocks.			 */			fnz = FstBlockC( ib );			lptrj += LB_DESCRIPTOR;			for (i = 0; i < index[lptrj-1]; ++i) {			    rel = index[lptrj + i] - fnz;			    indirect[rel] = i;			}			nzval = Lnzval_bc_ptr[ljb] + luptrj;			tempv = tempv2d;			for (jj = 0; jj < nsupc; ++jj) {			    segsize = klst - usub[iukp + jj];			    if ( segsize ) {/*#pragma _CRI cache_bypass nzval,tempv*/				for (i = 0; i < nbrow; ++i) {				    rel = lsub[lptr + i] - fnz;				    nzval[indirect[rel]] -= tempv[i];				}				tempv += ldt;			    }			    nzval += ldv;			}		    } /* if ib < jb ... */		    lptr += nbrow;		    luptr += nbrow;		} /* for lb ... */		rukp += usub[iukp - 1]; /* Move to block U(k,j+1) */		iukp += nsupc;	    } /* for j ... */	} /* if  k L(:,k) and U(k,:) are not empty */    }     /* ------------------------------------------       END MAIN LOOP: for k = ...       ------------------------------------------ */#if ( VAMPIR>=1 )    VT_end(100);    VT_traceoff();#endif    if ( Pr*Pc > 1 ) {	SUPERLU_FREE(Lsub_buf_2[0]); /* also free Lsub_buf_2[1] */	SUPERLU_FREE(Lval_buf_2[0]); /* also free Lval_buf_2[1] */	if ( Llu->bufmax[2] != 0 ) SUPERLU_FREE(Usub_buf);	if ( Llu->bufmax[3] != 0 ) SUPERLU_FREE(Uval_buf);	SUPERLU_FREE(send_req);    }    SUPERLU_FREE(Llu->ujrow);    SUPERLU_FREE(tempv2d);    SUPERLU_FREE(indirect);    SUPERLU_FREE(iuip);    SUPERLU_FREE(ruip);    /* Prepare error message. */    if ( *info == 0 ) *info = n + 1;#if ( PROFlevel>=1 )    TIC(t1);#endif    MPI_Allreduce( info, &iinfo, 1, mpi_int_t, MPI_MIN, grid->comm );#if ( PROFlevel>=1 )    TOC(t2, t1);    stat->utime[COMM] += t2;    {	float msg_vol_max, msg_vol_sum, msg_cnt_max, msg_cnt_sum;		MPI_Reduce( &msg_cnt, &msg_cnt_sum,		   1, MPI_FLOAT, MPI_SUM, 0, grid->comm );	MPI_Reduce( &msg_cnt, &msg_cnt_max,		   1, MPI_FLOAT, MPI_MAX, 0, grid->comm );	MPI_Reduce( &msg_vol, &msg_vol_sum,		   1, MPI_FLOAT, MPI_SUM, 0, grid->comm );	MPI_Reduce( &msg_vol, &msg_vol_max,		   1, MPI_FLOAT, MPI_MAX, 0, grid->comm );	if ( !iam ) {	    printf("\tPDGSTRF comm stat:"		   "\tAvg\tMax\t\tAvg\tMax\n"		   "\t\t\tCount:\t%.0f\t%.0f\tVol(MB)\t%.2f\t%.2f\n",		   msg_cnt_sum/Pr/Pc, msg_cnt_max,		   msg_vol_sum/Pr/Pc*1e-6, msg_vol_max*1e-6);	}    }#endif    if ( iinfo == n + 1 ) *info = 0;    else *info = iinfo;#if ( PRNTlevel==3 )    MPI_Allreduce( &zero_msg, &iinfo, 1, mpi_int_t, MPI_SUM, grid->comm );    if ( !iam ) printf(".. # msg of zero size\t%d\n", iinfo);    MPI_Allreduce( &total_msg, &iinfo, 1, mpi_int_t, MPI_SUM, grid->comm );    if ( !iam ) printf(".. # total msg\t%d\n", iinfo);#endif#if ( PRNTlevel==2 )    for (i = 0; i < Pr * Pc; ++i) {	if ( iam == i ) {	    dPrintLblocks(iam, nsupers, grid, Glu_persist, Llu);	    dPrintUblocks(iam, nsupers, grid, Glu_persist, Llu);	    printf("(%d)\n", iam);	    PrintInt10("Recv", nsupers, Llu->ToRecv);	}	MPI_Barrier( grid->comm );    }#endif#if ( DEBUGlevel>=3 )    printf("(%d) num_copy=%d, num_update=%d\n", iam, num_copy, num_update);#endif#if ( DEBUGlevel>=1 )    CHECK_MALLOC(iam, "Exit pdgstrf()");#endif} /* PDGSTRF *//************************************************************************/static void pdgstrf2/************************************************************************/( superlu_options_t *options, int_t k, double thresh, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat, int* info )/*  * Purpose * ======= *   Factor diagonal and subdiagonal blocks and test for exact singularity. *   Only the process column that owns block column *k* participates *   in the work. *  * Arguments * ========= * * k      (input) int (global) *        The column number of the block column to be factorized. * * thresh (input) double (global) *        The threshold value = s_eps * anorm. * * Glu_persist (input) Glu_persist_t* *        Global data structures (xsup, supno) replicated on all processes. * * grid   (input) gridinfo_t* *        The 2D process mesh. * * Llu    (input/output) LocalLU_t* *        Local data structures to store distributed L and U matrices. * * stat   (output) SuperLUStat_t* *        Record the statistics about the factorization. *        See SuperLUStat_t structure defined in util.h. * * info   (output) int* *        = 0: successful exit *        < 0: if info = -i, the i-th argument had an illegal value *        > 0: if info = i, U(i,i) is exactly zero. The factorization has *             been completed, but the factor U is exactly singular, *             and division by zero will occur if it is used to solve a *             system of equations. * */{    int    c, iam, l, pkk;    int    incx = 1, incy = 1;    int    nsupr; /* number of rows in the block (LDA) */    int    luptr;    int_t  i, krow, j, jfst, jlst;    int_t  nsupc; /* number of columns in the block */    int_t  *xsup = Glu_persist->xsup;    double *lusup, temp;    double *ujrow;    double alpha = -1;    *info = 0;    /* Quick return. */    /* Initialization. */    iam   = grid->iam;    krow  = PROW( k, grid );    pkk   = PNUM( PROW(k, grid), PCOL(k, grid), grid );    j     = LBj( k, grid ); /* Local block number */    jfst  = FstBlockC( k );    jlst  = FstBlockC( k+1 );    lusup = Llu->Lnzval_bc_ptr[j];    nsupc = SuperSize( k );    if ( Llu->Lrowind_bc_ptr[j] ) nsupr = Llu->Lrowind_bc_ptr[j][1];    ujrow = Llu->ujrow;    luptr = 0; /* Point to the diagonal entries. */    c = nsupc;    for (j = 0; j < jlst - jfst; ++j) {	/* Broadcast the j-th row (nsupc - j) elements to	   the process column. */	if ( iam == pkk ) { /* Diagonal process. */	    i = luptr;	    if ( options->ReplaceTinyPivot == YES || lusup[i] == 0.0 ) {		if ( fabs(lusup[i]) < thresh ) { /* Diagonal */#if ( PRNTlevel>=2 )		    printf("(%d) .. col %d, tiny pivot %e  ",			   iam, jfst+j, lusup[i]);#endif		    /* Keep the replaced diagonal with the same sign. */		    if ( lusup[i] < 0 ) lusup[i] = -thresh;		    else lusup[i] = thresh;#if ( PRNTlevel>=2 )		    printf("replaced by %e\n", lusup[i]);#endif		    ++(stat->TinyPivots);		}	    }	    for (l = 0; l < c; ++l, i += nsupr)	ujrow[l] = lusup[i];	}#if 0	dbcast_col(ujrow, c, pkk, UjROW, grid, &c);#else	MPI_Bcast(ujrow, c, MPI_DOUBLE, krow, (grid->cscp).comm);	/*bcast_tree(ujrow, c, MPI_DOUBLE, krow, (24*k+j)%NTAGS,		   grid, COMM_COLUMN, &c);*/#endif#if ( DEBUGlevel>=2 )if ( k == 3329 && j == 2 ) {	if ( iam == pkk ) {	    printf("..(%d) k %d, j %d: Send ujrow[0] %e\n",iam,k,j,ujrow[0]);	} else {	    printf("..(%d) k %d, j %d: Recv ujrow[0] %e\n",iam,k,j,ujrow[0]);	}}#endif	if ( !lusup ) { /* Empty block column. */	    --c;	    if ( ujrow[0] == 0.0 ) *info = j+jfst+1;	    continue;	}	/* Test for singularity. */	if ( ujrow[0] == 0.0 ) {	    *info = j+jfst+1;	} else {	    /* Scale the j-th column of the matrix. */	    temp = 1.0 / ujrow[0];	    if ( iam == pkk ) {		for (i = luptr+1; i < luptr-j+nsupr; ++i) lusup[i] *= temp;		stat->ops[FACT] += nsupr-j-1;	    } else {		for (i = luptr; i < luptr+nsupr; ++i) lusup[i] *= temp;		stat->ops[FACT] += nsupr;	    }	}	    	/* Rank-1 update of the trailing submatrix. */	if ( --c ) {	    if ( iam == pkk ) {		l = nsupr - j - 1;#ifdef _CRAY		SGER(&l, &c, &alpha, &lusup[luptr+1], &incx,		     &ujrow[1], &incy, &lusup[luptr+nsupr+1], &nsupr);#else		dger_(&l, &c, &alpha, &lusup[luptr+1], &incx,		      &ujrow[1], &incy, &lusup[luptr+nsupr+1], &nsupr);#endif		stat->ops[FACT] += 2 * l * c;	    } else {#ifdef _CRAY		SGER(&nsupr, &c, &alpha, &lusup[luptr], &incx, 		     &ujrow[1], &incy, &lusup[luptr+nsupr], &nsupr);#else		dger_(&nsupr, &c, &alpha, &lusup[luptr], &incx, 		      &ujrow[1], &incy, &lusup[luptr+nsupr], &nsupr);#endif		stat->ops[FACT] += 2 * nsupr * c;	    }	}		/* Move to the next column. */	if ( iam == pkk ) luptr += nsupr + 1;	else luptr += nsupr;    } /* for j ... */} /* PDGSTRF2 *//************************************************************************/static void pdgstrs2/************************************************************************/#ifdef _CRAY( int_t m, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat, _fcd ftcs1, _fcd ftcs2, _fcd ftcs3 )#else( int_t m, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat )#endif/*  * Purpose * ======= *   Perform parallel triangular solves *           U(k,:) := A(k,:) \ L(k,k).  *   Only the process column that owns block column *k* participates *   in the work. *  * Arguments * ========= * * m      (input) int (global) *        Number of rows in the matrix. * * k      (input) int (global) *        The row number of the block row to be factorized. * * Glu_persist (input) Glu_persist_t* *        Global data structures (xsup, supno) replicated on all processes. * * grid   (input) gridinfo_t* *        The 2D process mesh. * * Llu    (input/output) LocalLU_t* *        Local data structures to store distributed L and U matrices. * * stat   (output) SuperLUStat_t* *        Record the statistics about the factorization;  *        See SuperLUStat_t structure defined in util.h. * */{    int    iam, pkk;    int    incx = 1;    int    nsupr; /* number of rows in the block L(:,k) (LDA) */    int    segsize;    int_t  nsupc; /* number of columns in the block */    int_t  luptr, iukp, rukp;    int_t  b, gb, j, klst, knsupc, lk, nb;    int_t  *xsup = Glu_persist->xsup;    int_t  *usub;    double *lusup, *uval;    /* Quick return. */    lk = LBi( k, grid ); /* Local block number */    if ( !Llu->Unzval_br_ptr[lk] ) return;    /* Initialization. */    iam  = grid->iam;    pkk  = PNUM( PROW(k, grid), PCOL(k, grid), grid );    klst = FstBlockC( k+1 );    knsupc = SuperSize( k );    usub = Llu->Ufstnz_br_ptr[lk]; /* index[] of block row U(k,:) */    uval = Llu->Unzval_br_ptr[lk];    nb = usub[0];    iukp = BR_HEADER;    rukp = 0;    if ( iam == pkk ) {	lk = LBj( k, grid );	nsupr = Llu->Lrowind_bc_ptr[lk][1]; /* LDA of lusup[] */	lusup = Llu->Lnzval_bc_ptr[lk];    } else {	nsupr = Llu->Lsub_buf_2[k%2][1]; /* LDA of lusup[] */	lusup = Llu->Lval_buf_2[k%2];    }    /* Loop through all the row blocks. */    for (b = 0; b < nb; ++b) {	gb = usub[iukp];	nsupc = SuperSize( gb );	iukp += UB_DESCRIPTOR;	/* Loop through all the segments in the block. */	for (j = 0; j < nsupc; ++j) {	    segsize = klst - usub[iukp++]; 	    if ( segsize ) { /* Nonzero segment. */		luptr = (knsupc - segsize) * (nsupr + 1);#ifdef _CRAY		STRSV(ftcs1, ftcs2, ftcs3, &segsize, &lusup[luptr], &nsupr, 		      &uval[rukp], &incx);#else		dtrsv_("L", "N", "U", &segsize, &lusup[luptr], &nsupr, 		       &uval[rukp], &incx);#endif		stat->ops[FACT] += segsize * (segsize + 1);		rukp += segsize;	    }	}    } /* for b ... */} /* PDGSTRS2 */static intprobe_recv(int iam, int source, int tag, MPI_Datatype datatype, MPI_Comm comm,	   int buf_size){    MPI_Status status;    int count;     MPI_Probe( source, tag, comm, &status );    MPI_Get_count( &status, datatype, &count );    if ( count > buf_size ) {        printf("(%d) Recv'ed count %d > buffer size $d\n",	       iam, count, buf_size);	exit(-1);    }    return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -