📄 pdgstrf_x1.c
字号:
} /* Skip descriptor. Now point to fstnz index of block U(i,j). */ iuip[lib] += UB_DESCRIPTOR; tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; fnz = index[iuip[lib]++]; if ( segsize ) { /* Nonzero segment in U(k.j). */ ucol = &Unzval_br_ptr[lib][ruip[lib]]; for (i = 0 ; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; ucol[rel] -= tempv[i]; } tempv += ldt; } ruip[lib] += ilst - fnz; } } else { /* A(i,j) is in L. */ index = Lrowind_bc_ptr[ljb]; ldv = index[1]; /* LDA of the dest lusup. */ lptrj = BC_HEADER; luptrj = 0; ijb = index[lptrj]; while ( ijb != ib ) { /* Search for dest block -- blocks are not ordered! */ luptrj += index[lptrj+1]; lptrj += LB_DESCRIPTOR + index[lptrj+1]; ijb = index[lptrj]; } /* * Build indirect table. This is needed because the * indices are not sorted for the L blocks. */ fnz = FstBlockC( ib ); lptrj += LB_DESCRIPTOR; for (i = 0; i < index[lptrj-1]; ++i) { rel = index[lptrj + i] - fnz; indirect[rel] = i; } nzval = Lnzval_bc_ptr[ljb] + luptrj; tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; if ( segsize ) {/*#pragma _CRI cache_bypass nzval,tempv*/ for (i = 0; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; nzval[indirect[rel]] -= tempv[i]; } tempv += ldt; } nzval += ldv; } } /* if ib < jb ... */ lptr += nbrow; luptr += nbrow; } /* for lb ... */ rukp += usub[iukp - 1]; /* Move to block U(k,j+1) */ iukp += nsupc; } /* for j ... */ } /* if k L(:,k) and U(k,:) are not empty */ } /* ------------------------------------------ END MAIN LOOP: for k = ... ------------------------------------------ */#if ( VAMPIR>=1 ) VT_end(100); VT_traceoff();#endif if ( Pr*Pc > 1 ) { SUPERLU_FREE(Lsub_buf_2[0]); /* also free Lsub_buf_2[1] */ SUPERLU_FREE(Lval_buf_2[0]); /* also free Lval_buf_2[1] */ if ( Llu->bufmax[2] != 0 ) SUPERLU_FREE(Usub_buf); if ( Llu->bufmax[3] != 0 ) SUPERLU_FREE(Uval_buf); SUPERLU_FREE(send_req); } SUPERLU_FREE(Llu->ujrow); SUPERLU_FREE(tempv2d); SUPERLU_FREE(indirect); SUPERLU_FREE(iuip); SUPERLU_FREE(ruip); /* Prepare error message. */ if ( *info == 0 ) *info = n + 1;#if ( PROFlevel>=1 ) TIC(t1);#endif MPI_Allreduce( info, &iinfo, 1, mpi_int_t, MPI_MIN, grid->comm );#if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; { float msg_vol_max, msg_vol_sum, msg_cnt_max, msg_cnt_sum; MPI_Reduce( &msg_cnt, &msg_cnt_sum, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Reduce( &msg_cnt, &msg_cnt_max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm ); MPI_Reduce( &msg_vol, &msg_vol_sum, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Reduce( &msg_vol, &msg_vol_max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm ); if ( !iam ) { printf("\tPDGSTRF comm stat:" "\tAvg\tMax\t\tAvg\tMax\n" "\t\t\tCount:\t%.0f\t%.0f\tVol(MB)\t%.2f\t%.2f\n", msg_cnt_sum/Pr/Pc, msg_cnt_max, msg_vol_sum/Pr/Pc*1e-6, msg_vol_max*1e-6); } }#endif if ( iinfo == n + 1 ) *info = 0; else *info = iinfo;#if ( PRNTlevel==3 ) MPI_Allreduce( &zero_msg, &iinfo, 1, mpi_int_t, MPI_SUM, grid->comm ); if ( !iam ) printf(".. # msg of zero size\t%d\n", iinfo); MPI_Allreduce( &total_msg, &iinfo, 1, mpi_int_t, MPI_SUM, grid->comm ); if ( !iam ) printf(".. # total msg\t%d\n", iinfo);#endif#if ( PRNTlevel==2 ) for (i = 0; i < Pr * Pc; ++i) { if ( iam == i ) { dPrintLblocks(iam, nsupers, grid, Glu_persist, Llu); dPrintUblocks(iam, nsupers, grid, Glu_persist, Llu); printf("(%d)\n", iam); PrintInt10("Recv", nsupers, Llu->ToRecv); } MPI_Barrier( grid->comm ); }#endif#if ( DEBUGlevel>=3 ) printf("(%d) num_copy=%d, num_update=%d\n", iam, num_copy, num_update);#endif#if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgstrf()");#endif} /* PDGSTRF *//************************************************************************/static void pdgstrf2/************************************************************************/( superlu_options_t *options, int_t k, double thresh, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat, int* info )/* * Purpose * ======= * Factor diagonal and subdiagonal blocks and test for exact singularity. * Only the process column that owns block column *k* participates * in the work. * * Arguments * ========= * * k (input) int (global) * The column number of the block column to be factorized. * * thresh (input) double (global) * The threshold value = s_eps * anorm. * * Glu_persist (input) Glu_persist_t* * Global data structures (xsup, supno) replicated on all processes. * * grid (input) gridinfo_t* * The 2D process mesh. * * Llu (input/output) LocalLU_t* * Local data structures to store distributed L and U matrices. * * stat (output) SuperLUStat_t* * Record the statistics about the factorization. * See SuperLUStat_t structure defined in util.h. * * info (output) int* * = 0: successful exit * < 0: if info = -i, the i-th argument had an illegal value * > 0: if info = i, U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly singular, * and division by zero will occur if it is used to solve a * system of equations. * */{ int c, iam, l, pkk; int incx = 1, incy = 1; int nsupr; /* number of rows in the block (LDA) */ int luptr; int_t i, krow, j, jfst, jlst; int_t nsupc; /* number of columns in the block */ int_t *xsup = Glu_persist->xsup; double *lusup, temp; double *ujrow; double alpha = -1; *info = 0; /* Quick return. */ /* Initialization. */ iam = grid->iam; krow = PROW( k, grid ); pkk = PNUM( PROW(k, grid), PCOL(k, grid), grid ); j = LBj( k, grid ); /* Local block number */ jfst = FstBlockC( k ); jlst = FstBlockC( k+1 ); lusup = Llu->Lnzval_bc_ptr[j]; nsupc = SuperSize( k ); if ( Llu->Lrowind_bc_ptr[j] ) nsupr = Llu->Lrowind_bc_ptr[j][1]; ujrow = Llu->ujrow; luptr = 0; /* Point to the diagonal entries. */ c = nsupc; for (j = 0; j < jlst - jfst; ++j) { /* Broadcast the j-th row (nsupc - j) elements to the process column. */ if ( iam == pkk ) { /* Diagonal process. */ i = luptr; if ( options->ReplaceTinyPivot == YES || lusup[i] == 0.0 ) { if ( fabs(lusup[i]) < thresh ) { /* Diagonal */#if ( PRNTlevel>=2 ) printf("(%d) .. col %d, tiny pivot %e ", iam, jfst+j, lusup[i]);#endif /* Keep the replaced diagonal with the same sign. */ if ( lusup[i] < 0 ) lusup[i] = -thresh; else lusup[i] = thresh;#if ( PRNTlevel>=2 ) printf("replaced by %e\n", lusup[i]);#endif ++(stat->TinyPivots); } } for (l = 0; l < c; ++l, i += nsupr) ujrow[l] = lusup[i]; }#if 0 dbcast_col(ujrow, c, pkk, UjROW, grid, &c);#else MPI_Bcast(ujrow, c, MPI_DOUBLE, krow, (grid->cscp).comm); /*bcast_tree(ujrow, c, MPI_DOUBLE, krow, (24*k+j)%NTAGS, grid, COMM_COLUMN, &c);*/#endif#if ( DEBUGlevel>=2 )if ( k == 3329 && j == 2 ) { if ( iam == pkk ) { printf("..(%d) k %d, j %d: Send ujrow[0] %e\n",iam,k,j,ujrow[0]); } else { printf("..(%d) k %d, j %d: Recv ujrow[0] %e\n",iam,k,j,ujrow[0]); }}#endif if ( !lusup ) { /* Empty block column. */ --c; if ( ujrow[0] == 0.0 ) *info = j+jfst+1; continue; } /* Test for singularity. */ if ( ujrow[0] == 0.0 ) { *info = j+jfst+1; } else { /* Scale the j-th column of the matrix. */ temp = 1.0 / ujrow[0]; if ( iam == pkk ) { for (i = luptr+1; i < luptr-j+nsupr; ++i) lusup[i] *= temp; stat->ops[FACT] += nsupr-j-1; } else { for (i = luptr; i < luptr+nsupr; ++i) lusup[i] *= temp; stat->ops[FACT] += nsupr; } } /* Rank-1 update of the trailing submatrix. */ if ( --c ) { if ( iam == pkk ) { l = nsupr - j - 1;#ifdef _CRAY SGER(&l, &c, &alpha, &lusup[luptr+1], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr+1], &nsupr);#else dger_(&l, &c, &alpha, &lusup[luptr+1], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr+1], &nsupr);#endif stat->ops[FACT] += 2 * l * c; } else {#ifdef _CRAY SGER(&nsupr, &c, &alpha, &lusup[luptr], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr], &nsupr);#else dger_(&nsupr, &c, &alpha, &lusup[luptr], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr], &nsupr);#endif stat->ops[FACT] += 2 * nsupr * c; } } /* Move to the next column. */ if ( iam == pkk ) luptr += nsupr + 1; else luptr += nsupr; } /* for j ... */} /* PDGSTRF2 *//************************************************************************/static void pdgstrs2/************************************************************************/#ifdef _CRAY( int_t m, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat, _fcd ftcs1, _fcd ftcs2, _fcd ftcs3 )#else( int_t m, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat )#endif/* * Purpose * ======= * Perform parallel triangular solves * U(k,:) := A(k,:) \ L(k,k). * Only the process column that owns block column *k* participates * in the work. * * Arguments * ========= * * m (input) int (global) * Number of rows in the matrix. * * k (input) int (global) * The row number of the block row to be factorized. * * Glu_persist (input) Glu_persist_t* * Global data structures (xsup, supno) replicated on all processes. * * grid (input) gridinfo_t* * The 2D process mesh. * * Llu (input/output) LocalLU_t* * Local data structures to store distributed L and U matrices. * * stat (output) SuperLUStat_t* * Record the statistics about the factorization; * See SuperLUStat_t structure defined in util.h. * */{ int iam, pkk; int incx = 1; int nsupr; /* number of rows in the block L(:,k) (LDA) */ int segsize; int_t nsupc; /* number of columns in the block */ int_t luptr, iukp, rukp; int_t b, gb, j, klst, knsupc, lk, nb; int_t *xsup = Glu_persist->xsup; int_t *usub; double *lusup, *uval; /* Quick return. */ lk = LBi( k, grid ); /* Local block number */ if ( !Llu->Unzval_br_ptr[lk] ) return; /* Initialization. */ iam = grid->iam; pkk = PNUM( PROW(k, grid), PCOL(k, grid), grid ); klst = FstBlockC( k+1 ); knsupc = SuperSize( k ); usub = Llu->Ufstnz_br_ptr[lk]; /* index[] of block row U(k,:) */ uval = Llu->Unzval_br_ptr[lk]; nb = usub[0]; iukp = BR_HEADER; rukp = 0; if ( iam == pkk ) { lk = LBj( k, grid ); nsupr = Llu->Lrowind_bc_ptr[lk][1]; /* LDA of lusup[] */ lusup = Llu->Lnzval_bc_ptr[lk]; } else { nsupr = Llu->Lsub_buf_2[k%2][1]; /* LDA of lusup[] */ lusup = Llu->Lval_buf_2[k%2]; } /* Loop through all the row blocks. */ for (b = 0; b < nb; ++b) { gb = usub[iukp]; nsupc = SuperSize( gb ); iukp += UB_DESCRIPTOR; /* Loop through all the segments in the block. */ for (j = 0; j < nsupc; ++j) { segsize = klst - usub[iukp++]; if ( segsize ) { /* Nonzero segment. */ luptr = (knsupc - segsize) * (nsupr + 1);#ifdef _CRAY STRSV(ftcs1, ftcs2, ftcs3, &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx);#else dtrsv_("L", "N", "U", &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx);#endif stat->ops[FACT] += segsize * (segsize + 1); rukp += segsize; } } } /* for b ... */} /* PDGSTRS2 */static intprobe_recv(int iam, int source, int tag, MPI_Datatype datatype, MPI_Comm comm, int buf_size){ MPI_Status status; int count; MPI_Probe( source, tag, comm, &status ); MPI_Get_count( &status, datatype, &count ); if ( count > buf_size ) { printf("(%d) Recv'ed count %d > buffer size $d\n", iam, count, buf_size); exit(-1); } return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -