pdgstrs.c.bak
/*
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 *
 */

#include "superlu_ddefs.h"

#define ISEND_IRECV

/*
 * Function prototypes
 */
#ifdef _CRAY
fortran void STRSM(_fcd, _fcd, _fcd, _fcd, int*, int*, double*, double*,
                   int*, double*, int*);
fortran void SGEMM(_fcd, _fcd, int*, int*, int*, double*, double*, int*,
                   double*, int*, double*, double*, int*);
_fcd ftcs1;
_fcd ftcs2;
_fcd ftcs3;
#endif

int_t
pdReDistribute_B_to_X(double *B, int_t m_loc, int nrhs, int_t ldb,
                      int_t fst_row, int_t *ilsum, double *x,
                      ScalePermstruct_t *ScalePermstruct,
                      Glu_persist_t *Glu_persist,
                      gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct)
{
/*
 * Purpose
 * =======
 *   Re-distribute B on the diagonal processes of the 2D process mesh.
 *
 * Note
 * ====
 *   This routine can only be called after the routine pxgstrs_init(),
 *   in which the structures of the send and receive buffers are set up.
 *
 * Arguments
 * =========
 *
 * B      (input) double*
 *        The distributed right-hand side matrix of the possibly
 *        equilibrated system.
 *
 * m_loc  (input) int (local)
 *        The local row dimension of matrix B.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 *
 * ldb    (input) int (local)
 *        Leading dimension of matrix B.
 *
 * fst_row (input) int (global)
 *        The row number of B's first row in the global matrix.
 *
 * ilsum  (input) int* (global)
 *        Starting position of each supernode in a full array.
 *
 * x      (output) double*
 *        The solution vector. It is valid only on the diagonal processes.
 *
 * ScalePermstruct (input) ScalePermstruct_t*
 *        The data structure to store the scaling and permutation vectors
 *        describing the transformations performed to the original matrix A.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
 * SOLVEstruct (input) SOLVEstruct_t*
 *        Contains the information for the communication during the
 *        solution phase.
 *
 * Return value
 * ============
 *
 */
    int  *SendCnt, *SendCnt_nrhs, *RecvCnt, *RecvCnt_nrhs;
    int  *sdispls, *sdispls_nrhs, *rdispls, *rdispls_nrhs;
    int  *ptr_to_ibuf, *ptr_to_dbuf;
    int_t  *perm_r, *perm_c; /* row and column permutation vectors */
    int_t  *send_ibuf, *recv_ibuf;
    double *send_dbuf, *recv_dbuf;
    int_t  *xsup, *supno;
    int_t  i, ii, irow, gbi, j, jj, k, knsupc, l, lk;
    int    p, procs;
    pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pdReDistribute_B_to_X()");
#endif

    /* ------------------------------------------------------------
       INITIALIZATION.
       ------------------------------------------------------------*/
    perm_r = ScalePermstruct->perm_r;
    perm_c = ScalePermstruct->perm_c;
    procs = grid->nprow * grid->npcol;
    xsup = Glu_persist->xsup;
    supno = Glu_persist->supno;
    SendCnt      = gstrs_comm->B_to_X_SendCnt;
    SendCnt_nrhs = gstrs_comm->B_to_X_SendCnt +   procs;
    RecvCnt      = gstrs_comm->B_to_X_SendCnt + 2*procs;
    RecvCnt_nrhs = gstrs_comm->B_to_X_SendCnt + 3*procs;
    sdispls      = gstrs_comm->B_to_X_SendCnt + 4*procs;
    sdispls_nrhs = gstrs_comm->B_to_X_SendCnt + 5*procs;
    rdispls      = gstrs_comm->B_to_X_SendCnt + 6*procs;
    rdispls_nrhs = gstrs_comm->B_to_X_SendCnt + 7*procs;
    ptr_to_ibuf = gstrs_comm->ptr_to_ibuf;
    ptr_to_dbuf = gstrs_comm->ptr_to_dbuf;

    /* ------------------------------------------------------------
       NOW COMMUNICATE THE ACTUAL DATA.
       ------------------------------------------------------------*/
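    /* Added note (not part of the original routine): pxgstrs_init() lays out
       gstrs_comm->B_to_X_SendCnt as eight consecutive blocks of 'procs' ints
       (send/receive counts and displacements, with and without nrhs), which
       the pointer assignments above simply name.  The guarded snippet below
       is only an illustrative diagnostic sketch for inspecting the
       per-process counts before the all-to-all exchanges; it follows the
       file's DEBUGlevel convention and uses no names beyond those already in
       scope. */
#if ( DEBUGlevel>=2 )
    for (p = 0; p < procs; ++p) {
        printf("(%d) B_to_X to/from process %d: SendCnt %d, RecvCnt %d\n",
               grid->iam, p, SendCnt[p], RecvCnt[p]);
    }
#endif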
    k = sdispls[procs-1] + SendCnt[procs-1]; /* Total number of sends */
    l = rdispls[procs-1] + RecvCnt[procs-1]; /* Total number of receives */
    if ( !(send_ibuf = intMalloc_dist(k + l)) )
        ABORT("Malloc fails for send_ibuf[].");
    recv_ibuf = send_ibuf + k;
    if ( !(send_dbuf = doubleMalloc_dist((k + l) * (size_t)nrhs)) )
        ABORT("Malloc fails for send_dbuf[].");
    recv_dbuf = send_dbuf + k * nrhs;

    for (p = 0; p < procs; ++p) {
        ptr_to_ibuf[p] = sdispls[p];
        ptr_to_dbuf[p] = sdispls[p] * nrhs;
    }

    /* Copy the row indices and values to the send buffer. */
    for (i = 0, l = fst_row; i < m_loc; ++i, ++l) {
        irow = perm_c[perm_r[l]]; /* Row number in Pc*Pr*B */
        gbi = BlockNum( irow );
        p = PNUM( PROW(gbi,grid), PCOL(gbi,grid), grid ); /* Diagonal process */
        k = ptr_to_ibuf[p];
        send_ibuf[k] = irow;
        k = ptr_to_dbuf[p];
        RHS_ITERATE(j) { /* RHS is stored in row major in the buffer. */
            send_dbuf[k++] = B[i + j*ldb];
        }
        ++ptr_to_ibuf[p];
        ptr_to_dbuf[p] += nrhs;
    }

    /* Communicate the (permuted) row indices. */
    MPI_Alltoallv(send_ibuf, SendCnt, sdispls, mpi_int_t,
                  recv_ibuf, RecvCnt, rdispls, mpi_int_t, grid->comm);

    /* Communicate the numerical values. */
    MPI_Alltoallv(send_dbuf, SendCnt_nrhs, sdispls_nrhs, MPI_DOUBLE,
                  recv_dbuf, RecvCnt_nrhs, rdispls_nrhs, MPI_DOUBLE,
                  grid->comm);

    /* ------------------------------------------------------------
       Copy buffer into X on the diagonal processes.
       ------------------------------------------------------------*/
    ii = 0;
    for (p = 0; p < procs; ++p) {
        jj = rdispls_nrhs[p];
        for (i = 0; i < RecvCnt[p]; ++i) { /* Only the diagonal processes do
                                              this; the off-diagonal processes
                                              have 0 RecvCnt. */
            irow = recv_ibuf[ii]; /* The permuted row index. */
            k = BlockNum( irow );
            knsupc = SuperSize( k );
            lk = LBi( k, grid );  /* Local block number. */
            l = X_BLK( lk );
            x[l - XK_H] = k;      /* Block number prepended in the header. */
            irow = irow - FstBlockC(k); /* Relative row number in X-block */
            RHS_ITERATE(j) {
                x[l + irow + j*knsupc] = recv_dbuf[jj++];
            }
            ++ii;
        }
    }

    SUPERLU_FREE(send_ibuf);
    SUPERLU_FREE(send_dbuf);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Exit pdReDistribute_B_to_X()");
#endif
    return 0;
} /* pdReDistribute_B_to_X */


int_t
pdReDistribute_X_to_B(int_t n, double *B, int_t m_loc, int_t ldb,
                      int_t fst_row, int_t nrhs, double *x, int_t *ilsum,
                      ScalePermstruct_t *ScalePermstruct,
                      Glu_persist_t *Glu_persist,
                      gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct)
{
/*
 * Purpose
 * =======
 *   Re-distribute X on the diagonal processes to B distributed on all
 *   the processes.
 *
 * Note
 * ====
 *   This routine can only be called after the routine pxgstrs_init(),
 *   in which the structures of the send and receive buffers are set up.
 *
 */
    int_t  i, ii, irow, j, jj, k, knsupc, nsupers, l, lk;
    int_t  *xsup, *supno;
    int  *SendCnt, *SendCnt_nrhs, *RecvCnt, *RecvCnt_nrhs;
    int  *sdispls, *rdispls, *sdispls_nrhs, *rdispls_nrhs;
    int  *ptr_to_ibuf, *ptr_to_dbuf;
    int_t  *send_ibuf, *recv_ibuf;
    double *send_dbuf, *recv_dbuf;
    int_t  *row_to_proc = SOLVEstruct->row_to_proc; /* row-process mapping */
    pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm;
    int  iam, p, q, pkk, procs;
    int_t  num_diag_procs, *diag_procs;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pdReDistribute_X_to_B()");
#endif

    /* ------------------------------------------------------------
       INITIALIZATION.
       ------------------------------------------------------------*/
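    /* Added summary comment (not in the original source): this routine
       proceeds in three steps.  (1) Each diagonal process walks its local
       X blocks supernode by supernode and packs row indices and values into
       send buffers, choosing the destination with the row_to_proc[] mapping.
       (2) Two MPI_Alltoallv calls exchange the permuted row indices and the
       numerical values.  (3) Every process scatters the received values into
       its local rows of B, using fst_row to form local row numbers. */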
    xsup = Glu_persist->xsup;
    supno = Glu_persist->supno;
    nsupers = Glu_persist->supno[n-1] + 1;
    iam = grid->iam;
    procs = grid->nprow * grid->npcol;
    SendCnt      = gstrs_comm->X_to_B_SendCnt;
    SendCnt_nrhs = gstrs_comm->X_to_B_SendCnt +   procs;
    RecvCnt      = gstrs_comm->X_to_B_SendCnt + 2*procs;
    RecvCnt_nrhs = gstrs_comm->X_to_B_SendCnt + 3*procs;
    sdispls      = gstrs_comm->X_to_B_SendCnt + 4*procs;
    sdispls_nrhs = gstrs_comm->X_to_B_SendCnt + 5*procs;
    rdispls      = gstrs_comm->X_to_B_SendCnt + 6*procs;
    rdispls_nrhs = gstrs_comm->X_to_B_SendCnt + 7*procs;
    ptr_to_ibuf = gstrs_comm->ptr_to_ibuf;
    ptr_to_dbuf = gstrs_comm->ptr_to_dbuf;

    k = sdispls[procs-1] + SendCnt[procs-1]; /* Total number of sends */
    l = rdispls[procs-1] + RecvCnt[procs-1]; /* Total number of receives */
    if ( !(send_ibuf = intMalloc_dist(k + l)) )
        ABORT("Malloc fails for send_ibuf[].");
    recv_ibuf = send_ibuf + k;
    if ( !(send_dbuf = doubleMalloc_dist((k + l)*nrhs)) )
        ABORT("Malloc fails for send_dbuf[].");
    recv_dbuf = send_dbuf + k * nrhs;

    for (p = 0; p < procs; ++p) {
        ptr_to_ibuf[p] = sdispls[p];
        ptr_to_dbuf[p] = sdispls_nrhs[p];
    }
    num_diag_procs = SOLVEstruct->num_diag_procs;
    diag_procs = SOLVEstruct->diag_procs;

    for (p = 0; p < num_diag_procs; ++p) { /* For all diagonal processes. */
        pkk = diag_procs[p];
        if ( iam == pkk ) {
            for (k = p; k < nsupers; k += num_diag_procs) {
                knsupc = SuperSize( k );
                lk = LBi( k, grid ); /* Local block number */
                irow = FstBlockC( k );
                l = X_BLK( lk );
                for (i = 0; i < knsupc; ++i) {
#if 0
                    ii = inv_perm_c[irow]; /* Apply X <== Pc'*Y */
#else
                    ii = irow;
#endif
                    q = row_to_proc[ii];
                    jj = ptr_to_ibuf[q];
                    send_ibuf[jj] = ii;
                    jj = ptr_to_dbuf[q];
                    RHS_ITERATE(j) { /* RHS stored in row major in buffer. */
                        send_dbuf[jj++] = x[l + i + j*knsupc];
                    }
                    ++ptr_to_ibuf[q];
                    ptr_to_dbuf[q] += nrhs;
                    ++irow;
                }
            }
        }
    }

    /* ------------------------------------------------------------
       COMMUNICATE THE (PERMUTED) ROW INDICES AND NUMERICAL VALUES.
       ------------------------------------------------------------*/
    MPI_Alltoallv(send_ibuf, SendCnt, sdispls, mpi_int_t,
                  recv_ibuf, RecvCnt, rdispls, mpi_int_t, grid->comm);
    MPI_Alltoallv(send_dbuf, SendCnt_nrhs, sdispls_nrhs, MPI_DOUBLE,
                  recv_dbuf, RecvCnt_nrhs, rdispls_nrhs, MPI_DOUBLE,
                  grid->comm);

    /* ------------------------------------------------------------
       COPY THE BUFFER INTO B.
       ------------------------------------------------------------*/
    for (i = 0, k = 0; i < m_loc; ++i) {
        irow = recv_ibuf[i];
        irow -= fst_row; /* Relative row number */
        RHS_ITERATE(j) { /* RHS is stored in row major in the buffer. */
            B[irow + j*ldb] = recv_dbuf[k++];
        }
    }

    SUPERLU_FREE(send_ibuf);
    SUPERLU_FREE(send_dbuf);
#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Exit pdReDistribute_X_to_B()");
#endif
    return 0;
} /* pdReDistribute_X_to_B */


void
pdgstrs(int_t n, LUstruct_t *LUstruct,
        ScalePermstruct_t *ScalePermstruct,
        gridinfo_t *grid, double *B,
        int_t m_loc, int_t fst_row, int_t ldb, int nrhs,
        SOLVEstruct_t *SOLVEstruct,
        SuperLUStat_t *stat, int *info)
{
/*
 * Purpose
 * =======
 *
 * PDGSTRS solves a system of distributed linear equations
 * A*X = B with a general N-by-N matrix A using the LU factorization
 * computed by PDGSTRF.
 * If the equilibration, and row and column permutations were performed,
 * the LU factorization was performed for A1 where
 *     A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U
 * and the linear system solved is
 *     A1 * Y = Pc*Pr*B1, where B was overwritten by B1 = diag(R)*B, and
 * the permutation to B1 by Pc*Pr is applied internally in this routine.
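 *
 * Added usage sketch (not part of the original header; the driver-side
 * variable names below are illustrative only). After the LU factorization
 * by PDGSTRF, a typical call looks like:
 *
 *     pdgstrs(n, &LUstruct, &ScalePermstruct, &grid, B, m_loc, fst_row,
 *             ldb, nrhs, &SOLVEstruct, &stat, &info);
 *
 * on return, B is overwritten by the solution as described under
 * Arguments below.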
 *
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures storing L and U factors.
 *        The L and U factors are obtained from PDGSTRF for
 *        the possibly scaled and permuted matrix A.
 *        See superlu_ddefs.h for the definition of 'LUstruct_t'.
 *        A may be scaled and permuted into A1, so that
 *        A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_defs.h for the definition of 'gridinfo_t'.
 *
 * B      (input/output) double*
 *        On entry, the distributed right-hand side matrix of the possibly
 *        equilibrated system. That is, B may be overwritten by diag(R)*B.
 *        On exit, the distributed solution matrix Y of the possibly
 *        equilibrated system if info = 0, where Y = Pc*diag(C)^(-1)*X,
 *        and X is the solution of the original system.
 *
 * m_loc  (input) int (local)
 *        The local row dimension of matrix B.
 *
 * fst_row (input) int (global)
 *        The row number of B's first row in the global matrix.
 *
 * ldb    (input) int (local)
 *        The leading dimension of matrix B.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 *
 * SOLVEstruct (output) SOLVEstruct_t* (global)
 *        Contains the information for the communication during the
 *        solution phase.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the triangular solves.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *