📄 pzgstrs.c
字号:
/* * -- Distributed SuperLU routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley. * March 15, 2003 * */#include "superlu_zdefs.h"#define ISEND_IRECV/* * Function prototypes */#ifdef _CRAYfortran void CTRSM(_fcd, _fcd, _fcd, _fcd, int*, int*, doublecomplex*, doublecomplex*, int*, doublecomplex*, int*);_fcd ftcs1;_fcd ftcs2;_fcd ftcs3;#endifint_tpzReDistribute_B_to_X(doublecomplex *B, int_t m_loc, int nrhs, int_t ldb, int_t fst_row, int_t *ilsum, doublecomplex *x, ScalePermstruct_t *ScalePermstruct, Glu_persist_t *Glu_persist, gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct){/* * Purpose * ======= * Re-distribute B on the diagonal processes of the 2D process mesh. * * Note * ==== * This routine can only be called after the routine pxgstrs_init(), * in which the structures of the send and receive buffers are set up. * * Arguments * ========= * * B (input) doublecomplex* * The distributed right-hand side matrix of the possibly * equilibrated system. * * m_loc (input) int (local) * The local row dimension of matrix B. * * nrhs (input) int (global) * Number of right-hand sides. * * ldb (input) int (local) * Leading dimension of matrix B. * * fst_row (input) int (global) * The row number of B's first row in the global matrix. * * ilsum (input) int* (global) * Starting position of each supernode in a full array. * * x (output) doublecomplex* * The solution vector. It is valid only on the diagonal processes. * * ScalePermstruct (input) ScalePermstruct_t* * The data structure to store the scaling and permutation vectors * describing the transformations performed to the original matrix A. * * grid (input) gridinfo_t* * The 2D process mesh. * * SOLVEstruct (input) SOLVEstruct_t* * Contains the information for the communication during the * solution phase. * * Return value * ============ * */ int *SendCnt, *SendCnt_nrhs, *RecvCnt, *RecvCnt_nrhs; int *sdispls, *sdispls_nrhs, *rdispls, *rdispls_nrhs; int *ptr_to_ibuf, *ptr_to_dbuf; int_t *perm_r, *perm_c; /* row and column permutation vectors */ int_t *send_ibuf, *recv_ibuf; doublecomplex *send_dbuf, *recv_dbuf; int_t *xsup, *supno; int_t i, ii, irow, gbi, j, jj, k, knsupc, l, lk; int p, procs; pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm;#if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Enter pzReDistribute_B_to_X()");#endif /* ------------------------------------------------------------ INITIALIZATION. ------------------------------------------------------------*/ perm_r = ScalePermstruct->perm_r; perm_c = ScalePermstruct->perm_c; procs = grid->nprow * grid->npcol; xsup = Glu_persist->xsup; supno = Glu_persist->supno; SendCnt = gstrs_comm->B_to_X_SendCnt; SendCnt_nrhs = gstrs_comm->B_to_X_SendCnt + procs; RecvCnt = gstrs_comm->B_to_X_SendCnt + 2*procs; RecvCnt_nrhs = gstrs_comm->B_to_X_SendCnt + 3*procs; sdispls = gstrs_comm->B_to_X_SendCnt + 4*procs; sdispls_nrhs = gstrs_comm->B_to_X_SendCnt + 5*procs; rdispls = gstrs_comm->B_to_X_SendCnt + 6*procs; rdispls_nrhs = gstrs_comm->B_to_X_SendCnt + 7*procs; ptr_to_ibuf = gstrs_comm->ptr_to_ibuf; ptr_to_dbuf = gstrs_comm->ptr_to_dbuf; /* ------------------------------------------------------------ NOW COMMUNICATE THE ACTUAL DATA. ------------------------------------------------------------*/ k = sdispls[procs-1] + SendCnt[procs-1]; /* Total number of sends */ l = rdispls[procs-1] + RecvCnt[procs-1]; /* Total number of receives */ if ( !(send_ibuf = intMalloc_dist(k + l)) ) ABORT("Malloc fails for send_ibuf[]."); recv_ibuf = send_ibuf + k; if ( !(send_dbuf = doublecomplexMalloc_dist((k + l)* (size_t)nrhs)) ) ABORT("Malloc fails for send_dbuf[]."); recv_dbuf = send_dbuf + k * nrhs; for (p = 0; p < procs; ++p) { ptr_to_ibuf[p] = sdispls[p]; ptr_to_dbuf[p] = sdispls[p] * nrhs; } /* Copy the row indices and values to the send buffer. */ for (i = 0, l = fst_row; i < m_loc; ++i, ++l) { irow = perm_c[perm_r[l]]; /* Row number in Pc*Pr*B */ gbi = BlockNum( irow ); p = PNUM( PROW(gbi,grid), PCOL(gbi,grid), grid ); /* Diagonal process */ k = ptr_to_ibuf[p]; send_ibuf[k] = irow; k = ptr_to_dbuf[p]; RHS_ITERATE(j) { /* RHS is stored in row major in the buffer. */ send_dbuf[k++] = B[i + j*ldb]; } ++ptr_to_ibuf[p]; ptr_to_dbuf[p] += nrhs; } /* Communicate the (permuted) row indices. */ MPI_Alltoallv(send_ibuf, SendCnt, sdispls, mpi_int_t, recv_ibuf, RecvCnt, rdispls, mpi_int_t, grid->comm); /* Communicate the numerical values. */ MPI_Alltoallv(send_dbuf, SendCnt_nrhs, sdispls_nrhs, SuperLU_MPI_DOUBLE_COMPLEX, recv_dbuf, RecvCnt_nrhs, rdispls_nrhs, SuperLU_MPI_DOUBLE_COMPLEX, grid->comm); /* ------------------------------------------------------------ Copy buffer into X on the diagonal processes. ------------------------------------------------------------*/ ii = 0; for (p = 0; p < procs; ++p) { jj = rdispls_nrhs[p]; for (i = 0; i < RecvCnt[p]; ++i) { /* Only the diagonal processes do this; the off-diagonal processes have 0 RecvCnt. */ irow = recv_ibuf[ii]; /* The permuted row index. */ k = BlockNum( irow ); knsupc = SuperSize( k ); lk = LBi( k, grid ); /* Local block number. */ l = X_BLK( lk ); x[l - XK_H].r = k; /* Block number prepended in the header. */ x[l - XK_H].i = 0; irow = irow - FstBlockC(k); /* Relative row number in X-block */ RHS_ITERATE(j) { x[l + irow + j*knsupc] = recv_dbuf[jj++]; } ++ii; } } SUPERLU_FREE(send_ibuf); SUPERLU_FREE(send_dbuf); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Exit pzReDistribute_B_to_X()");#endif return 0;} /* pzReDistribute_B_to_X */int_tpzReDistribute_X_to_B(int_t n, doublecomplex *B, int_t m_loc, int_t ldb, int_t fst_row, int_t nrhs, doublecomplex *x, int_t *ilsum, ScalePermstruct_t *ScalePermstruct, Glu_persist_t *Glu_persist, gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct){/* * Purpose * ======= * Re-distribute X on the diagonal processes to B distributed on all * the processes. * * Note * ==== * This routine can only be called after the routine pxgstrs_init(), * in which the structures of the send and receive buffers are set up. * */ int_t i, ii, irow, j, jj, k, knsupc, nsupers, l, lk; int_t *xsup, *supno; int *SendCnt, *SendCnt_nrhs, *RecvCnt, *RecvCnt_nrhs; int *sdispls, *rdispls, *sdispls_nrhs, *rdispls_nrhs; int *ptr_to_ibuf, *ptr_to_dbuf; int_t *send_ibuf, *recv_ibuf; doublecomplex *send_dbuf, *recv_dbuf; int_t *row_to_proc = SOLVEstruct->row_to_proc; /* row-process mapping */ pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm; int iam, p, q, pkk, procs; int_t num_diag_procs, *diag_procs;#if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Enter pzReDistribute_X_to_B()");#endif /* ------------------------------------------------------------ INITIALIZATION. ------------------------------------------------------------*/ xsup = Glu_persist->xsup; supno = Glu_persist->supno; nsupers = Glu_persist->supno[n-1] + 1; iam = grid->iam; procs = grid->nprow * grid->npcol; SendCnt = gstrs_comm->X_to_B_SendCnt; SendCnt_nrhs = gstrs_comm->X_to_B_SendCnt + procs; RecvCnt = gstrs_comm->X_to_B_SendCnt + 2*procs; RecvCnt_nrhs = gstrs_comm->X_to_B_SendCnt + 3*procs; sdispls = gstrs_comm->X_to_B_SendCnt + 4*procs; sdispls_nrhs = gstrs_comm->X_to_B_SendCnt + 5*procs; rdispls = gstrs_comm->X_to_B_SendCnt + 6*procs; rdispls_nrhs = gstrs_comm->X_to_B_SendCnt + 7*procs; ptr_to_ibuf = gstrs_comm->ptr_to_ibuf; ptr_to_dbuf = gstrs_comm->ptr_to_dbuf; k = sdispls[procs-1] + SendCnt[procs-1]; /* Total number of sends */ l = rdispls[procs-1] + RecvCnt[procs-1]; /* Total number of receives */ if ( !(send_ibuf = intMalloc_dist(k + l)) ) ABORT("Malloc fails for send_ibuf[]."); recv_ibuf = send_ibuf + k; if ( !(send_dbuf = doublecomplexMalloc_dist((k + l)*nrhs)) ) ABORT("Malloc fails for send_dbuf[]."); recv_dbuf = send_dbuf + k * nrhs; for (p = 0; p < procs; ++p) { ptr_to_ibuf[p] = sdispls[p]; ptr_to_dbuf[p] = sdispls_nrhs[p]; } num_diag_procs = SOLVEstruct->num_diag_procs; diag_procs = SOLVEstruct->diag_procs; for (p = 0; p < num_diag_procs; ++p) { /* For all diagonal processes. */ pkk = diag_procs[p]; if ( iam == pkk ) { for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); lk = LBi( k, grid ); /* Local block number */ irow = FstBlockC( k ); l = X_BLK( lk ); for (i = 0; i < knsupc; ++i) {#if 0 ii = inv_perm_c[irow]; /* Apply X <== Pc'*Y */#else ii = irow;#endif q = row_to_proc[ii]; jj = ptr_to_ibuf[q]; send_ibuf[jj] = ii; jj = ptr_to_dbuf[q]; RHS_ITERATE(j) { /* RHS stored in row major in buffer. */ send_dbuf[jj++] = x[l + i + j*knsupc]; } ++ptr_to_ibuf[q]; ptr_to_dbuf[q] += nrhs; ++irow; } } } } /* ------------------------------------------------------------ COMMUNICATE THE (PERMUTED) ROW INDICES AND NUMERICAL VALUES. ------------------------------------------------------------*/ MPI_Alltoallv(send_ibuf, SendCnt, sdispls, mpi_int_t, recv_ibuf, RecvCnt, rdispls, mpi_int_t, grid->comm); MPI_Alltoallv(send_dbuf, SendCnt_nrhs, sdispls_nrhs, SuperLU_MPI_DOUBLE_COMPLEX, recv_dbuf, RecvCnt_nrhs, rdispls_nrhs, SuperLU_MPI_DOUBLE_COMPLEX, grid->comm); /* ------------------------------------------------------------ COPY THE BUFFER INTO B. ------------------------------------------------------------*/ for (i = 0, k = 0; i < m_loc; ++i) { irow = recv_ibuf[i]; irow -= fst_row; /* Relative row number */ RHS_ITERATE(j) { /* RHS is stored in row major in the buffer. */ B[irow + j*ldb] = recv_dbuf[k++]; } } SUPERLU_FREE(send_ibuf); SUPERLU_FREE(send_dbuf);#if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Exit pzReDistribute_X_to_B()");#endif return 0;} /* pzReDistribute_X_to_B */voidpzgstrs(int_t n, LUstruct_t *LUstruct, ScalePermstruct_t *ScalePermstruct, gridinfo_t *grid, doublecomplex *B, int_t m_loc, int_t fst_row, int_t ldb, int nrhs, SOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, int *info){/* * Purpose * ======= * * PZGSTRS solves a system of distributed linear equations * A*X = B with a general N-by-N matrix A using the LU factorization * computed by PZGSTRF. * If the equilibration, and row and column permutations were performed, * the LU factorization was performed for A1 where * A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U * and the linear system solved is * A1 * Y = Pc*Pr*B1, where B was overwritten by B1 = diag(R)*B, and * the permutation to B1 by Pc*Pr is applied internally in this routine. * * Arguments * ========= * * n (input) int (global) * The order of the system of linear equations. * * LUstruct (input) LUstruct_t* * The distributed data structures storing L and U factors. * The L and U factors are obtained from PZGSTRF for * the possibly scaled and permuted matrix A. * See superlu_zdefs.h for the definition of 'LUstruct_t'. * A may be scaled and permuted into A1, so that * A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U * * grid (input) gridinfo_t* * The 2D process mesh. It contains the MPI communicator, the number * of process rows (NPROW), the number of process columns (NPCOL), * and my process rank. It is an input argument to all the * parallel routines. * Grid can be initialized by subroutine SUPERLU_GRIDINIT. * See superlu_defs.h for the definition of 'gridinfo_t'. * * B (input/output) doublecomplex* * On entry, the distributed right-hand side matrix of the possibly * equilibrated system. That is, B may be overwritten by diag(R)*B. * On exit, the distributed solution matrix Y of the possibly * equilibrated system if info = 0, where Y = Pc*diag(C)^(-1)*X, * and X is the solution of the original system. * * m_loc (input) int (local) * The local row dimension of matrix B. * * fst_row (input) int (global) * The row number of B's first row in the global matrix. * * ldb (input) int (local) * The leading dimension of matrix B. * * nrhs (input) int (global) * Number of right-hand sides. * * SOLVEstruct (output) SOLVEstruct_t* (global) * Contains the information for the communication during the * solution phase. * * stat (output) SuperLUStat_t* * Record the statistics about the triangular solves. * See util.h for the definition of 'SuperLUStat_t'. * * info (output) int* * = 0: successful exit * < 0: if info = -i, the i-th argument had an illegal value * */ Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; doublecomplex alpha = {1.0, 0.0}; doublecomplex zero = {0.0, 0.0}; doublecomplex *lsum; /* Local running sum of the updates to B-components */ doublecomplex *x; /* X component at step k. */ /* NOTE: x and lsum are of same size. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -