pdgstrs.c.bak
/*
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 *
 */

#include "superlu_ddefs.h"

#define ISEND_IRECV

/*
 * Function prototypes
 */
#ifdef _CRAY
fortran void STRSM(_fcd, _fcd, _fcd, _fcd, int*, int*, double*, double*,
                   int*, double*, int*);
fortran void SGEMM(_fcd, _fcd, int*, int*, int*, double*, double*, int*,
                   double*, int*, double*, double*, int*);
_fcd ftcs1;
_fcd ftcs2;
_fcd ftcs3;
#endif

int_t
pdReDistribute_B_to_X(double *B, int_t m_loc, int nrhs, int_t ldb,
                      int_t fst_row, int_t *ilsum, double *x,
                      ScalePermstruct_t *ScalePermstruct,
                      Glu_persist_t *Glu_persist,
                      gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct)
{
/*
 * Purpose
 * =======
 *   Re-distribute B on the diagonal processes of the 2D process mesh.
 *
 * Note
 * ====
 *   This routine can only be called after the routine pxgstrs_init(),
 *   in which the structures of the send and receive buffers are set up.
 *
 * Arguments
 * =========
 *
 * B      (input) double*
 *        The distributed right-hand side matrix of the possibly
 *        equilibrated system.
 *
 * m_loc  (input) int (local)
 *        The local row dimension of matrix B.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 *
 * ldb    (input) int (local)
 *        Leading dimension of matrix B.
 *
 * fst_row (input) int (global)
 *        The row number of B's first row in the global matrix.
 *
 * ilsum  (input) int* (global)
 *        Starting position of each supernode in a full array.
 *
 * x      (output) double*
 *        The solution vector. It is valid only on the diagonal processes.
 *
 * ScalePermstruct (input) ScalePermstruct_t*
 *        The data structure to store the scaling and permutation vectors
 *        describing the transformations performed to the original matrix A.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
 * SOLVEstruct (input) SOLVEstruct_t*
 *        Contains the information for the communication during the
 *        solution phase.
 *
 * Return value
 * ============
 *
 */
    int  *SendCnt, *SendCnt_nrhs, *RecvCnt, *RecvCnt_nrhs;
    int  *sdispls, *sdispls_nrhs, *rdispls, *rdispls_nrhs;
    int  *ptr_to_ibuf, *ptr_to_dbuf;
    int_t  *perm_r, *perm_c; /* row and column permutation vectors */
    int_t  *send_ibuf, *recv_ibuf;
    double *send_dbuf, *recv_dbuf;
    int_t  *xsup, *supno;
    int_t  i, ii, irow, gbi, j, jj, k, knsupc, l, lk;
    int    p, procs;
    pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pdReDistribute_B_to_X()");
#endif

    /* ------------------------------------------------------------
       INITIALIZATION.
       ------------------------------------------------------------*/
    perm_r = ScalePermstruct->perm_r;
    perm_c = ScalePermstruct->perm_c;
    procs = grid->nprow * grid->npcol;
    xsup = Glu_persist->xsup;
    supno = Glu_persist->supno;
    SendCnt      = gstrs_comm->B_to_X_SendCnt;
    SendCnt_nrhs = gstrs_comm->B_to_X_SendCnt +   procs;
    RecvCnt      = gstrs_comm->B_to_X_SendCnt + 2*procs;
    RecvCnt_nrhs = gstrs_comm->B_to_X_SendCnt + 3*procs;
    sdispls      = gstrs_comm->B_to_X_SendCnt + 4*procs;
    sdispls_nrhs = gstrs_comm->B_to_X_SendCnt + 5*procs;
    rdispls      = gstrs_comm->B_to_X_SendCnt + 6*procs;
    rdispls_nrhs = gstrs_comm->B_to_X_SendCnt + 7*procs;
    ptr_to_ibuf = gstrs_comm->ptr_to_ibuf;
    ptr_to_dbuf = gstrs_comm->ptr_to_dbuf;

    /* ------------------------------------------------------------
       NOW COMMUNICATE THE ACTUAL DATA.
       ------------------------------------------------------------*/
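    /* Added note (not part of the original routine): pxgstrs_init() lays out
       gstrs_comm->B_to_X_SendCnt as eight consecutive blocks of 'procs' ints
       (send/receive counts and displacements, with and without nrhs), which
       the pointer assignments above simply name.  The guarded snippet below
       is only an illustrative diagnostic sketch for inspecting the
       per-process counts before the all-to-all exchanges; it follows the
       file's DEBUGlevel convention and uses no names beyond those already in
       scope. */
#if ( DEBUGlevel>=2 )
    for (p = 0; p < procs; ++p) {
        printf("(%d) B_to_X to/from process %d: SendCnt %d, RecvCnt %d\n",
               grid->iam, p, SendCnt[p], RecvCnt[p]);
    }
#endif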
    k = sdispls[procs-1] + SendCnt[procs-1]; /* Total number of sends */
    l = rdispls[procs-1] + RecvCnt[procs-1]; /* Total number of receives */
    if ( !(send_ibuf = intMalloc_dist(k + l)) )
        ABORT("Malloc fails for send_ibuf[].");
    recv_ibuf = send_ibuf + k;
    if ( !(send_dbuf = doubleMalloc_dist((k + l) * (size_t)nrhs)) )
        ABORT("Malloc fails for send_dbuf[].");
    recv_dbuf = send_dbuf + k * nrhs;

    for (p = 0; p < procs; ++p) {
        ptr_to_ibuf[p] = sdispls[p];
        ptr_to_dbuf[p] = sdispls[p] * nrhs;
    }

    /* Copy the row indices and values to the send buffer. */
    for (i = 0, l = fst_row; i < m_loc; ++i, ++l) {
        irow = perm_c[perm_r[l]]; /* Row number in Pc*Pr*B */
        gbi = BlockNum( irow );
        p = PNUM( PROW(gbi,grid), PCOL(gbi,grid), grid ); /* Diagonal process */
        k = ptr_to_ibuf[p];
        send_ibuf[k] = irow;
        k = ptr_to_dbuf[p];
        RHS_ITERATE(j) { /* RHS is stored in row major in the buffer. */
            send_dbuf[k++] = B[i + j*ldb];
        }
        ++ptr_to_ibuf[p];
        ptr_to_dbuf[p] += nrhs;
    }

    /* Communicate the (permuted) row indices. */
    MPI_Alltoallv(send_ibuf, SendCnt, sdispls, mpi_int_t,
                  recv_ibuf, RecvCnt, rdispls, mpi_int_t, grid->comm);

    /* Communicate the numerical values. */
    MPI_Alltoallv(send_dbuf, SendCnt_nrhs, sdispls_nrhs, MPI_DOUBLE,
                  recv_dbuf, RecvCnt_nrhs, rdispls_nrhs, MPI_DOUBLE,
                  grid->comm);

    /* ------------------------------------------------------------
       Copy buffer into X on the diagonal processes.
       ------------------------------------------------------------*/
    ii = 0;
    for (p = 0; p < procs; ++p) {
        jj = rdispls_nrhs[p];
        for (i = 0; i < RecvCnt[p]; ++i) { /* Only the diagonal processes do
                                              this; the off-diagonal processes
                                              have 0 RecvCnt. */
            irow = recv_ibuf[ii]; /* The permuted row index. */
            k = BlockNum( irow );
            knsupc = SuperSize( k );
            lk = LBi( k, grid );  /* Local block number. */
            l = X_BLK( lk );
            x[l - XK_H] = k;      /* Block number prepended in the header. */
            irow = irow - FstBlockC(k); /* Relative row number in X-block */
            RHS_ITERATE(j) {
                x[l + irow + j*knsupc] = recv_dbuf[jj++];
            }
            ++ii;
        }
    }

    SUPERLU_FREE(send_ibuf);
    SUPERLU_FREE(send_dbuf);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Exit pdReDistribute_B_to_X()");
#endif
    return 0;
} /* pdReDistribute_B_to_X */


int_t
pdReDistribute_X_to_B(int_t n, double *B, int_t m_loc, int_t ldb,
                      int_t fst_row, int_t nrhs, double *x, int_t *ilsum,
                      ScalePermstruct_t *ScalePermstruct,
                      Glu_persist_t *Glu_persist,
                      gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct)
{
/*
 * Purpose
 * =======
 *   Re-distribute X on the diagonal processes to B distributed on all
 *   the processes.
 *
 * Note
 * ====
 *   This routine can only be called after the routine pxgstrs_init(),
 *   in which the structures of the send and receive buffers are set up.
 *
 */
    int_t  i, ii, irow, j, jj, k, knsupc, nsupers, l, lk;
    int_t  *xsup, *supno;
    int  *SendCnt, *SendCnt_nrhs, *RecvCnt, *RecvCnt_nrhs;
    int  *sdispls, *rdispls, *sdispls_nrhs, *rdispls_nrhs;
    int  *ptr_to_ibuf, *ptr_to_dbuf;
    int_t  *send_ibuf, *recv_ibuf;
    double *send_dbuf, *recv_dbuf;
    int_t  *row_to_proc = SOLVEstruct->row_to_proc; /* row-process mapping */
    pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm;
    int  iam, p, q, pkk, procs;
    int_t  num_diag_procs, *diag_procs;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pdReDistribute_X_to_B()");
#endif

    /* ------------------------------------------------------------
       INITIALIZATION.
       ------------------------------------------------------------*/
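    /* Added summary comment (not in the original source): this routine
       proceeds in three steps.  (1) Each diagonal process walks its local
       X blocks supernode by supernode and packs row indices and values into
       send buffers, choosing the destination with the row_to_proc[] mapping.
       (2) Two MPI_Alltoallv calls exchange the permuted row indices and the
       numerical values.  (3) Every process scatters the received values into
       its local rows of B, using fst_row to form local row numbers. */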
    xsup = Glu_persist->xsup;
    supno = Glu_persist->supno;
    nsupers = Glu_persist->supno[n-1] + 1;
    iam = grid->iam;
    procs = grid->nprow * grid->npcol;
    SendCnt      = gstrs_comm->X_to_B_SendCnt;
    SendCnt_nrhs = gstrs_comm->X_to_B_SendCnt +   procs;
    RecvCnt      = gstrs_comm->X_to_B_SendCnt + 2*procs;
    RecvCnt_nrhs = gstrs_comm->X_to_B_SendCnt + 3*procs;
    sdispls      = gstrs_comm->X_to_B_SendCnt + 4*procs;
    sdispls_nrhs = gstrs_comm->X_to_B_SendCnt + 5*procs;
    rdispls      = gstrs_comm->X_to_B_SendCnt + 6*procs;
    rdispls_nrhs = gstrs_comm->X_to_B_SendCnt + 7*procs;
    ptr_to_ibuf = gstrs_comm->ptr_to_ibuf;
    ptr_to_dbuf = gstrs_comm->ptr_to_dbuf;

    k = sdispls[procs-1] + SendCnt[procs-1]; /* Total number of sends */
    l = rdispls[procs-1] + RecvCnt[procs-1]; /* Total number of receives */
    if ( !(send_ibuf = intMalloc_dist(k + l)) )
        ABORT("Malloc fails for send_ibuf[].");
    recv_ibuf = send_ibuf + k;
    if ( !(send_dbuf = doubleMalloc_dist((k + l)*nrhs)) )
        ABORT("Malloc fails for send_dbuf[].");
    recv_dbuf = send_dbuf + k * nrhs;

    for (p = 0; p < procs; ++p) {
        ptr_to_ibuf[p] = sdispls[p];
        ptr_to_dbuf[p] = sdispls_nrhs[p];
    }
    num_diag_procs = SOLVEstruct->num_diag_procs;
    diag_procs = SOLVEstruct->diag_procs;

    for (p = 0; p < num_diag_procs; ++p) { /* For all diagonal processes. */
        pkk = diag_procs[p];
        if ( iam == pkk ) {
            for (k = p; k < nsupers; k += num_diag_procs) {
                knsupc = SuperSize( k );
                lk = LBi( k, grid ); /* Local block number */
                irow = FstBlockC( k );
                l = X_BLK( lk );
                for (i = 0; i < knsupc; ++i) {
#if 0
                    ii = inv_perm_c[irow]; /* Apply X <== Pc'*Y */
#else
                    ii = irow;
#endif
                    q = row_to_proc[ii];
                    jj = ptr_to_ibuf[q];
                    send_ibuf[jj] = ii;
                    jj = ptr_to_dbuf[q];
                    RHS_ITERATE(j) { /* RHS stored in row major in buffer. */
                        send_dbuf[jj++] = x[l + i + j*knsupc];
                    }
                    ++ptr_to_ibuf[q];
                    ptr_to_dbuf[q] += nrhs;
                    ++irow;
                }
            }
        }
    }

    /* ------------------------------------------------------------
       COMMUNICATE THE (PERMUTED) ROW INDICES AND NUMERICAL VALUES.
       ------------------------------------------------------------*/
    MPI_Alltoallv(send_ibuf, SendCnt, sdispls, mpi_int_t,
                  recv_ibuf, RecvCnt, rdispls, mpi_int_t, grid->comm);
    MPI_Alltoallv(send_dbuf, SendCnt_nrhs, sdispls_nrhs, MPI_DOUBLE,
                  recv_dbuf, RecvCnt_nrhs, rdispls_nrhs, MPI_DOUBLE,
                  grid->comm);

    /* ------------------------------------------------------------
       COPY THE BUFFER INTO B.
       ------------------------------------------------------------*/
    for (i = 0, k = 0; i < m_loc; ++i) {
        irow = recv_ibuf[i];
        irow -= fst_row; /* Relative row number */
        RHS_ITERATE(j) { /* RHS is stored in row major in the buffer. */
            B[irow + j*ldb] = recv_dbuf[k++];
        }
    }

    SUPERLU_FREE(send_ibuf);
    SUPERLU_FREE(send_dbuf);
#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Exit pdReDistribute_X_to_B()");
#endif
    return 0;
} /* pdReDistribute_X_to_B */


void
pdgstrs(int_t n, LUstruct_t *LUstruct,
        ScalePermstruct_t *ScalePermstruct,
        gridinfo_t *grid, double *B,
        int_t m_loc, int_t fst_row, int_t ldb, int nrhs,
        SOLVEstruct_t *SOLVEstruct,
        SuperLUStat_t *stat, int *info)
{
/*
 * Purpose
 * =======
 *
 * PDGSTRS solves a system of distributed linear equations
 * A*X = B with a general N-by-N matrix A using the LU factorization
 * computed by PDGSTRF.
 * If the equilibration, and row and column permutations were performed,
 * the LU factorization was performed for A1 where
 *     A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U
 * and the linear system solved is
 *     A1 * Y = Pc*Pr*B1, where B was overwritten by B1 = diag(R)*B, and
 * the permutation to B1 by Pc*Pr is applied internally in this routine.
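 *
 * Added usage sketch (not part of the original header; the driver-side
 * variable names below are illustrative only). After the LU factorization
 * by PDGSTRF, a typical call looks like:
 *
 *     pdgstrs(n, &LUstruct, &ScalePermstruct, &grid, B, m_loc, fst_row,
 *             ldb, nrhs, &SOLVEstruct, &stat, &info);
 *
 * on return, B is overwritten by the solution as described under
 * Arguments below.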
 *
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures storing L and U factors.
 *        The L and U factors are obtained from PDGSTRF for
 *        the possibly scaled and permuted matrix A.
 *        See superlu_ddefs.h for the definition of 'LUstruct_t'.
 *        A may be scaled and permuted into A1, so that
 *        A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_defs.h for the definition of 'gridinfo_t'.
 *
 * B      (input/output) double*
 *        On entry, the distributed right-hand side matrix of the possibly
 *        equilibrated system. That is, B may be overwritten by diag(R)*B.
 *        On exit, the distributed solution matrix Y of the possibly
 *        equilibrated system if info = 0, where Y = Pc*diag(C)^(-1)*X,
 *        and X is the solution of the original system.
 *
 * m_loc  (input) int (local)
 *        The local row dimension of matrix B.
 *
 * fst_row (input) int (global)
 *        The row number of B's first row in the global matrix.
 *
 * ldb    (input) int (local)
 *        The leading dimension of matrix B.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 *
 * SOLVEstruct (output) SOLVEstruct_t* (global)
 *        Contains the information for the communication during the
 *        solution phase.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the triangular solves.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *