
pzdistribute.c

SuperLU version 2.2: a direct solver for large, sparse, non-symmetric linear systems.

Language: C
Page 1 of 3
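The listing below begins with zReDistribute_A, which redistributes the entries of A across the nprow x npcol process grid by mapping each permuted entry to the process that owns its supernodal block. As a reading aid, here is a minimal standalone sketch of that owner computation. The modulo and row-major-rank formulas mirror what the BlockNum, PROW, PCOL, and PNUM macros used in the listing conventionally expand to in SuperLU_DIST headers; since those definitions are not part of this file, treat them as assumptions, and the helper names (block_num, owner_rank) as illustrative only.

/* Illustrative sketch only: maps a permuted (row, col) entry to the rank that
 * owns its supernodal block on an nprow x npcol grid.  The formulas are an
 * assumption about how PROW/PCOL/PNUM are defined, not taken from this file. */
#include <stdio.h>

/* supno[i] gives the supernode (block) number containing row/column i,
 * playing the role of the supno array passed to zReDistribute_A. */
static int block_num(const int *supno, int i) { return supno[i]; }

/* Block-cyclic owner of block row gbi and block column gbj. */
static int owner_rank(int gbi, int gbj, int nprow, int npcol)
{
    int prow = gbi % nprow;      /* process row holding block row gbi    */
    int pcol = gbj % npcol;      /* process column holding block col gbj */
    return prow * npcol + pcol;  /* row-major rank within the 2D grid    */
}

int main(void)
{
    /* Toy example: 6 rows/columns grouped into 3 supernodes on a 2 x 2 grid. */
    int supno[6] = { 0, 0, 1, 1, 2, 2 };
    int nprow = 2, npcol = 2;
    int irow = 4, jcol = 1;      /* entry A(4,1) after permutation */
    int gbi = block_num(supno, irow);
    int gbj = block_num(supno, jcol);
    printf("A(%d,%d) lives in block (%d,%d), owned by rank %d\n",
           irow, jcol, gbi, gbj, owner_rank(gbi, gbj, nprow, npcol));
    return 0;
}

The original listing from pzdistribute.c follows.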
#include "superlu_zdefs.h"int_tzReDistribute_A(SuperMatrix *A, ScalePermstruct_t *ScalePermstruct,                Glu_freeable_t *Glu_freeable, int_t *xsup, int_t *supno,                gridinfo_t *grid, int_t *colptr[], int_t *rowind[],                doublecomplex *a[]){/* * -- Distributed SuperLU routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley. * March 15, 2003 * * Purpose * ======= *   Re-distribute A on the 2D process mesh. *  * Arguments * ========= *  * A      (input) SuperMatrix* *	  The distributed input matrix A of dimension (A->nrow, A->ncol). *        A may be overwritten by diag(R)*A*diag(C)*Pc^T. *        The type of A can be: Stype = SLU_NR_loc; Dtype = SLU_Z; Mtype = SLU_GE. * * ScalePermstruct (input) ScalePermstruct_t* *        The data structure to store the scaling and permutation vectors *        describing the transformations performed to the original matrix A. * * Glu_freeable (input) *Glu_freeable_t *        The global structure describing the graph of L and U. *  * grid   (input) gridinfo_t* *        The 2D process mesh. * * colptr (output) int* * * rowind (output) int* * * a      (output) doublecomplex* * * Return value * ============ * */    NRformat_loc *Astore;    int_t  *perm_r; /* row permutation vector */    int_t  *perm_c; /* column permutation vector */    int_t  i, irow, fst_row, j, jcol, k, gbi, gbj, n, m_loc, jsize;    int_t  nnz_loc;    /* number of local nonzeros */    int_t  nnz_remote; /* number of remote nonzeros to be sent */    int_t  SendCnt; /* number of remote nonzeros to be sent */    int_t  RecvCnt; /* number of remote nonzeros to be sent */    int_t  *nnzToSend, *nnzToRecv, maxnnzToRecv;    int_t  *ia, *ja, **ia_send, *index, *itemp;    int_t  *ptr_to_send;    doublecomplex *aij, **aij_send, *nzval, *dtemp;    doublecomplex *nzval_a;    int    iam, it, p, procs;    MPI_Request *send_req;    MPI_Status  status;        /* ------------------------------------------------------------       INITIALIZATION.       ------------------------------------------------------------*/    iam = grid->iam;#if ( DEBUGlevel>=1 )    CHECK_MALLOC(iam, "Enter zReDistribute_A()");#endif    perm_r = ScalePermstruct->perm_r;    perm_c = ScalePermstruct->perm_c;    procs = grid->nprow * grid->npcol;    Astore = (NRformat_loc *) A->Store;    n = A->ncol;    m_loc = Astore->m_loc;    fst_row = Astore->fst_row;    nnzToRecv = intCalloc_dist(2*procs);    nnzToSend = nnzToRecv + procs;    /* ------------------------------------------------------------       COUNT THE NUMBER OF NONZEROS TO BE SENT TO EACH PROCESS,       THEN ALLOCATE SPACE.       THIS ACCOUNTS FOR THE FIRST PASS OF A.       
------------------------------------------------------------*/    for (i = 0; i < m_loc; ++i) {        for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) {  	    irow = perm_c[perm_r[i+fst_row]];  /* Row number in Pc*Pr*A */	    jcol = Astore->colind[j];	    gbi = BlockNum( irow );	    gbj = BlockNum( jcol );	    p = PNUM( PROW(gbi,grid), PCOL(gbj,grid), grid );	    ++nnzToSend[p]; 	}    }    /* All-to-all communication */    MPI_Alltoall( nnzToSend, 1, mpi_int_t, nnzToRecv, 1, mpi_int_t,		  grid->comm);    maxnnzToRecv = 0;    nnz_loc = SendCnt = RecvCnt = 0;    for (p = 0; p < procs; ++p) {	if ( p != iam ) {	    SendCnt += nnzToSend[p];	    RecvCnt += nnzToRecv[p];	    maxnnzToRecv = SUPERLU_MAX( nnzToRecv[p], maxnnzToRecv );	} else {	    nnz_loc += nnzToRecv[p];	    /*assert(nnzToSend[p] == nnzToRecv[p]);*/	}    }    k = nnz_loc + RecvCnt; /* Total nonzeros ended up in my process. */    /* Allocate space for storing the triplets after redistribution. */    if ( k ) { /* count can be zero. */        if ( !(ia = intMalloc_dist(2*k)) )            ABORT("Malloc fails for ia[].");        if ( !(aij = doublecomplexMalloc_dist(k)) )            ABORT("Malloc fails for aij[].");    }    ja = ia + k;    /* Allocate temporary storage for sending/receiving the A triplets. */    if ( procs > 1 ) {      if ( !(send_req = (MPI_Request *)	     SUPERLU_MALLOC(2*procs *sizeof(MPI_Request))) )	ABORT("Malloc fails for send_req[].");      if ( !(ia_send = (int_t **) SUPERLU_MALLOC(procs*sizeof(int_t*))) )        ABORT("Malloc fails for ia_send[].");      if ( !(aij_send = (doublecomplex **)SUPERLU_MALLOC(procs*sizeof(doublecomplex*))) )        ABORT("Malloc fails for aij_send[].");      if ( SendCnt ) { /* count can be zero */          if ( !(index = intMalloc_dist(2*SendCnt)) )              ABORT("Malloc fails for index[].");          if ( !(nzval = doublecomplexMalloc_dist(SendCnt)) )              ABORT("Malloc fails for nzval[].");      }      if ( !(ptr_to_send = intCalloc_dist(procs)) )        ABORT("Malloc fails for ptr_to_send[].");      if ( maxnnzToRecv ) { /* count can be zero */          if ( !(itemp = intMalloc_dist(2*maxnnzToRecv)) )              ABORT("Malloc fails for itemp[].");          if ( !(dtemp = doublecomplexMalloc_dist(maxnnzToRecv)) )              ABORT("Malloc fails for dtemp[].");      }      for (i = 0, j = 0, p = 0; p < procs; ++p) {          if ( p != iam ) {	      ia_send[p] = &index[i];	      i += 2 * nnzToSend[p]; /* ia/ja indices alternate */	      aij_send[p] = &nzval[j];	      j += nnzToSend[p];	  }      }    } /* if procs > 1 */          if ( !(*colptr = intCalloc_dist(n+1)) )        ABORT("Malloc fails for *colptr[].");    /* ------------------------------------------------------------       LOAD THE ENTRIES OF A INTO THE (IA,JA,AIJ) STRUCTURES TO SEND.       THIS ACCOUNTS FOR THE SECOND PASS OF A.       ------------------------------------------------------------*/    nnz_loc = 0; /* Reset the local nonzero count. 
*/    nzval_a = Astore->nzval;    for (i = 0; i < m_loc; ++i) {        for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) {  	    irow = perm_c[perm_r[i+fst_row]];  /* Row number in Pc*Pr*A */	    jcol = Astore->colind[j];	    gbi = BlockNum( irow );	    gbj = BlockNum( jcol );	    p = PNUM( PROW(gbi,grid), PCOL(gbj,grid), grid );	    if ( p != iam ) { /* remote */	        k = ptr_to_send[p];	        ia_send[p][k] = irow;	        ia_send[p][k + nnzToSend[p]] = jcol;		aij_send[p][k] = nzval_a[j];		++ptr_to_send[p]; 	    } else {          /* local */	        ia[nnz_loc] = irow;	        ja[nnz_loc] = jcol;		aij[nnz_loc] = nzval_a[j];		++nnz_loc;		++(*colptr)[jcol]; /* Count nonzeros in each column */	    }	}    }    /* ------------------------------------------------------------       PERFORM REDISTRIBUTION. THIS INVOLVES ALL-TO-ALL COMMUNICATION.       NOTE: Can possibly use MPI_Alltoallv.       ------------------------------------------------------------*/    for (p = 0; p < procs; ++p) {        if ( p != iam ) {	    it = 2*nnzToSend[p];	    MPI_Isend( ia_send[p], it, mpi_int_t,		       p, iam, grid->comm, &send_req[p] );	    it = nnzToSend[p];	    MPI_Isend( aij_send[p], it, SuperLU_MPI_DOUBLE_COMPLEX,	               p, iam+procs, grid->comm, &send_req[procs+p] ); 	}    }    for (p = 0; p < procs; ++p) {        if ( p != iam ) {	    it = 2*nnzToRecv[p];	    MPI_Recv( itemp, it, mpi_int_t, p, p, grid->comm, &status ); 	    it = nnzToRecv[p];            MPI_Recv( dtemp, it, SuperLU_MPI_DOUBLE_COMPLEX, p, p+procs,		      grid->comm, &status );	    for (i = 0; i < nnzToRecv[p]; ++i) {	        ia[nnz_loc] = itemp[i];		jcol = itemp[i + nnzToRecv[p]];		/*assert(jcol<n);*/	        ja[nnz_loc] = jcol;		aij[nnz_loc] = dtemp[i];		++nnz_loc;		++(*colptr)[jcol]; /* Count nonzeros in each column */ 	    }	}    }    for (p = 0; p < procs; ++p) {        if ( p != iam ) {	    MPI_Wait( &send_req[p], &status);	    MPI_Wait( &send_req[procs+p], &status);	}    }    /* ------------------------------------------------------------       DEALLOCATE TEMPORARY STORAGE       ------------------------------------------------------------*/    SUPERLU_FREE(nnzToRecv);    if ( procs > 1 ) {	SUPERLU_FREE(send_req);	SUPERLU_FREE(ia_send);	SUPERLU_FREE(aij_send);	if ( SendCnt ) {            SUPERLU_FREE(index);            SUPERLU_FREE(nzval);        }	SUPERLU_FREE(ptr_to_send);        if ( maxnnzToRecv ) {            SUPERLU_FREE(itemp);            SUPERLU_FREE(dtemp);        }    }    /* ------------------------------------------------------------       CONVERT THE TRIPLET FORMAT INTO THE CCS FORMAT.       
------------------------------------------------------------*/    if ( nnz_loc ) { /* nnz_loc can be zero */        if ( !(*rowind = intMalloc_dist(nnz_loc)) )            ABORT("Malloc fails for *rowind[].");        if ( !(*a = doublecomplexMalloc_dist(nnz_loc)) )            ABORT("Malloc fails for *a[].");    }    /* Initialize the array of column pointers */    k = 0;    jsize = (*colptr)[0];    (*colptr)[0] = 0;    for (j = 1; j < n; ++j) {	k += jsize;	jsize = (*colptr)[j];	(*colptr)[j] = k;    }        /* Copy the triplets into the column oriented storage */    for (i = 0; i < nnz_loc; ++i) {	j = ja[i];	k = (*colptr)[j];	(*rowind)[k] = ia[i];	(*a)[k] = aij[i];	++(*colptr)[j];    }    /* Reset the column pointers to the beginning of each column */    for (j = n; j > 0; --j) (*colptr)[j] = (*colptr)[j-1];    (*colptr)[0] = 0;    if ( nnz_loc ) {        SUPERLU_FREE(ia);        SUPERLU_FREE(aij);    }#if ( DEBUGlevel>=1 )    CHECK_MALLOC(iam, "Exit zReDistribute_A()");#endif } /* zReDistribute_A */int_tpzdistribute(fact_t fact, int_t n, SuperMatrix *A,	     ScalePermstruct_t *ScalePermstruct,	     Glu_freeable_t *Glu_freeable, LUstruct_t *LUstruct,	     gridinfo_t *grid)/* * -- Distributed SuperLU routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley. * March 15, 2003 * * * Purpose * ======= *   Distribute the matrix onto the 2D process mesh. *  * Arguments * ========= *  * fact (input) fact_t *        Specifies whether or not the L and U structures will be re-used. *        = SamePattern_SameRowPerm: L and U structures are input, and *                                   unchanged on exit. *        = DOFACT or SamePattern: L and U structures are computed and output. * * n      (input) int *        Dimension of the matrix. * * A      (input) SuperMatrix* *	  The distributed input matrix A of dimension (A->nrow, A->ncol). *        A may be overwritten by diag(R)*A*diag(C)*Pc^T. The type of A can be: *        Stype = SLU_NR_loc; Dtype = SLU_Z; Mtype = SLU_GE. * * ScalePermstruct (input) ScalePermstruct_t* *        The data structure to store the scaling and permutation vectors *        describing the transformations performed to the original matrix A. * * Glu_freeable (input) *Glu_freeable_t *        The global structure describing the graph of L and U. *  * LUstruct (input) LUstruct_t* *        Data structures for L and U factors. * * grid   (input) gridinfo_t* *        The 2D process mesh. * * Return value * ============ *   > 0, working storage required (in bytes). *
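The listing continues on the next page. Before moving on, note how zReDistribute_A finishes: it converts the locally gathered (ia, ja, aij) triplets into compressed column storage by counting nonzeros per column into *colptr, turning the counts into starting offsets, scattering the triplets, and then shifting the pointers back by one column. Below is a minimal self-contained sketch of that same counting-sort conversion. The function name triplets_to_ccs is a hypothetical stand-in, and plain int/double/malloc-free arrays replace SuperLU_DIST's int_t, doublecomplex, and allocators; it illustrates the technique, not the library's API.

/* Illustrative sketch only: the triplet-to-CCS conversion mirroring the last
 * section of zReDistribute_A, with hypothetical names and plain C types. */
#include <stdio.h>

/* Convert nnz (ia, ja, aij) triplets over n columns into CCS arrays
 * (colptr, rowind, a).  colptr must have n+1 entries with colptr[j]
 * prefilled with the nonzero count of column j and colptr[n] == 0. */
static void triplets_to_ccs(int n, int nnz,
                            const int *ia, const int *ja, const double *aij,
                            int *colptr, int *rowind, double *a)
{
    int i, j, k, jsize;

    /* Turn the per-column counts into starting offsets (exclusive prefix sum). */
    k = 0;
    jsize = colptr[0];
    colptr[0] = 0;
    for (j = 1; j < n; ++j) {
        k += jsize;
        jsize = colptr[j];
        colptr[j] = k;
    }

    /* Scatter each triplet into its column, advancing that column's cursor. */
    for (i = 0; i < nnz; ++i) {
        j = ja[i];
        k = colptr[j]++;
        rowind[k] = ia[i];
        a[k] = aij[i];
    }

    /* Shift the cursors back so colptr[j] again marks the start of column j. */
    for (j = n; j > 0; --j) colptr[j] = colptr[j-1];
    colptr[0] = 0;
}

int main(void)
{
    /* 3x3 example with 4 nonzeros given as (row, col, value) triplets. */
    int ia[] = { 0, 2, 1, 0 };
    int ja[] = { 0, 0, 1, 2 };
    double aij[] = { 1.0, 2.0, 3.0, 4.0 };
    int n = 3, nnz = 4, colptr[4] = { 0, 0, 0, 0 };
    int rowind[4];
    double a[4];

    for (int i = 0; i < nnz; ++i) ++colptr[ja[i]];   /* per-column counts */
    triplets_to_ccs(n, nnz, ia, ja, aij, colptr, rowind, a);

    for (int j = 0; j < n; ++j)
        for (int k = colptr[j]; k < colptr[j+1]; ++k)
            printf("A(%d,%d) = %g\n", rowind[k], j, a[k]);
    return 0;
}

The shift-back loop at the end is what lets the scatter pass reuse colptr as a moving write cursor without allocating a separate work array, which is exactly the trick the library routine uses on (*colptr).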
