📄 pzutil.c
字号:
/* * -- Distributed SuperLU routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley. * March 15, 2003 * */#include <math.h>#include "superlu_zdefs.h"/* * Gather A from the distributed compressed row format to * global A in compressed column format. */int pzCompRow_loc_to_CompCol_global( int_t need_value, /* Input. Whether need to gather numerical values */ SuperMatrix *A, /* Input. Distributed matrix in NRformat_loc format. */ gridinfo_t *grid, /* Input */ SuperMatrix *GA /* Output */){ NRformat_loc *Astore; NCformat *GAstore; doublecomplex *a, *a_loc; int_t *colind, *rowptr; int_t *colptr_loc, *rowind_loc; int_t m_loc, n, i, j, k, l; int_t colnnz, fst_row, m_loc_max, nnz_loc, nnz_max, nnz; doublecomplex *a_recv; /* Buffer to receive the blocks of values. */ doublecomplex *a_buf; /* Buffer to merge blocks into block columns. */ int_t *colcnt, *itemp; int_t *colptr_send; /* Buffer to redistribute the column pointers of the local block rows. Use n_loc+1 pointers for each block. */ int_t *colptr_blk; /* The column pointers for each block, after redistribution to the local block columns. Use n_loc+1 pointers for each block. */ int_t *rowind_recv; /* Buffer to receive the blocks of row indices. */ int_t *rowind_buf; /* Buffer to merge blocks into block columns. */ int_t *fst_rows, *n_locs; int *sendcnts, *sdispls, *recvcnts, *rdispls, *itemp_32; int it, n_loc, procs;#if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Enter pzCompRow_loc_to_CompCol_global");#endif /* Initialization. */ n = A->ncol; Astore = (NRformat_loc *) A->Store; nnz_loc = Astore->nnz_loc; m_loc = Astore->m_loc; fst_row = Astore->fst_row; a = Astore->nzval; rowptr = Astore->rowptr; colind = Astore->colind; n_loc = m_loc; /* NOTE: CURRENTLY ONLY WORK FOR SQUARE MATRIX */ /* ------------------------------------------------------------ FIRST PHASE: TRANSFORM A INTO DISTRIBUTED COMPRESSED COLUMN. ------------------------------------------------------------*/ zCompRow_to_CompCol_dist(m_loc, n, nnz_loc, a, colind, rowptr, &a_loc, &rowind_loc, &colptr_loc); /* Change local row index numbers to global numbers. */ for (i = 0; i < nnz_loc; ++i) rowind_loc[i] += fst_row;#if ( DEBUGlevel>=2 ) printf("Proc %d\n", grid->iam); PrintInt10("rowind_loc", nnz_loc, rowind_loc); PrintInt10("colptr_loc", n+1, colptr_loc);#endif procs = grid->nprow * grid->npcol; if ( !(fst_rows = (int_t *) intMalloc_dist(2*procs)) ) ABORT("Malloc fails for fst_rows[]"); n_locs = fst_rows + procs; MPI_Allgather(&fst_row, 1, mpi_int_t, fst_rows, 1, mpi_int_t, grid->comm); for (i = 0; i < procs-1; ++i) n_locs[i] = fst_rows[i+1] - fst_rows[i]; n_locs[procs-1] = n - fst_rows[procs-1]; if ( !(recvcnts = SUPERLU_MALLOC(5*procs * sizeof(int))) ) ABORT("Malloc fails for recvcnts[]"); sendcnts = recvcnts + procs; rdispls = sendcnts + procs; sdispls = rdispls + procs; itemp_32 = sdispls + procs; /* All-to-all transfer column pointers of each block. Now the matrix view is P-by-P block-partition. */ /* n column starts for each column, and procs column ends for each block */ if ( !(colptr_send = intMalloc_dist(n + procs)) ) ABORT("Malloc fails for colptr_send[]"); if ( !(colptr_blk = intMalloc_dist( (((size_t) n_loc)+1)*procs)) ) ABORT("Malloc fails for colptr_blk[]"); for (i = 0, j = 0; i < procs; ++i) { for (k = j; k < j + n_locs[i]; ++k) colptr_send[i+k] = colptr_loc[k]; colptr_send[i+k] = colptr_loc[k]; /* Add an END marker */ sendcnts[i] = n_locs[i] + 1; assert(j == fst_rows[i]); sdispls[i] = j + i; recvcnts[i] = n_loc + 1; rdispls[i] = i * (n_loc + 1); j += n_locs[i]; /* First column of next block in colptr_loc[] */ } MPI_Alltoallv(colptr_send, sendcnts, sdispls, mpi_int_t, colptr_blk, recvcnts, rdispls, mpi_int_t, grid->comm); /* Adjust colptr_blk[] so that they contain the local indices of the column pointers in the receive buffer. */ nnz = 0; /* The running sum of the nonzeros counted by far */ k = 0; for (i = 0; i < procs; ++i) { for (j = rdispls[i]; j < rdispls[i] + n_loc; ++j) { colnnz = colptr_blk[j+1] - colptr_blk[j]; /*assert(k<=j);*/ colptr_blk[k] = nnz; nnz += colnnz; /* Start of the next column */ ++k; } colptr_blk[k++] = nnz; /* Add an END marker for each block */ } /*assert(k == (n_loc+1)*procs);*/ /* Now prepare to transfer row indices and values. */ sdispls[0] = 0; for (i = 0; i < procs-1; ++i) { sendcnts[i] = colptr_loc[fst_rows[i+1]] - colptr_loc[fst_rows[i]]; sdispls[i+1] = sdispls[i] + sendcnts[i]; } sendcnts[procs-1] = colptr_loc[n] - colptr_loc[fst_rows[procs-1]]; for (i = 0; i < procs; ++i) { j = rdispls[i]; /* Point to this block in colptr_blk[]. */ recvcnts[i] = colptr_blk[j+n_loc] - colptr_blk[j]; } rdispls[0] = 0; /* Recompute rdispls[] for row indices. */ for (i = 0; i < procs-1; ++i) rdispls[i+1] = rdispls[i] + recvcnts[i]; k = rdispls[procs-1] + recvcnts[procs-1]; /* Total received */ if ( !(rowind_recv = (int_t *) intMalloc_dist(2*k)) ) ABORT("Malloc fails for rowind_recv[]"); rowind_buf = rowind_recv + k; MPI_Alltoallv(rowind_loc, sendcnts, sdispls, mpi_int_t, rowind_recv, recvcnts, rdispls, mpi_int_t, grid->comm); if ( need_value ) { if ( !(a_recv = (doublecomplex *) doublecomplexMalloc_dist(2*k)) ) ABORT("Malloc fails for rowind_recv[]"); a_buf = a_recv + k; MPI_Alltoallv(a_loc, sendcnts, sdispls, SuperLU_MPI_DOUBLE_COMPLEX, a_recv, recvcnts, rdispls, SuperLU_MPI_DOUBLE_COMPLEX, grid->comm); } /* Reset colptr_loc[] to point to the n_loc global columns. */ colptr_loc[0] = 0; itemp = colptr_send; for (j = 0; j < n_loc; ++j) { colnnz = 0; for (i = 0; i < procs; ++i) { k = i * (n_loc + 1) + j; /* j-th column in i-th block */ colnnz += colptr_blk[k+1] - colptr_blk[k]; } colptr_loc[j+1] = colptr_loc[j] + colnnz; itemp[j] = colptr_loc[j]; /* Save a copy of the column starts */ } itemp[n_loc] = colptr_loc[n_loc]; /* Merge blocks of row indices into columns of row indices. */ for (i = 0; i < procs; ++i) { k = i * (n_loc + 1); for (j = 0; j < n_loc; ++j) { /* i-th block */ for (l = colptr_blk[k+j]; l < colptr_blk[k+j+1]; ++l) { rowind_buf[itemp[j]] = rowind_recv[l]; ++itemp[j]; } } } if ( need_value ) { for (j = 0; j < n_loc+1; ++j) itemp[j] = colptr_loc[j]; for (i = 0; i < procs; ++i) { k = i * (n_loc + 1); for (j = 0; j < n_loc; ++j) { /* i-th block */ for (l = colptr_blk[k+j]; l < colptr_blk[k+j+1]; ++l) { a_buf[itemp[j]] = a_recv[l]; ++itemp[j]; } } } } /* ------------------------------------------------------------ SECOND PHASE: GATHER TO GLOBAL A IN COMPRESSED COLUMN FORMAT. ------------------------------------------------------------*/ GA->nrow = A->nrow; GA->ncol = A->ncol; GA->Stype = SLU_NC; GA->Dtype = A->Dtype; GA->Mtype = A->Mtype; GAstore = GA->Store = (NCformat *) SUPERLU_MALLOC ( sizeof(NCformat) ); if ( !GAstore ) ABORT ("SUPERLU_MALLOC fails for GAstore"); /* First gather the size of each piece. */ nnz_loc = colptr_loc[n_loc]; MPI_Allgather(&nnz_loc, 1, mpi_int_t, itemp, 1, mpi_int_t, grid->comm); for (i = 0, nnz = 0; i < procs; ++i) nnz += itemp[i]; GAstore->nnz = nnz; if ( !(GAstore->rowind = (int_t *) intMalloc_dist (nnz)) ) ABORT ("SUPERLU_MALLOC fails for GAstore->rowind[]"); if ( !(GAstore->colptr = (int_t *) intMalloc_dist (n+1)) ) ABORT ("SUPERLU_MALLOC fails for GAstore->colptr[]"); /* Allgatherv for row indices. */ rdispls[0] = 0; for (i = 0; i < procs-1; ++i) { rdispls[i+1] = rdispls[i] + itemp[i]; itemp_32[i] = itemp[i]; } itemp_32[procs-1] = itemp[procs-1]; it = nnz_loc; MPI_Allgatherv(rowind_buf, it, mpi_int_t, GAstore->rowind, itemp_32, rdispls, mpi_int_t, grid->comm); if ( need_value ) { if ( !(GAstore->nzval = (doublecomplex *) doublecomplexMalloc_dist (nnz)) ) ABORT ("SUPERLU_MALLOC fails for GAstore->rnzval[]"); MPI_Allgatherv(a_buf, it, SuperLU_MPI_DOUBLE_COMPLEX, GAstore->nzval, itemp_32, rdispls, SuperLU_MPI_DOUBLE_COMPLEX, grid->comm); } else GAstore->nzval = NULL; /* Now gather the column pointers. */ rdispls[0] = 0; for (i = 0; i < procs-1; ++i) { rdispls[i+1] = rdispls[i] + n_locs[i]; itemp_32[i] = n_locs[i]; } itemp_32[procs-1] = n_locs[procs-1]; MPI_Allgatherv(colptr_loc, n_loc, mpi_int_t, GAstore->colptr, itemp_32, rdispls, mpi_int_t, grid->comm); /* Recompute column pointers. */ for (i = 1; i < procs; ++i) { k = rdispls[i]; for (j = 0; j < n_locs[i]; ++j) GAstore->colptr[k++] += itemp[i-1]; itemp[i] += itemp[i-1]; /* prefix sum */ } GAstore->colptr[n] = nnz;#if ( DEBUGlevel>=2 ) if ( !grid->iam ) { printf("After pdCompRow_loc_to_CompCol_global()\n"); zPrint_CompCol_Matrix_dist(GA); }#endif SUPERLU_FREE(a_loc); SUPERLU_FREE(rowind_loc); SUPERLU_FREE(colptr_loc); SUPERLU_FREE(fst_rows); SUPERLU_FREE(recvcnts); SUPERLU_FREE(colptr_send); SUPERLU_FREE(colptr_blk); SUPERLU_FREE(rowind_recv); if ( need_value) SUPERLU_FREE(a_recv);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -