pdutil.c

来自「LU分解求解矩阵方程组的解」· C语言代码 · 共 528 行 · 第 1/2 页
528 行
/* * -- Distributed SuperLU routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley. * March 15, 2003 * */#include <math.h>#include "superlu_ddefs.h"/* * Gather A from the distributed compressed row format to * global A in compressed column format. */int pdCompRow_loc_to_CompCol_global( int_t need_value, /* Input. Whether need to gather numerical values */ SuperMatrix *A,   /* Input. Distributed matrix in NRformat_loc format. */ gridinfo_t *grid, /* Input */ SuperMatrix *GA   /* Output */){    NRformat_loc *Astore;    NCformat *GAstore;    double *a, *a_loc;    int_t *colind, *rowptr;    int_t *colptr_loc, *rowind_loc;    int_t m_loc, n, i, j, k, l;    int_t colnnz, fst_row, m_loc_max, nnz_loc, nnz_max, nnz;    double *a_recv;  /* Buffer to receive the blocks of values. */    double *a_buf;   /* Buffer to merge blocks into block columns. */    int_t *colcnt, *itemp;    int_t *colptr_send; /* Buffer to redistribute the column pointers of the 			   local block rows.			   Use n_loc+1 pointers for each block. */    int_t *colptr_blk;  /* The column pointers for each block, after			   redistribution to the local block columns. 			   Use n_loc+1 pointers for each block. */    int_t *rowind_recv; /* Buffer to receive the blocks of row indices. */    int_t *rowind_buf;  /* Buffer to merge blocks into block columns. */    int_t *fst_rows, *n_locs;    int   *sendcnts, *sdispls, *recvcnts, *rdispls, *itemp_32;    int   it, n_loc, procs;#if ( DEBUGlevel>=1 )    CHECK_MALLOC(grid->iam, "Enter pdCompRow_loc_to_CompCol_global");#endif    /* Initialization. */    n = A->ncol;    Astore = (NRformat_loc *) A->Store;    nnz_loc = Astore->nnz_loc;    m_loc = Astore->m_loc;    fst_row = Astore->fst_row;    a = Astore->nzval;    rowptr = Astore->rowptr;    colind = Astore->colind;    n_loc = m_loc; /* NOTE: CURRENTLY ONLY WORK FOR SQUARE MATRIX */    /* ------------------------------------------------------------       FIRST PHASE: TRANSFORM A INTO DISTRIBUTED COMPRESSED COLUMN.       ------------------------------------------------------------*/    dCompRow_to_CompCol_dist(m_loc, n, nnz_loc, a, colind, rowptr, &a_loc,                             &rowind_loc, &colptr_loc);    /* Change local row index numbers to global numbers. */    for (i = 0; i < nnz_loc; ++i) rowind_loc[i] += fst_row;#if ( DEBUGlevel>=2 )    printf("Proc %d\n", grid->iam);    PrintInt10("rowind_loc", nnz_loc, rowind_loc);    PrintInt10("colptr_loc", n+1, colptr_loc);#endif    procs = grid->nprow * grid->npcol;    if ( !(fst_rows = (int_t *) intMalloc_dist(2*procs)) )	  ABORT("Malloc fails for fst_rows[]");    n_locs = fst_rows + procs;    MPI_Allgather(&fst_row, 1, mpi_int_t, fst_rows, 1, mpi_int_t,		  grid->comm);    for (i = 0; i < procs-1; ++i) n_locs[i] = fst_rows[i+1] - fst_rows[i];    n_locs[procs-1] = n - fst_rows[procs-1];    if ( !(recvcnts = SUPERLU_MALLOC(5*procs * sizeof(int))) )	  ABORT("Malloc fails for recvcnts[]");    sendcnts = recvcnts + procs;    rdispls = sendcnts + procs;    sdispls = rdispls + procs;    itemp_32 = sdispls + procs;    /* All-to-all transfer column pointers of each block.       Now the matrix view is P-by-P block-partition. */    /* n column starts for each column, and procs column ends for each block */    if ( !(colptr_send = intMalloc_dist(n + procs)) )	   ABORT("Malloc fails for colptr_send[]");    if ( !(colptr_blk = intMalloc_dist( (((size_t) n_loc)+1)*procs)) )	   ABORT("Malloc fails for colptr_blk[]");    for (i = 0, j = 0; i < procs; ++i) {        for (k = j; k < j + n_locs[i]; ++k) colptr_send[i+k] = colptr_loc[k];	colptr_send[i+k] = colptr_loc[k]; /* Add an END marker */	sendcnts[i] = n_locs[i] + 1;	assert(j == fst_rows[i]);	sdispls[i] = j + i;	recvcnts[i] = n_loc + 1;	rdispls[i] = i * (n_loc + 1);	j += n_locs[i]; /* First column of next block in colptr_loc[] */    }    MPI_Alltoallv(colptr_send, sendcnts, sdispls, mpi_int_t,		  colptr_blk, recvcnts, rdispls, mpi_int_t, grid->comm);    /* Adjust colptr_blk[] so that they contain the local indices of the       column pointers in the receive buffer. */    nnz = 0; /* The running sum of the nonzeros counted by far */    k = 0;    for (i = 0; i < procs; ++i) {	for (j = rdispls[i]; j < rdispls[i] + n_loc; ++j) {	    colnnz = colptr_blk[j+1] - colptr_blk[j];	    /*assert(k<=j);*/	    colptr_blk[k] = nnz;	    nnz += colnnz; /* Start of the next column */	    ++k;	}	colptr_blk[k++] = nnz; /* Add an END marker for each block */    }    /*assert(k == (n_loc+1)*procs);*/    /* Now prepare to transfer row indices and values. */    sdispls[0] = 0;    for (i = 0; i < procs-1; ++i) {        sendcnts[i] = colptr_loc[fst_rows[i+1]] - colptr_loc[fst_rows[i]];	sdispls[i+1] = sdispls[i] + sendcnts[i];    }    sendcnts[procs-1] = colptr_loc[n] - colptr_loc[fst_rows[procs-1]];    for (i = 0; i < procs; ++i) {        j = rdispls[i]; /* Point to this block in colptr_blk[]. */	recvcnts[i] = colptr_blk[j+n_loc] - colptr_blk[j];    }    rdispls[0] = 0; /* Recompute rdispls[] for row indices. */    for (i = 0; i < procs-1; ++i) rdispls[i+1] = rdispls[i] + recvcnts[i];    k = rdispls[procs-1] + recvcnts[procs-1]; /* Total received */    if ( !(rowind_recv = (int_t *) intMalloc_dist(2*k)) )        ABORT("Malloc fails for rowind_recv[]");    rowind_buf = rowind_recv + k;    MPI_Alltoallv(rowind_loc, sendcnts, sdispls, mpi_int_t,		  rowind_recv, recvcnts, rdispls, mpi_int_t, grid->comm);    if ( need_value ) {        if ( !(a_recv = (double *) doubleMalloc_dist(2*k)) )	    ABORT("Malloc fails for rowind_recv[]");	a_buf = a_recv + k;	MPI_Alltoallv(a_loc, sendcnts, sdispls, MPI_DOUBLE,                      a_recv, recvcnts, rdispls, MPI_DOUBLE,                      grid->comm);    }          /* Reset colptr_loc[] to point to the n_loc global columns. */    colptr_loc[0] = 0;    itemp = colptr_send;    for (j = 0; j < n_loc; ++j) {        colnnz = 0;	for (i = 0; i < procs; ++i) {	    k = i * (n_loc + 1) + j; /* j-th column in i-th block */	    colnnz += colptr_blk[k+1] - colptr_blk[k];	}	colptr_loc[j+1] = colptr_loc[j] + colnnz;	itemp[j] = colptr_loc[j]; /* Save a copy of the column starts */    }    itemp[n_loc] = colptr_loc[n_loc];          /* Merge blocks of row indices into columns of row indices. */    for (i = 0; i < procs; ++i) {        k = i * (n_loc + 1);	for (j = 0; j < n_loc; ++j) { /* i-th block */	    for (l = colptr_blk[k+j]; l < colptr_blk[k+j+1]; ++l) {	        rowind_buf[itemp[j]] = rowind_recv[l];		++itemp[j];	    }	}    }    if ( need_value ) {        for (j = 0; j < n_loc+1; ++j) itemp[j] = colptr_loc[j];        for (i = 0; i < procs; ++i) {	    k = i * (n_loc + 1);	    for (j = 0; j < n_loc; ++j) { /* i-th block */	        for (l = colptr_blk[k+j]; l < colptr_blk[k+j+1]; ++l) {		    a_buf[itemp[j]] = a_recv[l];		    ++itemp[j];		}	    }	}    }    /* ------------------------------------------------------------       SECOND PHASE: GATHER TO GLOBAL A IN COMPRESSED COLUMN FORMAT.       ------------------------------------------------------------*/    GA->nrow  = A->nrow;    GA->ncol  = A->ncol;    GA->Stype = SLU_NC;    GA->Dtype = A->Dtype;    GA->Mtype = A->Mtype;    GAstore = GA->Store = (NCformat *) SUPERLU_MALLOC ( sizeof(NCformat) );    if ( !GAstore ) ABORT ("SUPERLU_MALLOC fails for GAstore");    /* First gather the size of each piece. */    nnz_loc = colptr_loc[n_loc];    MPI_Allgather(&nnz_loc, 1, mpi_int_t, itemp, 1, mpi_int_t, grid->comm);    for (i = 0, nnz = 0; i < procs; ++i) nnz += itemp[i];    GAstore->nnz = nnz;        if ( !(GAstore->rowind = (int_t *) intMalloc_dist (nnz)) )        ABORT ("SUPERLU_MALLOC fails for GAstore->rowind[]");    if ( !(GAstore->colptr = (int_t *) intMalloc_dist (n+1)) )        ABORT ("SUPERLU_MALLOC fails for GAstore->colptr[]");          /* Allgatherv for row indices. */    rdispls[0] = 0;    for (i = 0; i < procs-1; ++i) {        rdispls[i+1] = rdispls[i] + itemp[i];        itemp_32[i] = itemp[i];    }    itemp_32[procs-1] = itemp[procs-1];    it = nnz_loc;    MPI_Allgatherv(rowind_buf, it, mpi_int_t, GAstore->rowind, 		   itemp_32, rdispls, mpi_int_t, grid->comm);    if ( need_value ) {      if ( !(GAstore->nzval = (double *) doubleMalloc_dist (nnz)) )          ABORT ("SUPERLU_MALLOC fails for GAstore->rnzval[]");      MPI_Allgatherv(a_buf, it, MPI_DOUBLE, GAstore->nzval, 		     itemp_32, rdispls, MPI_DOUBLE, grid->comm);    } else GAstore->nzval = NULL;    /* Now gather the column pointers. */    rdispls[0] = 0;    for (i = 0; i < procs-1; ++i) {        rdispls[i+1] = rdispls[i] + n_locs[i];        itemp_32[i] = n_locs[i];    }    itemp_32[procs-1] = n_locs[procs-1];    MPI_Allgatherv(colptr_loc, n_loc, mpi_int_t, GAstore->colptr, 		   itemp_32, rdispls, mpi_int_t, grid->comm);    /* Recompute column pointers. */    for (i = 1; i < procs; ++i) {        k = rdispls[i];	for (j = 0; j < n_locs[i]; ++j) GAstore->colptr[k++] += itemp[i-1];	itemp[i] += itemp[i-1]; /* prefix sum */    }    GAstore->colptr[n] = nnz;#if ( DEBUGlevel>=2 )    if ( !grid->iam ) {        printf("After pdCompRow_loc_to_CompCol_global()\n");	dPrint_CompCol_Matrix_dist(GA);    }#endif    SUPERLU_FREE(a_loc);    SUPERLU_FREE(rowind_loc);    SUPERLU_FREE(colptr_loc);    SUPERLU_FREE(fst_rows);    SUPERLU_FREE(recvcnts);    SUPERLU_FREE(colptr_send);    SUPERLU_FREE(colptr_blk);
pdutil.c - 源码说明

本页面展示了「LU分解求解矩阵方程组的解」中的 pdutil.c 源码文件，采用 C语言编程语言编写，共 528 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与分解相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?