/* pdutil.c */
    SUPERLU_FREE(rowind_recv);
    if ( need_value ) SUPERLU_FREE(a_recv);
#if ( DEBUGlevel>=1 )
    if ( !grid->iam )
        printf("sizeof(NCformat) %lu\n", (unsigned long) sizeof(NCformat));
    CHECK_MALLOC(grid->iam, "Exit pdCompRow_loc_to_CompCol_global");
#endif
    return 0;
} /* pdCompRow_loc_to_CompCol_global */

/*
 * Permute the distributed dense matrix: B <= perm(X).
 * perm[i] = j means the i-th row of X is in the j-th row of B.
 */
int pdPermute_Dense_Matrix(int_t fst_row, int_t m_loc,
                           int_t row_to_proc[], int_t perm[],
                           double X[], int ldx, double B[], int ldb,
                           int nrhs, gridinfo_t *grid)
{
    int_t i, j, k, l;
    int p, procs;
    int *sendcnts, *sendcnts_nrhs, *recvcnts, *recvcnts_nrhs;
    int *sdispls, *sdispls_nrhs, *rdispls, *rdispls_nrhs;
    int *ptr_to_ibuf, *ptr_to_dbuf;
    int_t *send_ibuf, *recv_ibuf;
    double *send_dbuf, *recv_dbuf;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pdPermute_Dense_Matrix()");
#endif

    procs = grid->nprow * grid->npcol;
    if ( !(sendcnts = SUPERLU_MALLOC(10*procs * sizeof(int))) )
        ABORT("Malloc fails for sendcnts[].");
    sendcnts_nrhs = sendcnts + procs;
    recvcnts = sendcnts_nrhs + procs;
    recvcnts_nrhs = recvcnts + procs;
    sdispls = recvcnts_nrhs + procs;
    sdispls_nrhs = sdispls + procs;
    rdispls = sdispls_nrhs + procs;
    rdispls_nrhs = rdispls + procs;
    ptr_to_ibuf = rdispls_nrhs + procs;
    ptr_to_dbuf = ptr_to_ibuf + procs;

    for (i = 0; i < procs; ++i) sendcnts[i] = 0;

    /* Count the number of X entries to be sent to each process. */
    for (i = fst_row; i < fst_row + m_loc; ++i) {
        p = row_to_proc[perm[i]];
        ++sendcnts[p];
    }
    MPI_Alltoall(sendcnts, 1, MPI_INT, recvcnts, 1, MPI_INT, grid->comm);

    sdispls[0] = rdispls[0] = 0;
    sdispls_nrhs[0] = rdispls_nrhs[0] = 0;
    sendcnts_nrhs[0] = sendcnts[0] * nrhs;
    recvcnts_nrhs[0] = recvcnts[0] * nrhs;
    for (i = 1; i < procs; ++i) {
        sdispls[i] = sdispls[i-1] + sendcnts[i-1];
        sdispls_nrhs[i] = sdispls[i] * nrhs;
        rdispls[i] = rdispls[i-1] + recvcnts[i-1];
        rdispls_nrhs[i] = rdispls[i] * nrhs;
        sendcnts_nrhs[i] = sendcnts[i] * nrhs;
        recvcnts_nrhs[i] = recvcnts[i] * nrhs;
    }
    k = sdispls[procs-1] + sendcnts[procs-1]; /* Total number of sends */
    l = rdispls[procs-1] + recvcnts[procs-1]; /* Total number of recvs */
    /* assert(k == m_loc); */
    /* assert(l == m_loc); */
    if ( !(send_ibuf = intMalloc_dist(k + l)) )
        ABORT("Malloc fails for send_ibuf[].");
    recv_ibuf = send_ibuf + k;
    if ( !(send_dbuf = doubleMalloc_dist((k + l)*nrhs)) )
        ABORT("Malloc fails for send_dbuf[].");
    recv_dbuf = send_dbuf + k * nrhs;

    for (i = 0; i < procs; ++i) {
        ptr_to_ibuf[i] = sdispls[i];
        ptr_to_dbuf[i] = sdispls_nrhs[i];
    }

    /* Fill in the send buffers: send_ibuf[] and send_dbuf[]. */
    for (i = fst_row; i < fst_row + m_loc; ++i) {
        j = perm[i];
        p = row_to_proc[j];
        send_ibuf[ptr_to_ibuf[p]] = j;
        j = ptr_to_dbuf[p];
        RHS_ITERATE(k) { /* RHS stored in row major in the buffer */
            send_dbuf[j++] = X[i-fst_row + k*ldx];
        }
        ++ptr_to_ibuf[p];
        ptr_to_dbuf[p] += nrhs;
    }

    /* Transfer the (permuted) row indices and numerical values. */
    MPI_Alltoallv(send_ibuf, sendcnts, sdispls, mpi_int_t,
                  recv_ibuf, recvcnts, rdispls, mpi_int_t, grid->comm);
    MPI_Alltoallv(send_dbuf, sendcnts_nrhs, sdispls_nrhs, MPI_DOUBLE,
                  recv_dbuf, recvcnts_nrhs, rdispls_nrhs, MPI_DOUBLE,
                  grid->comm);

    /* Copy the buffer into B. */
    for (i = 0, l = 0; i < m_loc; ++i) {
        j = recv_ibuf[i] - fst_row; /* Relative row number */
        RHS_ITERATE(k) { /* RHS stored in row major in the buffer */
            B[j + k*ldb] = recv_dbuf[l++];
        }
    }

    SUPERLU_FREE(sendcnts);
    SUPERLU_FREE(send_ibuf);
    SUPERLU_FREE(send_dbuf);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Exit pdPermute_Dense_Matrix()");
#endif
    return 0;
} /* pdPermute_Dense_Matrix */
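/*
 * Usage sketch for pdPermute_Dense_Matrix() above (illustrative only;
 * the variable names are hypothetical).  Each process passes the m_loc
 * rows it owns, fst_row .. fst_row + m_loc - 1, of the column-major
 * local block X; global row i of X lands in global row perm[i] of B,
 * which may be owned by a different process:
 *
 *     // row_to_proc[]: global row -> owning process, as built in
 *     // dSolveInit() below; X and B are m_loc-by-nrhs local blocks
 *     // with leading dimension m_loc.
 *     pdPermute_Dense_Matrix(fst_row, m_loc, row_to_proc, perm,
 *                            X, m_loc, B, m_loc, nrhs, grid);
 *
 * The exchange costs one MPI_Alltoall for the counts plus one
 * MPI_Alltoallv each for the row indices and the numerical values.
 */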
/*
 * Initialize the data structure for the solution phase.
 */
int dSolveInit(superlu_options_t *options, SuperMatrix *A,
               int_t perm_r[], int_t perm_c[], int_t nrhs,
               LUstruct_t *LUstruct, gridinfo_t *grid,
               SOLVEstruct_t *SOLVEstruct)
{
    int_t *row_to_proc, *inv_perm_c, *itemp;
    NRformat_loc *Astore;
    int_t i, fst_row, m_loc, p;
    int procs;

    Astore = (NRformat_loc *) A->Store;
    fst_row = Astore->fst_row;
    m_loc = Astore->m_loc;
    procs = grid->nprow * grid->npcol;

    if ( !(row_to_proc = intMalloc_dist(A->nrow)) )
        ABORT("Malloc fails for row_to_proc[]");
    SOLVEstruct->row_to_proc = row_to_proc;
    if ( !(inv_perm_c = intMalloc_dist(A->ncol)) )
        ABORT("Malloc fails for inv_perm_c[].");
    for (i = 0; i < A->ncol; ++i) inv_perm_c[perm_c[i]] = i;
    SOLVEstruct->inv_perm_c = inv_perm_c;

    /* ------------------------------------------------------------
       EVERY PROCESS NEEDS TO KNOW THE GLOBAL PARTITION.
       SET UP THE MAPPING BETWEEN ROWS AND PROCESSES.

       NOTE: For a process that does not own any row, fst_row must
       be set so that fst_row == A->nrow.
       ------------------------------------------------------------*/
    if ( !(itemp = intMalloc_dist(procs+1)) )
        ABORT("Malloc fails for itemp[]");
    MPI_Allgather(&fst_row, 1, mpi_int_t, itemp, 1, mpi_int_t, grid->comm);
    itemp[procs] = A->nrow;
    for (p = 0; p < procs; ++p) {
        for (i = itemp[p]; i < itemp[p+1]; ++i) row_to_proc[i] = p;
    }
#if ( DEBUGlevel>=2 )
    if ( !grid->iam ) {
        printf("fst_row = %d\n", (int) fst_row);
        PrintInt10("row_to_proc", A->nrow, row_to_proc);
        PrintInt10("inv_perm_c", A->ncol, inv_perm_c);
    }
#endif
    SUPERLU_FREE(itemp);

#if 0
    /* Compute the mapping between rows and processes. */
    /* XSL NOTE: What happens if the number of mapped processes is
       smaller than the total number of processes?  For a process
       without any row, let fst_row be EMPTY (-1).
       Make sure this case works! */
    MPI_Allgather(&fst_row, 1, mpi_int_t, itemp, 1, mpi_int_t, grid->comm);
    itemp[procs] = n;
    for (p = 0; p < procs; ++p) {
        j = itemp[p];
        if ( j != EMPTY ) {
            k = itemp[p+1];
            if ( k == EMPTY ) k = n;
            for (i = j; i < k; ++i) row_to_proc[i] = p;
        }
    }
#endif

    get_diag_procs(A->ncol, LUstruct->Glu_persist, grid,
                   &SOLVEstruct->num_diag_procs,
                   &SOLVEstruct->diag_procs,
                   &SOLVEstruct->diag_len);

    if ( !(SOLVEstruct->gstrs_comm = (pxgstrs_comm_t *)
           SUPERLU_MALLOC(sizeof(pxgstrs_comm_t))) )
        ABORT("Malloc fails for gstrs_comm[]");
    pxgstrs_init(A->ncol, m_loc, nrhs, fst_row, perm_r, perm_c, grid,
                 LUstruct->Glu_persist, SOLVEstruct);

    if ( !(SOLVEstruct->gsmv_comm = (pdgsmv_comm_t *)
           SUPERLU_MALLOC(sizeof(pdgsmv_comm_t))) )
        ABORT("Malloc fails for gsmv_comm[]");
    SOLVEstruct->A_colind_gsmv = NULL;

    options->SolveInitialized = YES;
    return 0;
} /* dSolveInit */
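/*
 * Lifecycle sketch (illustrative only, assuming options, A,
 * ScalePermstruct, LUstruct and grid are already set up as in the
 * pddrive examples): dSolveInit() is called once before the first
 * solve, and dSolveFinalize() below releases everything it allocated.
 *
 *     SOLVEstruct_t SOLVEstruct;
 *     if ( options.SolveInitialized == NO )
 *         dSolveInit(&options, &A, ScalePermstruct.perm_r,
 *                    ScalePermstruct.perm_c, nrhs, &LUstruct,
 *                    &grid, &SOLVEstruct);
 *     // ... triangular solves / iterative refinement ...
 *     dSolveFinalize(&options, &SOLVEstruct);
 */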
/*
 * Release the resources used for the solution phase.
 */
void dSolveFinalize(superlu_options_t *options, SOLVEstruct_t *SOLVEstruct)
{
    int_t *it;

    pxgstrs_finalize(SOLVEstruct->gstrs_comm);
    if ( options->RefineInitialized ) {
        pdgsmv_finalize(SOLVEstruct->gsmv_comm);
        options->RefineInitialized = NO;
    }
    SUPERLU_FREE(SOLVEstruct->gsmv_comm);
    SUPERLU_FREE(SOLVEstruct->row_to_proc);
    SUPERLU_FREE(SOLVEstruct->inv_perm_c);
    SUPERLU_FREE(SOLVEstruct->diag_procs);
    SUPERLU_FREE(SOLVEstruct->diag_len);
    if ( (it = SOLVEstruct->A_colind_gsmv) ) SUPERLU_FREE(it);
    options->SolveInitialized = NO;
} /* dSolveFinalize */

/*
 * Check the inf-norm of the error vector.
 */
void pdinf_norm_error(int iam, int_t n, int_t nrhs, double x[], int_t ldx,
                      double xtrue[], int_t ldxtrue, gridinfo_t *grid)
{
    double err, xnorm, temperr, tempxnorm;
    double *x_work, *xtrue_work;
    int i, j;

    for (j = 0; j < nrhs; j++) {
        x_work = &x[j*ldx];
        xtrue_work = &xtrue[j*ldxtrue];
        err = xnorm = 0.0;
        for (i = 0; i < n; i++) {
            err = SUPERLU_MAX(err, fabs(x_work[i] - xtrue_work[i]));
            xnorm = SUPERLU_MAX(xnorm, fabs(x_work[i]));
        }

        /* Get the global max err & xnorm. */
        temperr = err;
        tempxnorm = xnorm;
        MPI_Allreduce(&temperr, &err, 1, MPI_DOUBLE, MPI_MAX, grid->comm);
        MPI_Allreduce(&tempxnorm, &xnorm, 1, MPI_DOUBLE, MPI_MAX, grid->comm);

        err = err / xnorm;
        if ( !iam ) printf("\tSol %2d: ||X-Xtrue||/||X|| = %e\n", j, err);
    }
} /* pdinf_norm_error */
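/*
 * Usage sketch for pdinf_norm_error() above (illustrative only): every
 * process in the grid must participate, each passing its local rows of
 * the computed solution and of the known true solution; the two
 * MPI_Allreduce calls make the printed ratio max|X - Xtrue| / max|X|
 * global.
 *
 *     // b holds the m_loc local solution rows, xtrue the matching
 *     // rows of the reference solution (hypothetical names).
 *     pdinf_norm_error(grid.iam, m_loc, nrhs, b, ldb, xtrue, ldx, &grid);
 */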