/* pdutil.c */
    SUPERLU_FREE(rowind_recv);
    if ( need_value ) SUPERLU_FREE(a_recv);
#if ( DEBUGlevel>=1 )
    if ( !grid->iam )
        printf("sizeof(NCformat) %lu\n", (unsigned long) sizeof(NCformat));
    CHECK_MALLOC(grid->iam, "Exit pdCompRow_loc_to_CompCol_global");
#endif
    return 0;
} /* pdCompRow_loc_to_CompCol_global */

/*
 * Permute the distributed dense matrix: B <= perm(X).
 * perm[i] = j means the i-th row of X is in the j-th row of B.
 */
int pdPermute_Dense_Matrix(int_t fst_row, int_t m_loc,
                           int_t row_to_proc[], int_t perm[],
                           double X[], int ldx, double B[], int ldb,
                           int nrhs, gridinfo_t *grid)
{
    int_t i, j, k, l;
    int p, procs;
    int *sendcnts, *sendcnts_nrhs, *recvcnts, *recvcnts_nrhs;
    int *sdispls, *sdispls_nrhs, *rdispls, *rdispls_nrhs;
    int *ptr_to_ibuf, *ptr_to_dbuf;
    int_t *send_ibuf, *recv_ibuf;
    double *send_dbuf, *recv_dbuf;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pdPermute_Dense_Matrix()");
#endif

    procs = grid->nprow * grid->npcol;
    if ( !(sendcnts = SUPERLU_MALLOC(10*procs * sizeof(int))) )
        ABORT("Malloc fails for sendcnts[].");
    sendcnts_nrhs = sendcnts + procs;
    recvcnts = sendcnts_nrhs + procs;
    recvcnts_nrhs = recvcnts + procs;
    sdispls = recvcnts_nrhs + procs;
    sdispls_nrhs = sdispls + procs;
    rdispls = sdispls_nrhs + procs;
    rdispls_nrhs = rdispls + procs;
    ptr_to_ibuf = rdispls_nrhs + procs;
    ptr_to_dbuf = ptr_to_ibuf + procs;

    for (i = 0; i < procs; ++i) sendcnts[i] = 0;

    /* Count the number of X entries to be sent to each process. */
    for (i = fst_row; i < fst_row + m_loc; ++i) {
        p = row_to_proc[perm[i]];
        ++sendcnts[p];
    }
    MPI_Alltoall(sendcnts, 1, MPI_INT, recvcnts, 1, MPI_INT, grid->comm);

    sdispls[0] = rdispls[0] = 0;
    sdispls_nrhs[0] = rdispls_nrhs[0] = 0;
    sendcnts_nrhs[0] = sendcnts[0] * nrhs;
    recvcnts_nrhs[0] = recvcnts[0] * nrhs;
    for (i = 1; i < procs; ++i) {
        sdispls[i] = sdispls[i-1] + sendcnts[i-1];
        sdispls_nrhs[i] = sdispls[i] * nrhs;
        rdispls[i] = rdispls[i-1] + recvcnts[i-1];
        rdispls_nrhs[i] = rdispls[i] * nrhs;
        sendcnts_nrhs[i] = sendcnts[i] * nrhs;
        recvcnts_nrhs[i] = recvcnts[i] * nrhs;
    }
    k = sdispls[procs-1] + sendcnts[procs-1]; /* Total number of sends */
    l = rdispls[procs-1] + recvcnts[procs-1]; /* Total number of recvs */
    /* assert(k == m_loc); */
    /* assert(l == m_loc); */
    if ( !(send_ibuf = intMalloc_dist(k + l)) )
        ABORT("Malloc fails for send_ibuf[].");
    recv_ibuf = send_ibuf + k;
    if ( !(send_dbuf = doubleMalloc_dist((k + l)*nrhs)) )
        ABORT("Malloc fails for send_dbuf[].");
    recv_dbuf = send_dbuf + k * nrhs;

    for (i = 0; i < procs; ++i) {
        ptr_to_ibuf[i] = sdispls[i];
        ptr_to_dbuf[i] = sdispls_nrhs[i];
    }

    /* Fill in the send buffers: send_ibuf[] and send_dbuf[]. */
    for (i = fst_row; i < fst_row + m_loc; ++i) {
        j = perm[i];
        p = row_to_proc[j];
        send_ibuf[ptr_to_ibuf[p]] = j;
        j = ptr_to_dbuf[p];
        RHS_ITERATE(k) { /* RHS stored in row major in the buffer */
            send_dbuf[j++] = X[i-fst_row + k*ldx];
        }
        ++ptr_to_ibuf[p];
        ptr_to_dbuf[p] += nrhs;
    }

    /* Transfer the (permuted) row indices and numerical values. */
    MPI_Alltoallv(send_ibuf, sendcnts, sdispls, mpi_int_t,
                  recv_ibuf, recvcnts, rdispls, mpi_int_t, grid->comm);
    MPI_Alltoallv(send_dbuf, sendcnts_nrhs, sdispls_nrhs, MPI_DOUBLE,
                  recv_dbuf, recvcnts_nrhs, rdispls_nrhs, MPI_DOUBLE,
                  grid->comm);

    /* Copy the buffer into B. */
    for (i = 0, l = 0; i < m_loc; ++i) {
        j = recv_ibuf[i] - fst_row; /* Relative row number */
        RHS_ITERATE(k) { /* RHS stored in row major in the buffer */
            B[j + k*ldb] = recv_dbuf[l++];
        }
    }

    SUPERLU_FREE(sendcnts);
    SUPERLU_FREE(send_ibuf);
    SUPERLU_FREE(send_dbuf);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Exit pdPermute_Dense_Matrix()");
#endif
    return 0;
} /* pdPermute_Dense_Matrix */
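/*
 * Usage sketch for pdPermute_Dense_Matrix() above (illustrative only;
 * the variable names are hypothetical).  Each process passes the m_loc
 * rows it owns, fst_row .. fst_row + m_loc - 1, of the column-major
 * local block X; global row i of X lands in global row perm[i] of B,
 * which may be owned by a different process:
 *
 *     // row_to_proc[]: global row -> owning process, as built in
 *     // dSolveInit() below; X and B are m_loc-by-nrhs local blocks
 *     // with leading dimension m_loc.
 *     pdPermute_Dense_Matrix(fst_row, m_loc, row_to_proc, perm,
 *                            X, m_loc, B, m_loc, nrhs, grid);
 *
 * The exchange costs one MPI_Alltoall for the counts plus one
 * MPI_Alltoallv each for the row indices and the numerical values.
 */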
/*
 * Initialize the data structure for the solution phase.
 */
int dSolveInit(superlu_options_t *options, SuperMatrix *A,
               int_t perm_r[], int_t perm_c[], int_t nrhs,
               LUstruct_t *LUstruct, gridinfo_t *grid,
               SOLVEstruct_t *SOLVEstruct)
{
    int_t *row_to_proc, *inv_perm_c, *itemp;
    NRformat_loc *Astore;
    int_t i, fst_row, m_loc, p;
    int procs;

    Astore = (NRformat_loc *) A->Store;
    fst_row = Astore->fst_row;
    m_loc = Astore->m_loc;
    procs = grid->nprow * grid->npcol;

    if ( !(row_to_proc = intMalloc_dist(A->nrow)) )
        ABORT("Malloc fails for row_to_proc[]");
    SOLVEstruct->row_to_proc = row_to_proc;
    if ( !(inv_perm_c = intMalloc_dist(A->ncol)) )
        ABORT("Malloc fails for inv_perm_c[].");
    for (i = 0; i < A->ncol; ++i) inv_perm_c[perm_c[i]] = i;
    SOLVEstruct->inv_perm_c = inv_perm_c;

    /* ------------------------------------------------------------
       EVERY PROCESS NEEDS TO KNOW THE GLOBAL PARTITION.
       SET UP THE MAPPING BETWEEN ROWS AND PROCESSES.

       NOTE: For a process that does not own any row, fst_row must
       be set so that fst_row == A->nrow.
       ------------------------------------------------------------*/
    if ( !(itemp = intMalloc_dist(procs+1)) )
        ABORT("Malloc fails for itemp[]");
    MPI_Allgather(&fst_row, 1, mpi_int_t, itemp, 1, mpi_int_t, grid->comm);
    itemp[procs] = A->nrow;
    for (p = 0; p < procs; ++p) {
        for (i = itemp[p]; i < itemp[p+1]; ++i) row_to_proc[i] = p;
    }
#if ( DEBUGlevel>=2 )
    if ( !grid->iam ) {
        printf("fst_row = %d\n", (int) fst_row);
        PrintInt10("row_to_proc", A->nrow, row_to_proc);
        PrintInt10("inv_perm_c", A->ncol, inv_perm_c);
    }
#endif
    SUPERLU_FREE(itemp);

#if 0
    /* Compute the mapping between rows and processes. */
    /* XSL NOTE: What happens if the number of mapped processes is
       smaller than the total number of processes?  For a process
       without any row, let fst_row be EMPTY (-1).
       Make sure this case works! */
    MPI_Allgather(&fst_row, 1, mpi_int_t, itemp, 1, mpi_int_t, grid->comm);
    itemp[procs] = n;
    for (p = 0; p < procs; ++p) {
        j = itemp[p];
        if ( j != EMPTY ) {
            k = itemp[p+1];
            if ( k == EMPTY ) k = n;
            for (i = j; i < k; ++i) row_to_proc[i] = p;
        }
    }
#endif

    get_diag_procs(A->ncol, LUstruct->Glu_persist, grid,
                   &SOLVEstruct->num_diag_procs,
                   &SOLVEstruct->diag_procs,
                   &SOLVEstruct->diag_len);

    if ( !(SOLVEstruct->gstrs_comm = (pxgstrs_comm_t *)
           SUPERLU_MALLOC(sizeof(pxgstrs_comm_t))) )
        ABORT("Malloc fails for gstrs_comm[]");
    pxgstrs_init(A->ncol, m_loc, nrhs, fst_row, perm_r, perm_c, grid,
                 LUstruct->Glu_persist, SOLVEstruct);

    if ( !(SOLVEstruct->gsmv_comm = (pdgsmv_comm_t *)
           SUPERLU_MALLOC(sizeof(pdgsmv_comm_t))) )
        ABORT("Malloc fails for gsmv_comm[]");
    SOLVEstruct->A_colind_gsmv = NULL;

    options->SolveInitialized = YES;
    return 0;
} /* dSolveInit */
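/*
 * Lifecycle sketch (illustrative only, assuming options, A,
 * ScalePermstruct, LUstruct and grid are already set up as in the
 * pddrive examples): dSolveInit() is called once before the first
 * solve, and dSolveFinalize() below releases everything it allocated.
 *
 *     SOLVEstruct_t SOLVEstruct;
 *     if ( options.SolveInitialized == NO )
 *         dSolveInit(&options, &A, ScalePermstruct.perm_r,
 *                    ScalePermstruct.perm_c, nrhs, &LUstruct,
 *                    &grid, &SOLVEstruct);
 *     // ... triangular solves / iterative refinement ...
 *     dSolveFinalize(&options, &SOLVEstruct);
 */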
/*
 * Release the resources used for the solution phase.
 */
void dSolveFinalize(superlu_options_t *options, SOLVEstruct_t *SOLVEstruct)
{
    int_t *it;

    pxgstrs_finalize(SOLVEstruct->gstrs_comm);
    if ( options->RefineInitialized ) {
        pdgsmv_finalize(SOLVEstruct->gsmv_comm);
        options->RefineInitialized = NO;
    }
    SUPERLU_FREE(SOLVEstruct->gsmv_comm);
    SUPERLU_FREE(SOLVEstruct->row_to_proc);
    SUPERLU_FREE(SOLVEstruct->inv_perm_c);
    SUPERLU_FREE(SOLVEstruct->diag_procs);
    SUPERLU_FREE(SOLVEstruct->diag_len);
    if ( (it = SOLVEstruct->A_colind_gsmv) ) SUPERLU_FREE(it);
    options->SolveInitialized = NO;
} /* dSolveFinalize */

/*
 * Check the inf-norm of the error vector.
 */
void pdinf_norm_error(int iam, int_t n, int_t nrhs, double x[], int_t ldx,
                      double xtrue[], int_t ldxtrue, gridinfo_t *grid)
{
    double err, xnorm, temperr, tempxnorm;
    double *x_work, *xtrue_work;
    int i, j;

    for (j = 0; j < nrhs; j++) {
        x_work = &x[j*ldx];
        xtrue_work = &xtrue[j*ldxtrue];
        err = xnorm = 0.0;
        for (i = 0; i < n; i++) {
            err = SUPERLU_MAX(err, fabs(x_work[i] - xtrue_work[i]));
            xnorm = SUPERLU_MAX(xnorm, fabs(x_work[i]));
        }

        /* Get the global max err & xnorm. */
        temperr = err;
        tempxnorm = xnorm;
        MPI_Allreduce(&temperr, &err, 1, MPI_DOUBLE, MPI_MAX, grid->comm);
        MPI_Allreduce(&tempxnorm, &xnorm, 1, MPI_DOUBLE, MPI_MAX, grid->comm);

        err = err / xnorm;
        if ( !iam ) printf("\tSol %2d: ||X-Xtrue||/||X|| = %e\n", j, err);
    }
} /* pdinf_norm_error */
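/*
 * Usage sketch for pdinf_norm_error() above (illustrative only): every
 * process in the grid must participate, each passing its local rows of
 * the computed solution and of the known true solution; the two
 * MPI_Allreduce calls make the printed ratio max|X - Xtrue| / max|X|
 * global.
 *
 *     // b holds the m_loc local solution rows, xtrue the matching
 *     // rows of the reference solution (hypothetical names).
 *     pdinf_norm_error(grid.iam, m_loc, nrhs, b, ldb, xtrue, ldx, &grid);
 */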