📄 pzgssvx_abglobal.c
字号:
* Column permutation vector, which defines the * permutation matrix Pc; perm_c[i] = j means column i of A is * in position j in A*Pc. * If options->ColPerm = MY_PERMC or options->Fact = SamePattern * or options->Fact = SamePattern_SameRowPerm, perm_c is an * input argument; otherwise, it is an output argument. * On exit, perm_c may be overwritten by the product of the input * perm_c and a permutation that postorders the elimination tree * of Pc*A'*A*Pc'; perm_c is not changed if the elimination tree * is already in postorder. * * o R (double*) dimension (A->nrow) * The row scale factors for A. * If DiagScale = ROW or BOTH, A is multiplied on the left by * diag(R). * If DiagScale = NOEQUIL or COL, R is not defined. * If options->Fact = FACTORED or SamePattern_SameRowPerm, R is * an input argument; otherwise, R is an output argument. * * o C (double*) dimension (A->ncol) * The column scale factors for A. * If DiagScale = COL or BOTH, A is multiplied on the right by * diag(C). * If DiagScale = NOEQUIL or ROW, C is not defined. * If options->Fact = FACTORED or SamePattern_SameRowPerm, C is * an input argument; otherwise, C is an output argument. * * B (input/output) doublecomplex* * On entry, the right-hand side matrix of dimension (A->nrow, nrhs). * On exit, the solution matrix if info = 0; * * NOTE: Currently, B must reside in all processes when calling * this routine. * * ldb (input) int (global) * The leading dimension of matrix B. * * nrhs (input) int (global) * The number of right-hand sides. * If nrhs = 0, only LU decomposition is performed, the forward * and back substitutions are skipped. * * grid (input) gridinfo_t* * The 2D process mesh. It contains the MPI communicator, the number * of process rows (NPROW), the number of process columns (NPCOL), * and my process rank. It is an input argument to all the * parallel routines. * Grid can be initialized by subroutine SUPERLU_GRIDINIT. * See superlu_zdefs.h for the definition of 'gridinfo_t'. * * LUstruct (input/output) LUstruct_t* * The data structures to store the distributed L and U factors. * It contains the following fields: * * o etree (int*) dimension (A->ncol) * Elimination tree of Pc*(A'+A)*Pc' or Pc*A'*A*Pc', dimension A->ncol. * It is computed in sp_colorder() during the first factorization, * and is reused in the subsequent factorizations of the matrices * with the same nonzero pattern. * On exit of sp_colorder(), the columns of A are permuted so that * the etree is in a certain postorder. This postorder is reflected * in ScalePermstruct->perm_c. * NOTE: * Etree is a vector of parent pointers for a forest whose vertices * are the integers 0 to A->ncol-1; etree[root]==A->ncol. * * o Glu_persist (Glu_persist_t*) * Global data structure (xsup, supno) replicated on all processes, * describing the supernode partition in the factored matrices * L and U: * xsup[s] is the leading column of the s-th supernode, * supno[i] is the supernode number to which column i belongs. * * o Llu (LocalLU_t*) * The distributed data structures to store L and U factors. * See superlu_ddefs.h for the definition of 'LocalLU_t'. * * berr (output) double*, dimension (nrhs) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any element of A or B that makes X(j) an exact solution). * * stat (output) SuperLUStat_t* * Record the statistics on runtime and floating-point operation count. * See util.h for the definition of 'SuperLUStat_t'. * * info (output) int* * = 0: successful exit * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly singular, * so the solution could not be computed. * > A->ncol: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. * * * See superlu_zdefs.h for the definitions of various data types. * */ SuperMatrix AC; NCformat *Astore; NCPformat *ACstore; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; Glu_freeable_t *Glu_freeable; /* The nonzero structures of L and U factors, which are replicated on all processrs. (lsub, xlsub) contains the compressed subscript of supernodes in L. (usub, xusub) contains the compressed subscript of nonzero segments in U. If options->Fact != SamePattern_SameRowPerm, they are computed by SYMBFACT routine, and then used by DDISTRIBUTE routine. They will be freed after DDISTRIBUTE routine. If options->Fact == SamePattern_SameRowPerm, these structures are not used. */ fact_t Fact; doublecomplex *a; int_t *perm_r; /* row permutations from partial pivoting */ int_t *perm_c; /* column permutation vector */ int_t *etree; /* elimination tree */ int_t *colptr, *rowind; int_t colequ, Equil, factored, job, notran, rowequ; int_t i, iinfo, j, irow, m, n, nnz, permc_spec, dist_mem_use; int iam; int ldx; /* LDA for matrix X (global). */ char equed[1], norm[1]; double *C, *R, *C1, *R1, amax, anorm, colcnd, rowcnd; doublecomplex *X, *b_col, *b_work, *x_col; double t; static mem_usage_t num_mem_usage, symb_mem_usage;#if ( PRNTlevel>= 2 ) double dmin, dsum, dprod;#endif /* Test input parameters. */ *info = 0; Fact = options->Fact; if ( Fact < 0 || Fact > FACTORED ) *info = -1; else if ( options->RowPerm < 0 || options->RowPerm > MY_PERMR ) *info = -1; else if ( options->ColPerm < 0 || options->ColPerm > MY_PERMC ) *info = -1; else if ( options->IterRefine < 0 || options->IterRefine > EXTRA ) *info = -1; else if ( options->IterRefine == EXTRA ) { *info = -1; fprintf(stderr, "Extra precise iterative refinement yet to support."); } else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NC || A->Dtype != SLU_Z || A->Mtype != SLU_GE ) *info = -2; else if ( ldb < A->nrow ) *info = -5; else if ( nrhs < 0 ) *info = -6; if ( *info ) { i = -(*info); pxerbla("pzgssvx_ABglobal", grid, -*info); return; } /* Initialization */ factored = (Fact == FACTORED); Equil = (!factored && options->Equil == YES); notran = (options->Trans == NOTRANS); iam = grid->iam; job = 5; m = A->nrow; n = A->ncol; Astore = A->Store; nnz = Astore->nnz; a = Astore->nzval; colptr = Astore->colptr; rowind = Astore->rowind; if ( factored || (Fact == SamePattern_SameRowPerm && Equil) ) { rowequ = (ScalePermstruct->DiagScale == ROW) || (ScalePermstruct->DiagScale == BOTH); colequ = (ScalePermstruct->DiagScale == COL) || (ScalePermstruct->DiagScale == BOTH); } else rowequ = colequ = FALSE;#if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pzgssvx_ABglobal()");#endif perm_r = ScalePermstruct->perm_r; perm_c = ScalePermstruct->perm_c; etree = LUstruct->etree; R = ScalePermstruct->R; C = ScalePermstruct->C; if ( Equil ) { /* Allocate storage if not done so before. */ switch ( ScalePermstruct->DiagScale ) { case NOEQUIL: if ( !(R = (double *) doubleMalloc_dist(m)) ) ABORT("Malloc fails for R[]."); if ( !(C = (double *) doubleMalloc_dist(n)) ) ABORT("Malloc fails for C[]."); ScalePermstruct->R = R; ScalePermstruct->C = C; break; case ROW: if ( !(C = (double *) doubleMalloc_dist(n)) ) ABORT("Malloc fails for C[]."); ScalePermstruct->C = C; break; case COL: if ( !(R = (double *) doubleMalloc_dist(m)) ) ABORT("Malloc fails for R[]."); ScalePermstruct->R = R; break; } } /* ------------------------------------------------------------ Diagonal scaling to equilibrate the matrix. ------------------------------------------------------------*/ if ( Equil ) {#if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter equil");#endif t = SuperLU_timer_(); if ( Fact == SamePattern_SameRowPerm ) { /* Reuse R and C. */ switch ( ScalePermstruct->DiagScale ) { case NOEQUIL: break; case ROW: for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; zd_mult(&a[i], &a[i], R[i]); /* Scale rows. */ } } break; case COL: for (j = 0; j < n; ++j) for (i = colptr[j]; i < colptr[j+1]; ++i) zd_mult(&a[i], &a[i], C[j]); /* Scale columns. */ break; case BOTH: for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; zd_mult(&a[i], &a[i], R[irow]); /* Scale rows. */ zd_mult(&a[i], &a[i], C[j]); /* Scale columns. */ } } break; } } else { if ( !iam ) { /* Compute row and column scalings to equilibrate matrix A. */ zgsequ_dist(A, R, C, &rowcnd, &colcnd, &amax, &iinfo); MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm ); if ( iinfo == 0 ) { MPI_Bcast( R, m, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( C, n, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &rowcnd, 1, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &colcnd, 1, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &amax, 1, MPI_DOUBLE, 0, grid->comm ); } else { if ( iinfo > 0 ) { if ( iinfo <= m ) fprintf(stderr, "The %d-th row of A is exactly zero\n", iinfo); else fprintf(stderr, "The %d-th column of A is exactly zero\n", iinfo-n); exit(-1); } } } else { MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm ); if ( iinfo == 0 ) { MPI_Bcast( R, m, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( C, n, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &rowcnd, 1, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &colcnd, 1, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &amax, 1, MPI_DOUBLE, 0, grid->comm ); } else { ABORT("ZGSEQU failed\n"); } } /* Equilibrate matrix A. */ zlaqgs_dist(A, R, C, rowcnd, colcnd, amax, equed); if ( lsame_(equed, "R") ) { ScalePermstruct->DiagScale = rowequ = ROW; } else if ( lsame_(equed, "C") ) { ScalePermstruct->DiagScale = colequ = COL; } else if ( lsame_(equed, "B") ) { ScalePermstruct->DiagScale = BOTH; rowequ = ROW; colequ = COL; } else ScalePermstruct->DiagScale = NOEQUIL;#if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. equilibrated? *equed = %c\n", *equed); /*fflush(stdout);*/ }#endif } /* if Fact ... */ stat->utime[EQUIL] = SuperLU_timer_() - t;#if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit equil");#endif } /* if Equil ... */ /* ------------------------------------------------------------ Permute rows of A. ------------------------------------------------------------*/ if ( options->RowPerm != NO ) { t = SuperLU_timer_(); if ( Fact == SamePattern_SameRowPerm /* Reuse perm_r. */ || options->RowPerm == MY_PERMR ) { /* Use my perm_r. */ for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; rowind[i] = perm_r[irow]; } } } else if ( !factored ) { if ( job == 5 ) { /* Allocate storage for scaling factors. */ if ( !(R1 = (double *) SUPERLU_MALLOC(m * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for R1[]"); if ( !(C1 = (double *) SUPERLU_MALLOC(n * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for C1[]"); } if ( !iam ) { /* Process 0 finds a row permutation for large diagonal. */ zldperm(job, m, nnz, colptr, rowind, a, perm_r, R1, C1); MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm ); if ( job == 5 && Equil ) { MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm ); } } else { MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm ); if ( job == 5 && Equil ) { MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm ); } }#if ( PRNTlevel>=2 ) dmin = dlamch_("Overflow"); dsum = 0.0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -