📄 pzgssvx_abglobal.c
字号:
dprod = 1.0;#endif if ( job == 5 ) { if ( Equil ) { for (i = 0; i < n; ++i) { R1[i] = exp(R1[i]); C1[i] = exp(C1[i]); } for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; zd_mult(&a[i], &a[i], R1[irow]); /* Scale rows. */ zd_mult(&a[i], &a[i], C1[j]); /* Scale columns. */ rowind[i] = perm_r[irow];#if ( PRNTlevel>=2 ) if ( rowind[i] == j ) /* New diagonal */ dprod *= z_abs1(&a[i]);#endif } } /* Multiply together the scaling factors. */ if ( rowequ ) for (i = 0; i < m; ++i) R[i] *= R1[i]; else for (i = 0; i < m; ++i) R[i] = R1[i]; if ( colequ ) for (i = 0; i < n; ++i) C[i] *= C1[i]; else for (i = 0; i < n; ++i) C[i] = C1[i]; ScalePermstruct->DiagScale = BOTH; rowequ = colequ = 1; } else { /* No equilibration. */ for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; rowind[i] = perm_r[irow]; } } } SUPERLU_FREE (R1); SUPERLU_FREE (C1); } else { /* job = 2,3,4 */ for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; rowind[i] = perm_r[irow];#if ( PRNTlevel>=2 ) if ( rowind[i] == j ) { /* New diagonal */ if ( job == 2 || job == 3 ) dmin = SUPERLU_MIN(dmin, z_abs1(&a[i])); else if ( job == 4 ) dsum += z_abs1(&a[i]); else if ( job == 5 ) dprod *= z_abs1(&a[i]); }#endif } } }#if ( PRNTlevel>=2 ) if ( job == 2 || job == 3 ) { if ( !iam ) printf("\tsmallest diagonal %e\n", dmin); } else if ( job == 4 ) { if ( !iam ) printf("\tsum of diagonal %e\n", dsum); } else if ( job == 5 ) { if ( !iam ) printf("\t product of diagonal %e\n", dprod); }#endif } /* else !factored */ t = SuperLU_timer_() - t; stat->utime[ROWPERM] = t; } /* if options->RowPerm ... */ if ( !factored || options->IterRefine ) { /* Compute norm(A), which will be used to adjust small diagonal. */ if ( notran ) *(unsigned char *)norm = '1'; else *(unsigned char *)norm = 'I'; anorm = zlangs_dist(norm, A); } /* ------------------------------------------------------------ Perform the LU factorization. ------------------------------------------------------------*/ if ( !factored ) { t = SuperLU_timer_(); /* * Get column permutation vector perm_c[], according to permc_spec: * permc_spec = NATURAL: natural ordering * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A * permc_spec = MMD_ATA: minimum degree on structure of A'*A * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] */ permc_spec = options->ColPerm; if ( permc_spec != MY_PERMC && Fact == DOFACT ) /* Use an ordering provided by SuperLU */ get_perm_c_dist(iam, permc_spec, A, perm_c); /* Compute the elimination tree of Pc*(A'+A)*Pc' or Pc*A'*A*Pc' (a.k.a. column etree), depending on the choice of ColPerm. Adjust perm_c[] to be consistent with a postorder of etree. Permute columns of A to form A*Pc'. */ sp_colorder(options, A, perm_c, etree, &AC); /* Form Pc*A*Pc' to preserve the diagonal of the matrix Pr*A. */ ACstore = AC.Store; for (j = 0; j < n; ++j) for (i = ACstore->colbeg[j]; i < ACstore->colend[j]; ++i) { irow = ACstore->rowind[i]; ACstore->rowind[i] = perm_c[irow]; } stat->utime[COLPERM] = SuperLU_timer_() - t; /* Perform a symbolic factorization on matrix A and set up the nonzero data structures which are suitable for supernodal GENP. */ if ( Fact != SamePattern_SameRowPerm ) {#if ( PRNTlevel>=1 ) if ( !iam ) printf(".. symbfact(): relax %4d, maxsuper %4d, fill %4d\n", sp_ienv_dist(2), sp_ienv_dist(3), sp_ienv_dist(6));#endif t = SuperLU_timer_(); if ( !(Glu_freeable = (Glu_freeable_t *) SUPERLU_MALLOC(sizeof(Glu_freeable_t))) ) ABORT("Malloc fails for Glu_freeable."); iinfo = symbfact(options, iam, &AC, perm_c, etree, Glu_persist, Glu_freeable); stat->utime[SYMBFAC] = SuperLU_timer_() - t; if ( iinfo < 0 ) { QuerySpace_dist(n, -iinfo, Glu_freeable, &symb_mem_usage);#if ( PRNTlevel>=1 ) if ( !iam ) { printf("\tNo of supers %ld\n", Glu_persist->supno[n-1]+1); printf("\tSize of G(L) %ld\n", Glu_freeable->xlsub[n]); printf("\tSize of G(U) %ld\n", Glu_freeable->xusub[n]); printf("\tint %d, short %d, float %d, double %d\n", sizeof(int_t), sizeof(short), sizeof(float), sizeof(double)); printf("\tSYMBfact (MB):\tL\\U %.2f\ttotal %.2f\texpansions %d\n", symb_mem_usage.for_lu*1e-6, symb_mem_usage.total*1e-6, symb_mem_usage.expansions); }#endif } else { if ( !iam ) { fprintf(stderr, "symbfact() error returns %d\n", iinfo); exit(-1); } } } /* Distribute the L and U factors onto the process grid. */ t = SuperLU_timer_(); dist_mem_use = zdistribute(Fact, n, &AC, Glu_freeable, LUstruct, grid); stat->utime[DIST] = SuperLU_timer_() - t; /* Deallocate storage used in symbolic factor. */ if ( Fact != SamePattern_SameRowPerm ) { iinfo = symbfact_SubFree(Glu_freeable); SUPERLU_FREE(Glu_freeable); } /* Perform numerical factorization in parallel. */ t = SuperLU_timer_(); pzgstrf(options, m, n, anorm, LUstruct, grid, stat, info); stat->utime[FACT] = SuperLU_timer_() - t;#if ( PRNTlevel>=1 ) { int_t TinyPivots; float for_lu, total, max, avg, temp; zQuerySpace_dist(n, LUstruct, grid, &num_mem_usage); MPI_Reduce( &num_mem_usage.for_lu, &for_lu, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Reduce( &num_mem_usage.total, &total, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); temp = SUPERLU_MAX(symb_mem_usage.total, symb_mem_usage.for_lu + (float)dist_mem_use + num_mem_usage.for_lu); temp = SUPERLU_MAX(temp, num_mem_usage.total); MPI_Reduce( &temp, &max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm ); MPI_Reduce( &temp, &avg, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Allreduce( &stat->TinyPivots, &TinyPivots, 1, mpi_int_t, MPI_SUM, grid->comm ); stat->TinyPivots = TinyPivots; if ( !iam ) { printf("\tNUMfact (MB) all PEs:\tL\\U\t%.2f\tall\t%.2f\n", for_lu*1e-6, total*1e-6); printf("\tAll space (MB):" "\t\ttotal\t%.2f\tAvg\t%.2f\tMax\t%.2f\n", avg*1e-6, avg/grid->nprow/grid->npcol*1e-6, max*1e-6); printf("\tNumber of tiny pivots: %10d\n", stat->TinyPivots); } }#endif #if ( PRNTlevel>=2 ) if ( !iam ) printf(".. pzgstrf INFO = %d\n", *info);#endif } else if ( options->IterRefine ) { /* options->Fact==FACTORED */ /* Permute columns of A to form A*Pc' using the existing perm_c. * NOTE: rows of A were previously permuted to Pc*A. */ sp_colorder(options, A, perm_c, NULL, &AC); } /* if !factored ... */ /* ------------------------------------------------------------ Compute the solution matrix X. ------------------------------------------------------------*/ if ( nrhs ) { if ( !(b_work = doublecomplexMalloc_dist(n)) ) ABORT("Malloc fails for b_work[]"); /* ------------------------------------------------------------ Scale the right-hand side if equilibration was performed. ------------------------------------------------------------*/ if ( notran ) { if ( rowequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < m; ++i) zd_mult(&b_col[i], &b_col[i], R[i]); b_col += ldb; } } } else if ( colequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < m; ++i) zd_mult(&b_col[i], &b_col[i], C[i]); b_col += ldb; } } /* ------------------------------------------------------------ Permute the right-hand side to form Pr*B. ------------------------------------------------------------*/ if ( options->RowPerm != NO ) { if ( notran ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < m; ++i) b_work[perm_r[i]] = b_col[i]; for (i = 0; i < m; ++i) b_col[i] = b_work[i]; b_col += ldb; } } } /* ------------------------------------------------------------ Permute the right-hand side to form Pc*B. ------------------------------------------------------------*/ if ( notran ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < m; ++i) b_work[perm_c[i]] = b_col[i]; for (i = 0; i < m; ++i) b_col[i] = b_work[i]; b_col += ldb; } } /* Save a copy of the right-hand side. */ ldx = ldb; if ( !(X = doublecomplexMalloc_dist(((size_t)ldx) * nrhs)) ) ABORT("Malloc fails for X[]"); x_col = X; b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < ldb; ++i) x_col[i] = b_col[i]; x_col += ldx; b_col += ldb; } /* ------------------------------------------------------------ Solve the linear system. ------------------------------------------------------------*/ pzgstrs_Bglobal(n, LUstruct, grid, X, ldb, nrhs, stat, info); /* ------------------------------------------------------------ Use iterative refinement to improve the computed solution and compute error bounds and backward error estimates for it. ------------------------------------------------------------*/ if ( options->IterRefine ) { /* Improve the solution by iterative refinement. */ t = SuperLU_timer_(); pzgsrfs_ABXglobal(n, &AC, anorm, LUstruct, grid, B, ldb, X, ldx, nrhs, berr, stat, info); stat->utime[REFINE] = SuperLU_timer_() - t; } /* Permute the solution matrix X <= Pc'*X. */ for (j = 0; j < nrhs; j++) { b_col = &B[j*ldb]; x_col = &X[j*ldx]; for (i = 0; i < n; ++i) b_col[i] = x_col[perm_c[i]]; } /* Transform the solution matrix X to a solution of the original system before the equilibration. */ if ( notran ) { if ( colequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < n; ++i) zd_mult(&b_col[i], &b_col[i], C[i]); b_col += ldb; } } } else if ( rowequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < n; ++i) zd_mult(&b_col[i], &b_col[i], R[i]); b_col += ldb; } } SUPERLU_FREE(b_work); SUPERLU_FREE(X); } /* end if nrhs != 0 */#if ( PRNTlevel>=1 ) if ( !iam ) printf(".. DiagScale = %d\n", ScalePermstruct->DiagScale);#endif /* Deallocate storage. */ if ( Equil && Fact != SamePattern_SameRowPerm ) { switch ( ScalePermstruct->DiagScale ) { case NOEQUIL: SUPERLU_FREE(R); SUPERLU_FREE(C); break; case ROW: SUPERLU_FREE(C); break; case COL: SUPERLU_FREE(R); break; } } if ( !factored || (factored && options->IterRefine) ) Destroy_CompCol_Permuted_dist(&AC);#if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pzgssvx_ABglobal()");#endif}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -