📄 pzsymbfact_distdata.c
字号:
*/{ Glu_persist_t *Glu_persist = LUstruct->Glu_persist; Glu_freeable_t Glu_freeable_n; LocalLU_t *Llu = LUstruct->Llu; int_t bnnz, fsupc, i, irow, istart, j, jb, jj, k, len, len1, nsupc, nsupc_gb, ii, nprocs; int_t ljb; /* local block column number */ int_t nrbl; /* number of L blocks in current block column */ int_t nrbu; /* number of U blocks in current block column */ int_t gb; /* global block number; 0 < gb <= nsuper */ int_t lb; /* local block number; 0 < lb <= ceil(NSUPERS/Pr) */ int iam, jbrow, jbcol, jcol, kcol, mycol, myrow, pc, pr, ljb_i, ljb_j, p; int_t mybufmax[NBUFFERS]; NRformat_loc *Astore; doublecomplex *a; int_t *asub, *xa; int_t *ainf_colptr, *ainf_rowind, *asup_rowptr, *asup_colind; doublecomplex *asup_val, *ainf_val; int_t *xsup, *supno; /* supernode and column mapping */ int_t *lsub, *xlsub, *usub, *xusub; int_t nsupers, nsupers_i, nsupers_j, nsupers_ij; int_t next_ind; /* next available position in index[*] */ int_t next_val; /* next available position in nzval[*] */ int_t *index; /* indices consist of headers and row subscripts */ doublecomplex *lusup, *uval; /* nonzero values in L and U */ int_t *recvBuf; int *ptrToRecv, *nnzToRecv, *ptrToSend, *nnzToSend; doublecomplex **Lnzval_bc_ptr; /* size ceil(NSUPERS/Pc) */ int_t **Lrowind_bc_ptr; /* size ceil(NSUPERS/Pc) */ doublecomplex **Unzval_br_ptr; /* size ceil(NSUPERS/Pr) */ int_t **Ufstnz_br_ptr; /* size ceil(NSUPERS/Pr) */ /*-- Counts to be used in factorization. --*/ int_t *ToRecv, *ToSendD, **ToSendR; /*-- Counts to be used in lower triangular solve. --*/ int_t *fmod; /* Modification count for L-solve. */ int_t **fsendx_plist; /* Column process list to send down Xk. */ int_t nfrecvx = 0; /* Number of Xk I will receive. */ int_t nfsendx = 0; /* Number of Xk I will send */ int_t kseen; /*-- Counts to be used in upper triangular solve. --*/ int_t *bmod; /* Modification count for U-solve. */ int_t **bsendx_plist; /* Column process list to send down Xk. */ int_t nbrecvx = 0; /* Number of Xk I will receive. */ int_t nbsendx = 0; /* Number of Xk I will send */ int_t *ilsum; /* starting position of each supernode in the full array (local) */ int_t *ilsum_j, ldaspa_j; /* starting position of each supernode in the full array (local, block column wise) */ /*-- Auxiliary arrays; freed on return --*/ int_t *Urb_marker; /* block hit marker; size ceil(NSUPERS/Pr) */ int_t *LUb_length; /* L,U block length; size nsupers_ij */ int_t *LUb_indptr; /* pointers to L,U index[]; size nsupers_ij */ int_t *LUb_number; /* global block number; size nsupers_ij */ int_t *LUb_valptr; /* pointers to U nzval[]; size ceil(NSUPERS/Pc) */ int_t *Lrb_marker; /* block hit marker; size ceil(NSUPERS/Pr) */ doublecomplex *dense, *dense_col; /* SPA */ doublecomplex zero = {0.0, 0.0}; int_t ldaspa; /* LDA of SPA */ int_t iword, dword; float memStrLU, memA, memDist = 0.; /* memory used for redistributing the data, which does not include the memory for the numerical values of L and U */ float memNLU = 0.; /* memory allocated for storing the numerical values of L and U, that will be used in the numeric factorization */#if ( PRNTlevel>=1 ) int_t nLblocks = 0, nUblocks = 0;#endif /* Initialization. */ iam = grid->iam;#if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter dist_psymbtonum()");#endif myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); nprocs = grid->npcol * grid->nprow; for (i = 0; i < NBUFFERS; ++i) mybufmax[i] = 0; Astore = (NRformat_loc *) A->Store; iword = sizeof(int_t); dword = sizeof(doublecomplex); if (fact == SamePattern_SameRowPerm) { ABORT ("ERROR: call of dist_psymbtonum with fact equals SamePattern_SameRowPerm."); } if ((memStrLU = dist_symbLU (n, Pslu_freeable, Glu_persist, &xlsub, &lsub, &xusub, &usub, grid)) > 0) return (memStrLU); memDist += (-memStrLU); xsup = Glu_persist->xsup; /* supernode and column mapping */ supno = Glu_persist->supno; nsupers = supno[n-1] + 1; nsupers_i = CEILING( nsupers, grid->nprow );/* No of local row blocks */ nsupers_j = CEILING( nsupers, grid->npcol );/* No of local column blocks */ nsupers_ij = SUPERLU_MAX(nsupers_i, nsupers_j); if ( !(ilsum = intMalloc_dist(nsupers_i+1)) ) { fprintf (stderr, "Malloc fails for ilsum[]."); return (memDist + memNLU); } memNLU += (nsupers_i+1) * iword; if ( !(ilsum_j = intMalloc_dist(nsupers_j+1)) ) { fprintf (stderr, "Malloc fails for ilsum_j[]."); return (memDist + memNLU); } memDist += (nsupers_j+1) * iword; /* Compute ldaspa and ilsum[], ldaspa_j and ilsum_j[]. */ ilsum[0] = 0; ldaspa = 0; for (gb = 0; gb < nsupers; gb++) if ( myrow == PROW( gb, grid ) ) { i = SuperSize( gb ); ldaspa += i; lb = LBi( gb, grid ); ilsum[lb + 1] = ilsum[lb] + i; } ilsum[nsupers_i] = ldaspa; ldaspa_j = 0; ilsum_j[0] = 0; for (gb = 0; gb < nsupers; gb++) if (mycol == PCOL( gb, grid )) { i = SuperSize( gb ); ldaspa_j += i; lb = LBj( gb, grid ); ilsum_j[lb + 1] = ilsum_j[lb] + i; } ilsum_j[nsupers_j] = ldaspa_j; if ((memA = zdist_A(A, ScalePermstruct, Glu_persist, grid, &ainf_colptr, &ainf_rowind, &ainf_val, &asup_rowptr, &asup_colind, &asup_val, ilsum, ilsum_j)) > 0) return (memDist + memA + memNLU); memDist += (-memA); /* ------------------------------------------------------------ FIRST TIME CREATING THE L AND U DATA STRUCTURES. ------------------------------------------------------------*/ /* We first need to set up the L and U data structures and then * propagate the values of A into them. */ if ( !(ToRecv = intCalloc_dist(nsupers)) ) { fprintf(stderr, "Calloc fails for ToRecv[]."); return (memDist + memNLU); } memNLU += nsupers * iword; k = CEILING( nsupers, grid->npcol ); /* Number of local column blocks */ if ( !(ToSendR = (int_t **) SUPERLU_MALLOC(k*sizeof(int_t*))) ) { fprintf(stderr, "Malloc fails for ToSendR[]."); return (memDist + memNLU); } memNLU += k*sizeof(int_t*); j = k * grid->npcol; if ( !(index = intMalloc_dist(j)) ) { fprintf(stderr, "Malloc fails for index[]."); return (memDist + memNLU); } memNLU += j*iword; for (i = 0; i < j; ++i) index[i] = EMPTY; for (i = 0,j = 0; i < k; ++i, j += grid->npcol) ToSendR[i] = &index[j]; /* Auxiliary arrays used to set up L and U block data structures. They are freed on return. */ if ( !(LUb_length = intCalloc_dist(nsupers_ij)) ) { fprintf(stderr, "Calloc fails for LUb_length[]."); return (memDist + memNLU); } if ( !(LUb_indptr = intMalloc_dist(nsupers_ij)) ) { fprintf(stderr, "Malloc fails for LUb_indptr[]."); return (memDist + memNLU); } if ( !(LUb_number = intCalloc_dist(nsupers_ij)) ) { fprintf(stderr, "Calloc fails for LUb_number[]."); return (memDist + memNLU); } if ( !(LUb_valptr = intCalloc_dist(nsupers_ij)) ) { fprintf(stderr, "Calloc fails for LUb_valptr[]."); return (memDist + memNLU); } memDist += 4 * nsupers_ij * iword; k = CEILING( nsupers, grid->nprow ); /* Pointers to the beginning of each block row of U. */ if ( !(Unzval_br_ptr = (doublecomplex**)SUPERLU_MALLOC(nsupers_i * sizeof(doublecomplex*))) ) { fprintf(stderr, "Malloc fails for Unzval_br_ptr[]."); return (memDist + memNLU); } if ( !(Ufstnz_br_ptr = (int_t**)SUPERLU_MALLOC(nsupers_i * sizeof(int_t*))) ) { fprintf(stderr, "Malloc fails for Ufstnz_br_ptr[]."); return (memDist + memNLU); } memNLU += nsupers_i*sizeof(doublecomplex*) + nsupers_i*sizeof(int_t*); Unzval_br_ptr[nsupers_i-1] = NULL; Ufstnz_br_ptr[nsupers_i-1] = NULL; if ( !(ToSendD = intCalloc_dist(nsupers_i)) ) { fprintf(stderr, "Malloc fails for ToSendD[]."); return (memDist + memNLU); } memNLU += nsupers_i*iword; if ( !(Urb_marker = intCalloc_dist(nsupers_j))) { fprintf(stderr, "Calloc fails for rb_marker[]."); return (memDist + memNLU); } if ( !(Lrb_marker = intCalloc_dist( nsupers_i ))) { fprintf(stderr, "Calloc fails for rb_marker[]."); return (memDist + memNLU); } memDist += (nsupers_i + nsupers_j)*iword; /* Auxiliary arrays used to set up L, U block data structures. They are freed on return. k is the number of local row blocks. */ if ( !(dense = doublecomplexCalloc_dist(SUPERLU_MAX(ldaspa, ldaspa_j) * sp_ienv_dist(3))) ) { fprintf(stderr, "Calloc fails for SPA dense[]."); return (memDist + memNLU); } /* These counts will be used for triangular solves. */ if ( !(fmod = intCalloc_dist(nsupers_i)) ) { fprintf(stderr, "Calloc fails for fmod[]."); return (memDist + memNLU); } if ( !(bmod = intCalloc_dist(nsupers_i)) ) { fprintf(stderr, "Calloc fails for bmod[]."); return (memDist + memNLU); } /* ------------------------------------------------ */ memNLU += 2*nsupers_i*iword + SUPERLU_MAX(ldaspa, ldaspa_j)*sp_ienv_dist(3)*dword; /* Pointers to the beginning of each block column of L. */ if ( !(Lnzval_bc_ptr = (doublecomplex**)SUPERLU_MALLOC(nsupers_j * sizeof(doublecomplex*))) ) { fprintf(stderr, "Malloc fails for Lnzval_bc_ptr[]."); return (memDist + memNLU); } if ( !(Lrowind_bc_ptr = (int_t**)SUPERLU_MALLOC(nsupers_j * sizeof(int_t*))) ) { fprintf(stderr, "Malloc fails for Lrowind_bc_ptr[]."); return (memDist + memNLU); } memNLU += nsupers_j * sizeof(doublecomplex*) + nsupers_j * sizeof(int_t*); Lnzval_bc_ptr[nsupers_j-1] = NULL; Lrowind_bc_ptr[nsupers_j-1] = NULL; /* These lists of processes will be used for triangular solves. */ if ( !(fsendx_plist = (int_t **) SUPERLU_MALLOC(nsupers_j*sizeof(int_t*))) ) { fprintf(stderr, "Malloc fails for fsendx_plist[]."); return (memDist + memNLU); } len = nsupers_j * grid->nprow; if ( !(index = intMalloc_dist(len)) ) { fprintf(stderr, "Malloc fails for fsendx_plist[0]"); return (memDist + memNLU); } for (i = 0; i < len; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < nsupers_j; ++i, j += grid->nprow) fsendx_plist[i] = &index[j]; if ( !(bsendx_plist = (int_t **) SUPERLU_MALLOC(nsupers_j*sizeof(int_t*))) ) { fprintf(stderr, "Malloc fails for bsendx_plist[]."); return (memDist + memNLU); } if ( !(index = intMalloc_dist(len)) ) { fprintf(stderr, "Malloc fails for bsendx_plist[0]"); return (memDist + memNLU); } for (i = 0; i < len; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < nsupers_j; ++i, j += grid->nprow) bsendx_plist[i] = &index[j]; /* -------------------------------------------------------------- */ memNLU += 2*nsupers_j*sizeof(int_t*) + 2*len*iword; /*------------------------------------------------------------ PROPAGATE ROW SUBSCRIPTS AND VALUES OF A INTO L AND U BLOCKS. THIS ACCOUNTS FOR ONE-PASS PROCESSING OF A, L AND U. ------------------------------------------------------------*/ for (jb = 0; jb < nsupers; jb++) { jbcol = PCOL( jb, grid ); jbrow = PROW( jb, grid ); ljb_j = LBj( jb, grid ); /* Local block number column wise */ ljb_i = LBi( jb, grid); /* Local block number row wise */ fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); if ( myrow == jbrow ) { /* Block row jb in my process row */ /* Scatter A into SPA. */ for (j = ilsum[ljb_i], dense_col = dense; j < ilsum[ljb_i]+nsupc; j++) { for (i = asup_rowptr[j]; i < asup_rowptr[j+1]; i++) { if (i >= asup_rowptr[ilsum[nsupers_i]]) printf ("ERR7\n"); jcol = asup_colind[i]; if (jcol >= n) printf ("Pe[%d] ERR distsn jb %d gb %d j %d jcol %d\n", iam, jb, gb, j, jcol); gb = BlockNum( jcol ); lb = LBj( gb, grid ); if (gb >= nsupers || lb >= nsupers_j) printf ("ERR8\n"); jcol = ilsum_j[lb] + jcol - FstBlockC( gb ); if (jcol >= ldaspa_j) printf ("Pe[%d] ERR1 jb %d gb %d j %d jcol %d\n", iam, jb, gb, j, jcol); dense_col[jcol] = asup_val[i]; } dense_col += ldaspa_j; } /*------------------------------------------------ * SET UP U BLOCKS. *------------------------------------------------*/ /* Count number of blocks and length of each block. */ nrbu = 0; len = 0; /* Number of column subscripts I own. */ len1 = 0; /* number of fstnz subscripts */ for (i = xusub[ljb_i]; i < xusub[ljb_i+1]; i++) { if (i >= xusub[nsupers_i]) printf ("ERR10\n"); jcol = usub[i]; gb = BlockNum( jcol ); /* Global block number */ /*if (fsupc <= 146445 && 146445 < fsupc + nsupc && jcol == 397986) printf ("Pe[%d] [%d %d] elt [%d] jbcol %d pc %d\n", iam, jb, gb, jcol, jbcol, pc); */ lb = LBj( gb, grid ); /* Local block number */ pc = PCOL( gb, grid ); /* Process col owning this block */ if (mycol == jbcol) ToSendR[ljb_j][pc] = YES; /* if (mycol == jbcol && mycol != pc) ToSendR[ljb_j][pc] = YES; */ pr = PROW( gb, grid ); if ( pr != jbrow && mycol == pc) bsendx_plist[lb][jbrow] = YES; if (mycol == pc) { len += nsupc; LUb_length[lb] += nsupc; ToSendD[ljb_i] = YES; if (Urb_marker[lb] <= jb) { /* First see this block */ if (Urb_marker[lb] == FALSE && gb != jb && myrow != pr) nbrecvx ++; Urb_marker[lb] = jb + 1; LUb_number[nrbu] = gb; /* if (gb == 391825 && jb == 145361) printf ("Pe[%d] T1 [%d %d] nrbu %d \n", iam, jb, gb, nrbu); */ nrbu ++; len1 += SuperSize( gb ); if ( gb != jb )/* Exclude diagonal block. */ ++bmod[ljb_i];/* Mod. count for back solve */#if ( PRNTlevel>=1 ) ++nUblocks;#endif } } } /* for i ... */ if ( nrbu ) { /* Sort the blocks of U in increasing block column index. SuperLU_DIST assumes this is true */ /* simple insert sort algorithm */ /* to be transformed in quick sort */ for (j = 1; j < nrbu; j++) { k = LUb_number[j]; for (i=j-1; i>=0 && LUb_number[i] > k; i--) { LUb_number[i+1] = LUb_number[i]; } LUb_number[i+1] = k; } /* Set up the initial pointers for each block in index[] and nzval[]. */ /* Add room for descriptors */ len1 += BR_HEADER + nrbu * UB_DESCRIPTOR; if ( !(index = intMalloc_dist(len1+1)) ) { fprintf (stderr, "Malloc fails for Uindex[]"); return (memDist + memNLU); } Ufstnz_br_ptr[ljb_i] = index; if (!(Unzval_br_ptr[ljb_i] = doublecomplexMalloc_dist(len))) { fprintf (stderr, "Malloc fails for Unzval_br_ptr[*][]"); return (memDist + memNLU); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -