📄 pzsymbfact_distdata.c
字号:
memNLU += (len1+1)*iword + len*dword; uval = Unzval_br_ptr[ljb_i]; mybufmax[2] = SUPERLU_MAX( mybufmax[2], len1 ); mybufmax[3] = SUPERLU_MAX( mybufmax[3], len ); index[0] = nrbu; /* Number of column blocks */ index[1] = len; /* Total length of nzval[] */ index[2] = len1; /* Total length of index */ index[len1] = -1; /* End marker */ next_ind = BR_HEADER; next_val = 0; for (k = 0; k < nrbu; k++) { gb = LUb_number[k]; lb = LBj( gb, grid ); len = LUb_length[lb]; LUb_length[lb] = 0; /* Reset vector of block length */ index[next_ind++] = gb; /* Descriptor */ index[next_ind++] = len; LUb_indptr[lb] = next_ind; for (; next_ind < LUb_indptr[lb] + SuperSize( gb ); next_ind++) index[next_ind] = FstBlockC( jb + 1 ); LUb_valptr[lb] = next_val; next_val += len; } /* Propagate the fstnz subscripts to Ufstnz_br_ptr[], and the initial values of A from SPA into Unzval_br_ptr[]. */ for (i = xusub[ljb_i]; i < xusub[ljb_i+1]; i++) { jcol = usub[i]; gb = BlockNum( jcol ); if ( mycol == PCOL( gb, grid ) ) { lb = LBj( gb, grid ); k = LUb_indptr[lb]; /* Start fstnz in index */ index[k + jcol - FstBlockC( gb )] = FstBlockC( jb ); } } /* for i ... */ for (i = 0; i < nrbu; i++) { gb = LUb_number[i]; lb = LBj( gb, grid ); next_ind = LUb_indptr[lb]; k = FstBlockC( jb + 1); jcol = ilsum_j[lb]; for (jj = 0; jj < SuperSize( gb ); jj++, jcol++) { dense_col = dense; j = index[next_ind+jj]; for (ii = j; ii < k; ii++) { uval[LUb_valptr[lb]++] = dense_col[jcol]; dense_col[jcol] = zero; dense_col += ldaspa_j; } } } } else { Ufstnz_br_ptr[ljb_i] = NULL; Unzval_br_ptr[ljb_i] = NULL; } /* if nrbu ... */ } /* if myrow == jbrow */ /*------------------------------------------------ * SET UP L BLOCKS. *------------------------------------------------*/ if (mycol == jbcol) { /* Block column jb in my process column */ /* Scatter A_inf into SPA. */ for (j = ilsum_j[ljb_j], dense_col = dense; j < ilsum_j[ljb_j] + nsupc; j++) { for (i = ainf_colptr[j]; i < ainf_colptr[j+1]; i++) { irow = ainf_rowind[i]; if (irow >= n) printf ("Pe[%d] ERR1\n", iam); gb = BlockNum( irow ); if (gb >= nsupers) printf ("Pe[%d] ERR5\n", iam); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); irow = ilsum[lb] + irow - FstBlockC( gb ); if (irow >= ldaspa) printf ("Pe[%d] ERR0\n", iam); dense_col[irow] = ainf_val[i]; } } dense_col += ldaspa; } /* sort the indices of the diagonal block at the beginning of xlsub */ if (myrow == jbrow) { k = xlsub[ljb_j]; for (i = xlsub[ljb_j]; i < xlsub[ljb_j+1]; i++) { irow = lsub[i]; if (irow < nsupc + fsupc && i != k+irow-fsupc) { lsub[i] = lsub[k + irow - fsupc]; lsub[k + irow - fsupc] = irow; i --; } } } /* Count number of blocks and length of each block. */ nrbl = 0; len = 0; /* Number of row subscripts I own. */ kseen = 0; for (i = xlsub[ljb_j]; i < xlsub[ljb_j+1]; i++) { irow = lsub[i]; gb = BlockNum( irow ); /* Global block number */ pr = PROW( gb, grid ); /* Process row owning this block */ if ( pr != jbrow && fsendx_plist[ljb_j][pr] == EMPTY && myrow == jbrow) { fsendx_plist[ljb_j][pr] = YES; ++nfsendx; } if ( myrow == pr ) { lb = LBi( gb, grid ); /* Local block number */ if (Lrb_marker[lb] <= jb) { /* First see this block */ Lrb_marker[lb] = jb + 1; LUb_length[lb] = 1; LUb_number[nrbl++] = gb; if ( gb != jb ) /* Exclude diagonal block. */ ++fmod[lb]; /* Mod. count for forward solve */ if ( kseen == 0 && myrow != jbrow ) { ++nfrecvx; kseen = 1; }#if ( PRNTlevel>=1 ) ++nLblocks;#endif } else ++LUb_length[lb]; ++len; } } /* for i ... */ if ( nrbl ) { /* Do not ensure the blocks are sorted! */ /* Set up the initial pointers for each block in index[] and nzval[]. */ /* If I am the owner of the diagonal block, order it first in LUb_number. Necessary for SuperLU_DIST routines */ kseen = EMPTY; for (j = 0; j < nrbl; j++) { if (LUb_number[j] == jb) kseen = j; } if (kseen != EMPTY && kseen != 0) { LUb_number[kseen] = LUb_number[0]; LUb_number[0] = jb; } /* Add room for descriptors */ len1 = len + BC_HEADER + nrbl * LB_DESCRIPTOR; if ( !(index = intMalloc_dist(len1)) ) { fprintf (stderr, "Malloc fails for index[]"); return (memDist + memNLU); } Lrowind_bc_ptr[ljb_j] = index; if (!(Lnzval_bc_ptr[ljb_j] = doublecomplexMalloc_dist(len*nsupc))) { fprintf(stderr, "Malloc fails for Lnzval_bc_ptr[*][] col block %d ", jb); return (memDist + memNLU); } memNLU += len1*iword + len*nsupc*dword; lusup = Lnzval_bc_ptr[ljb_j]; mybufmax[0] = SUPERLU_MAX( mybufmax[0], len1 ); mybufmax[1] = SUPERLU_MAX( mybufmax[1], len*nsupc ); mybufmax[4] = SUPERLU_MAX( mybufmax[4], len ); index[0] = nrbl; /* Number of row blocks */ index[1] = len; /* LDA of the nzval[] */ next_ind = BC_HEADER; next_val = 0; for (k = 0; k < nrbl; ++k) { gb = LUb_number[k]; lb = LBi( gb, grid ); len = LUb_length[lb]; LUb_length[lb] = 0; index[next_ind++] = gb; /* Descriptor */ index[next_ind++] = len; LUb_indptr[lb] = next_ind; LUb_valptr[lb] = next_val; next_ind += len; next_val += len; } /* Propagate the compressed row subscripts to Lindex[], and the initial values of A from SPA into Lnzval[]. */ len = index[1]; /* LDA of lusup[] */ for (i = xlsub[ljb_j]; i < xlsub[ljb_j+1]; i++) { irow = lsub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); k = LUb_indptr[lb]++; /* Random access a block */ index[k] = irow; k = LUb_valptr[lb]++; irow = ilsum[lb] + irow - FstBlockC( gb ); for (j = 0, dense_col = dense; j < nsupc; ++j) { lusup[k] = dense_col[irow]; dense_col[irow] = zero; k += len; dense_col += ldaspa; } } } /* for i ... */ } else { Lrowind_bc_ptr[ljb_j] = NULL; Lnzval_bc_ptr[ljb_j] = NULL; } /* if nrbl ... */ } /* if mycol == pc */ } /* for jb ... */ SUPERLU_FREE(ilsum_j); SUPERLU_FREE(Urb_marker); SUPERLU_FREE(LUb_length); SUPERLU_FREE(LUb_indptr); SUPERLU_FREE(LUb_number); SUPERLU_FREE(LUb_valptr); SUPERLU_FREE(Lrb_marker); SUPERLU_FREE(dense); /* Free the memory used for storing L and U */ SUPERLU_FREE(xlsub); SUPERLU_FREE(xusub); if (lsub != NULL) SUPERLU_FREE(lsub); if (usub != NULL) SUPERLU_FREE(usub); /* Free the memory used for storing A */ SUPERLU_FREE(ainf_colptr); if (ainf_rowind != NULL) { SUPERLU_FREE(ainf_rowind); SUPERLU_FREE(ainf_val); } SUPERLU_FREE(asup_rowptr); if (asup_colind != NULL) { SUPERLU_FREE(asup_colind); SUPERLU_FREE(asup_val); } /* exchange information about bsendx_plist in between column of processors */ k = SUPERLU_MAX( grid->nprow, grid->npcol); if ( !(recvBuf = (int_t *) SUPERLU_MALLOC(nsupers*k*iword)) ) { fprintf (stderr, "Malloc fails for recvBuf[]."); return (memDist + memNLU); } if ( !(nnzToRecv = (int *) SUPERLU_MALLOC(nprocs*sizeof(int))) ) { fprintf (stderr, "Malloc fails for nnzToRecv[]."); return (memDist + memNLU); } if ( !(ptrToRecv = (int *) SUPERLU_MALLOC(nprocs*sizeof(int))) ) { fprintf (stderr, "Malloc fails for ptrToRecv[]."); return (memDist + memNLU); } if ( !(nnzToSend = (int *) SUPERLU_MALLOC(nprocs*sizeof(int))) ) { fprintf (stderr, "Malloc fails for nnzToRecv[]."); return (memDist + memNLU); } if ( !(ptrToSend = (int *) SUPERLU_MALLOC(nprocs*sizeof(int))) ) { fprintf (stderr, "Malloc fails for ptrToRecv[]."); return (memDist + memNLU); } if (memDist < (nsupers*k*iword +4*nprocs * sizeof(int))) memDist = nsupers*k*iword +4*nprocs * sizeof(int); for (p = 0; p < nprocs; p++) nnzToRecv[p] = 0; for (jb = 0; jb < nsupers; jb++) { jbcol = PCOL( jb, grid ); jbrow = PROW( jb, grid ); p = PNUM(jbrow, jbcol, grid); nnzToRecv[p] += grid->npcol; } i = 0; for (p = 0; p < nprocs; p++) { ptrToRecv[p] = i; i += nnzToRecv[p]; ptrToSend[p] = 0; if (p != iam) nnzToSend[p] = nnzToRecv[iam]; else nnzToSend[p] = 0; } nnzToRecv[iam] = 0; i = ptrToRecv[iam]; for (jb = 0; jb < nsupers; jb++) { jbcol = PCOL( jb, grid ); jbrow = PROW( jb, grid ); p = PNUM(jbrow, jbcol, grid); if (p == iam) { ljb_j = LBj( jb, grid ); /* Local block number column wise */ for (j = 0; j < grid->npcol; j++, i++) recvBuf[i] = ToSendR[ljb_j][j]; } } MPI_Alltoallv (&(recvBuf[ptrToRecv[iam]]), nnzToSend, ptrToSend, mpi_int_t, recvBuf, nnzToRecv, ptrToRecv, mpi_int_t, grid->comm); for (jb = 0; jb < nsupers; jb++) { jbcol = PCOL( jb, grid ); jbrow = PROW( jb, grid ); p = PNUM(jbrow, jbcol, grid); ljb_j = LBj( jb, grid ); /* Local block number column wise */ ljb_i = LBi( jb, grid ); /* Local block number row wise */ /* (myrow == jbrow) { if (ToSendD[ljb_i] == YES) ToRecv[jb] = 1; } else { if (recvBuf[ptrToRecv[p] + mycol] == YES) ToRecv[jb] = 2; } */ if (recvBuf[ptrToRecv[p] + mycol] == YES) { if (myrow == jbrow) ToRecv[jb] = 1; else ToRecv[jb] = 2; } if (mycol == jbcol) { for (i = 0, j = ptrToRecv[p]; i < grid->npcol; i++, j++) ToSendR[ljb_j][i] = recvBuf[j]; ToSendR[ljb_j][mycol] = EMPTY; } ptrToRecv[p] += grid->npcol; } /* exchange information about bsendx_plist in between column of processors */ MPI_Allreduce ((*bsendx_plist), recvBuf, nsupers_j * grid->nprow, mpi_int_t, MPI_MAX, grid->cscp.comm); for (jb = 0; jb < nsupers; jb ++) { jbcol = PCOL( jb, grid); jbrow = PROW( jb, grid); if (mycol == jbcol) { ljb_j = LBj( jb, grid ); /* Local block number column wise */ if (myrow == jbrow ) { for (k = ljb_j * grid->nprow; k < (ljb_j+1) * grid->nprow; k++) { (*bsendx_plist)[k] = recvBuf[k]; if ((*bsendx_plist)[k] != EMPTY) nbsendx ++; } } else { for (k = ljb_j * grid->nprow; k < (ljb_j+1) * grid->nprow; k++) (*bsendx_plist)[k] = EMPTY; } } } SUPERLU_FREE(nnzToRecv); SUPERLU_FREE(ptrToRecv); SUPERLU_FREE(nnzToSend); SUPERLU_FREE(ptrToSend); SUPERLU_FREE(recvBuf); Llu->Lrowind_bc_ptr = Lrowind_bc_ptr; Llu->Lnzval_bc_ptr = Lnzval_bc_ptr; Llu->Ufstnz_br_ptr = Ufstnz_br_ptr; Llu->Unzval_br_ptr = Unzval_br_ptr; Llu->ToRecv = ToRecv; Llu->ToSendD = ToSendD; Llu->ToSendR = ToSendR; Llu->fmod = fmod; Llu->fsendx_plist = fsendx_plist; Llu->nfrecvx = nfrecvx; Llu->nfsendx = nfsendx; Llu->bmod = bmod; Llu->bsendx_plist = bsendx_plist; Llu->nbrecvx = nbrecvx; Llu->nbsendx = nbsendx; Llu->ilsum = ilsum; Llu->ldalsum = ldaspa; LUstruct->Glu_persist = Glu_persist; #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. # L blocks %d\t# U blocks %d\n", nLblocks, nUblocks);#endif /* Find the maximum buffer size. */ MPI_Allreduce(mybufmax, Llu->bufmax, NBUFFERS, mpi_int_t, MPI_MAX, grid->comm); #if ( DEBUGlevel>=1 ) /* Memory allocated but not freed: ilsum, fmod, fsendx_plist, bmod, bsendx_plist, ToRecv, ToSendR, ToSendD */ CHECK_MALLOC(iam, "Exit dist_psymbtonum()");#endif return (- (memDist+memNLU));} /* dist_psymbtonum */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -