📄 #psymbfact.c#
字号:
*/ int p, p_irow, code_err, ainf_data; int_t n, m_loc, fst_row; int_t i, j, k, irow, jcol; NRformat_loc *Astore; int_t nnz_loc, nnz_iam; /* number of local nonzeros */ int_t nnz_remote; /* number of remote nonzeros to be sent */ int_t SendCnt; /* number of remote nonzeros to be sent */ int_t RecvCnt; /* number of remote nonzeros to be received */ /* number of nonzeros to send/receive per processor */ int_t *nnzToSend, *nnzToRecv; int_t *nnzAinf_toSnd; /* nnz in Ainf to send */ /* VInfo data structures */ int_t *globToLoc, *begEndBlks_loc, nblks_loc, nvtcs_loc, maxNvtcsPProc; int_t neltsRow, vtx, vtx_lid, nelts, ind; int_t *snd_aind, *rcv_aind; int_t *ptr_toSnd, *buf, *ptr_toRcv; /* matrix_symbfact_t *As data */ int_t *x_ainf, *x_asup, *ind_ainf, *ind_asup; int *intBuf1, *intBuf2, *intBuf3, *intBuf4; /* ------------------------------------------------------------ INITIALIZATION. ------------------------------------------------------------*/ Astore = (NRformat_loc *) A->Store; n = A->ncol; m_loc = Astore->m_loc; fst_row = Astore->fst_row; globToLoc = Pslu_freeable->globToLoc; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; nnzToRecv = intCalloc_symbfact(3 * (int_t)nprocs_num); nnzToSend = nnzToRecv + nprocs_num; nnzAinf_toSnd = nnzToRecv + 2 * nprocs_num; /* --------------------------------------------------------------------- COUNT THE NUMBER OF NONZEROS TO BE SENT TO EACH PROCESS, THEN ALLOCATE SPACE. THIS ACCOUNTS FOR THE FIRST PASS OF A. ----------------------------------------------------------------------*/ /* tempArray stores the number of nonzeros in each column of ainf */ for (i = 0; i < n; i++) tempArray[i] = 0; for (i = 0; i < m_loc; i++) { irow = perm_c[perm_r[i+fst_row]]; /* Row number in Pc*Pr*A */ p_irow = OWNER(globToLoc[irow]); neltsRow = 0; for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; j++) { jcol = perm_c[Astore->colind[j]]; if (jcol <= irow) { p = OWNER(globToLoc[jcol]); if (tempArray[jcol] == 0) { nnzToSend[p] += 2; nnzAinf_toSnd[p] += 2; } tempArray[jcol] ++; nnzAinf_toSnd[p] ++; } else { p = p_irow; neltsRow ++; } nnzToSend[p] ++; } if (neltsRow != 0) { nnzToSend[p_irow] += 2; } } /* add one entry which will separate columns of Ainf from rows of Asup */ for (p = 0; p < nprocs_num; p++) if (nnzToSend[p] != 0) nnzToSend[p] ++; /* All-to-all communication */ MPI_Alltoall (nnzToSend, 1, mpi_int_t, nnzToRecv, 1, mpi_int_t, (*num_comm)); nnz_loc = SendCnt = RecvCnt = 0; for (p = 0; p < nprocs_num; p++) { if ( p != iam ) { SendCnt += nnzToSend[p]; RecvCnt += nnzToRecv[p]; } else { nnz_loc += nnzToRecv[p]; nnzToSend[p] = 0; } } nnz_iam = nnz_loc + RecvCnt; /* Total nonzeros ended up in my process. */ /* Allocate temporary storage for sending/receiving the A triplets. */ if (!(snd_aind = intMalloc_symbfact(SendCnt)) && SendCnt != 0) ABORT("Malloc fails for snd_aind[]."); if ( !(rcv_aind = intMalloc_symbfact(nnz_iam + 1))) ABORT("Malloc fails for rcv_aind[]."); if ( !(ptr_toSnd = intCalloc_symbfact((int_t) nprocs_num)) ) ABORT("Malloc fails for ptr_toSnd[]."); if ( !(ptr_toRcv = intCalloc_symbfact((int_t) nprocs_num)) ) ABORT("Malloc fails for ptr_toRcv[]."); /* setup ptr_toSnd[p] to point to data in snd_aind to be send to processor p */ for (i = 0, j = 0, p = 0; p < nprocs_num; p++) { if ( p != iam ) ptr_toSnd[p] = i; else ptr_toSnd[p] = j; i += nnzToSend[p]; j += nnzToRecv[p]; } for (i = 0; i < n; i++) { if (tempArray[i] != 0) { /* column i of Ainf will be send to a processor */ p = OWNER( globToLoc[i] ); if (p == iam) { buf = &(rcv_aind[ptr_toSnd[p]]); } else { buf = &(snd_aind[ptr_toSnd[p]]); } buf[0] = tempArray[i]; buf[1] = i; tempArray[i] = ptr_toSnd[p] + 2; ptr_toSnd[p] += 2 + buf[0]; } } /* set ptr_toSnd to point to Asup data (stored by rows) */ for (i = 0, j = 0, p = 0; p < nprocs_num; p++) { if ( p != iam ) { if (nnzToSend[p] != 0) { snd_aind[i + nnzAinf_toSnd[p]] = EMPTY; ptr_toSnd[p] = i + nnzAinf_toSnd[p] + 1; } } else { if (nnzToRecv[p] != 0) { rcv_aind[j + nnzAinf_toSnd[p]] = EMPTY; ptr_toSnd[p] = j + nnzAinf_toSnd[p] + 1; } } i += nnzToSend[p]; j += nnzToRecv[p]; } /* ------------------------------------------------------------ LOAD THE ENTRIES OF A INTO THE snd_aind STRUCTURE TO SEND. THIS ACCOUNTS FOR THE SECOND PASS OF A. For each processor, we store first the columns to be sent, and then the rows to be sent. For each row/column sent: entry 0 : x = number of elements in that row/column entry 1 : row/column number entries 2 .. x + 2 : row/column indices. ------------------------------------------------------------*/ for (i = 0; i < m_loc; i++) { irow = perm_c[perm_r[i+fst_row]]; /* Row number in Pc*A */ p_irow = OWNER( globToLoc[irow] ); ptr_toSnd[p_irow] +=2; neltsRow = 0; for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; j++) { jcol = perm_c[Astore->colind[j]]; if (jcol <= irow) { p = OWNER( globToLoc[jcol] ); k = tempArray[jcol]; tempArray[jcol] ++; if (p == iam) { /* local */ rcv_aind[k] = irow; } else { snd_aind[k] = irow; } } else { p = p_irow; neltsRow ++; k = ptr_toSnd[p]; ptr_toSnd[p] ++; if (p == iam) { /* local */ rcv_aind[k] = jcol; } else { snd_aind[k] = jcol; } } } if (neltsRow == 0) ptr_toSnd[p_irow] -= 2; else { /* store entry 0 and entry 1 */ if (p_irow == iam) { /* local */ rcv_aind[ptr_toSnd[p_irow] - neltsRow - 2] = neltsRow; rcv_aind[ptr_toSnd[p_irow] - neltsRow - 1] = irow; } else { /* remote */ snd_aind[ptr_toSnd[p_irow] - neltsRow - 2] = neltsRow; snd_aind[ptr_toSnd[p_irow] - neltsRow - 1] = irow; } } } /* reset ptr_toSnd to point to the beginning of the data for each processor (structure needed in MPI_Alltoallv */ for (i = 0, j = 0, p = 0; p < nprocs_num; p++) { ptr_toSnd[p] = i; i += nnzToSend[p]; ptr_toRcv[p] = j; j += nnzToRecv[p]; } /* ------------------------------------------------------------ PERFORM REDISTRIBUTION. THIS INVOLVES ALL-TO-ALL COMMUNICATION. Note: it uses MPI_Alltoallv. ------------------------------------------------------------*/ if (nprocs_num > 1) {#if defined (_LONGINT) intBuf1 = (int *) SUPERLU_MALLOC(4 * nprocs_num * sizeof(int)); intBuf2 = intBuf1 + nprocs_num; intBuf3 = intBuf1 + 2 * nprocs_num; intBuf4 = intBuf1 + 3 * nprocs_num; for (p=0; p<nprocs_num; p++) { if (nnzToSend[p] > INT_MAX || ptr_toSnd[p] > INT_MAX || nnzToRecv[p] > INT_MAX || ptr_toRcv[p] > INT_MAX) ABORT("ERROR in symbfact_distributeMatrix size to send > INT_MAX\n"); intBuf1[p] = (int) nnzToSend[p]; intBuf2[p] = (int) ptr_toSnd[p]; intBuf3[p] = (int) nnzToRecv[p]; intBuf4[p] = (int) ptr_toRcv[p]; }#else /* Default */ intBuf1 = nnzToSend; intBuf2 = ptr_toSnd; intBuf3 = nnzToRecv; intBuf4 = ptr_toRcv;#endif MPI_Alltoallv (snd_aind, intBuf1, intBuf2, mpi_int_t, rcv_aind, intBuf3, intBuf4, mpi_int_t, (*num_comm));#if defined (_LONGINT) SUPERLU_FREE (intBuf1);#endif } /* ------------------------------------------------------------ DEALLOCATE SEND STORAGE ------------------------------------------------------------*/ if (snd_aind) SUPERLU_FREE( snd_aind ); SUPERLU_FREE( ptr_toSnd ); /* ------------------------------------------------------------ CONVERT THE RECEIVED FORMAT INTO THE SYMBOLIC FORMAT. THIS IS PERFORMED ONLY BY NPROCS_SYMB PROCESSORS ------------------------------------------------------------*/ if (iam < nprocs_symb) { nblks_loc = VInfo->nblks_loc; begEndBlks_loc = VInfo->begEndBlks_loc; nvtcs_loc = VInfo->nvtcs_loc; /* ------------------------------------------------------------ Allocate space for storing indices of A after redistribution. ------------------------------------------------------------*/ if (!(x_ainf = intCalloc_symbfact (nvtcs_loc + 1))) ABORT("Malloc fails for x_ainf[]."); if (!(x_asup = intCalloc_symbfact (nvtcs_loc + 1))) ABORT("Malloc fails for x_asup[]."); /* Initialize the array of columns/rows pointers */ for (i = 0, p = 0; p < nprocs_num; p++) { ainf_data = TRUE; k = 0; while (k < nnzToRecv[p]) { j = rcv_aind[i + k]; if (j == EMPTY) { ainf_data = FALSE; k ++; } else { nelts = rcv_aind[i + k]; vtx = rcv_aind[i + k + 1]; vtx_lid = LOCAL_IND( globToLoc[vtx] ); k += nelts + 2; if (ainf_data) x_ainf[vtx_lid] += nelts; else x_asup[vtx_lid] = nelts; } } i += nnzToRecv[p]; } /* copy received information */ vtx_lid = 0; for (i = 0, k = 0, j = 0; i < nblks_loc; i++) { for (vtx = begEndBlks_loc[2*i]; vtx < begEndBlks_loc[2*i+1]; vtx++, vtx_lid ++) { nelts = x_ainf[vtx_lid]; x_ainf[vtx_lid] = k; k += nelts; nelts = x_asup[vtx_lid]; x_asup[vtx_lid] = j; j += nelts; tempArray[vtx] = x_ainf[vtx_lid]; } } x_ainf[nvtcs_loc] = k; x_asup[nvtcs_loc] = j; /* Allocate space for storing indices of A after conversion */ if ( !(ind_ainf = intMalloc_symbfact(x_ainf[nvtcs_loc])) && x_ainf[nvtcs_loc] != 0 ) ABORT("Malloc fails for ind_ainf[]."); if ( !(ind_asup = intMalloc_symbfact(x_asup[nvtcs_loc])) && x_asup[nvtcs_loc] != 0) ABORT("Malloc fails for ind_asup[]."); /* Copy the data into the row/column oriented storage */ for (i = 0, p = 0; p < nprocs_num; p++) { ainf_data = TRUE; k = 0; while (k < nnzToRecv[p]) { j = rcv_aind[i + k]; if (ainf_data && j == EMPTY) { ainf_data = FALSE; k ++; } else { nelts = rcv_aind[i + k]; vtx = rcv_aind[i + k + 1]; vtx_lid = LOCAL_IND( globToLoc[vtx] ); if (ainf_data) { /* traverse ainf data */ ind = tempArray[vtx]; for (j = i + k + 2; j < i + k + 2 + nelts; j++, ind ++) ind_ainf[ind] = rcv_aind[j]; tempArray[vtx] = ind; } else { /* traverse asup data */ ind = x_asup[vtx_lid]; for (j = i + k + 2; j < i + k + 2 + nelts; j++, ind ++) ind_asup[ind] = rcv_aind[j]; } k += nelts + 2; } } i += nnzToRecv[p]; } /* ------------------------------------------------------------ DEALLOCATE TEMPORARY STORAGE ------------------------------------------------------------*/ SUPERLU_FREE( ptr_toRcv ); if (rcv_aind) SUPERLU_FREE( rcv_aind ); if (nnzToRecv) SUPERLU_FREE( nnzToRecv ); AS->x_ainf = x_ainf; AS->x_asup = x_asup; AS->ind_ainf = ind_ainf; AS->ind_asup = ind_asup; VInfo->nnz_asup_loc = x_asup[nvtcs_loc]; VInfo->nnz_ainf_loc = x_ainf[nvtcs_loc]; }}staticfloat allocPrune_lvl( Llu_symbfact_t *Llu_symbfact, /* Input/Output - local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* Input -local info on vertices distribution */ psymbfact_stat_t *PS /* Input -statistics */ )/* * Allocate storage for data structures necessary for pruned graphs. * For those unpredictable size, make a guess as FILL * n. * Return value: * 0 if enough memory was available; * otherwise, return the amount of space intended to allocate * when memory allocation failure occurred. */{ int_t lword; int_t nzlmaxPr, nzumaxPr, *xlsubPr, *xusubPr, *lsubPr, *usubPr; int_t nvtcs_loc, no_expand_pr, x_sz; float alpha = 1.5; int_t FILL = sp_ienv_dist(6); nvtcs_loc = VInfo->nvtcs_loc; no_expand_pr = 0; lword = (int_t) sizeof(int_t); /* free memory allocated for the domain symbolic factorization */ if (Llu_symbfact->szLsubPr) SUPERLU_FREE( Llu_symbfact->lsubPr ); if (Llu_symbfact->szUsubPr) SUPERLU_FREE( Llu_symbfact->usubPr ); if (Llu_symbfact->xlsubPr) SUPERLU_FREE( Llu_symbfact->xlsubPr ); if (Llu_symbfact->xusubPr) SUPERLU_FREE( Llu_symbfact->xusubPr ); Llu_symbfact->xlsub_rcvd = intMalloc_symbfact (VInfo->maxSzBlk + 1); Llu_symbfact->xusub_rcvd = intMalloc_symbfact (VInfo->maxSzBlk + 1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -