📄 #psymbfact.c#
字号:
/* allocate memory to use during superior levels of sep_tree */ x_sz = SUPERLU_MIN( VInfo->maxNvtcsNds_loc, VInfo->maxSzBlk); nzlmaxPr = 2 * FILL * VInfo->maxNvtcsNds_loc; nzumaxPr = 2 * FILL * VInfo->maxSzBlk; /* Integer pointers for L\U factors */ if (x_sz != 0) { xlsubPr = intMalloc_symbfact(VInfo->maxNvtcsNds_loc + 1); xusubPr = intMalloc_symbfact(VInfo->maxNvtcsNds_loc + 1); lsubPr = (int_t *) SUPERLU_MALLOC (nzlmaxPr * lword); usubPr = (int_t *) SUPERLU_MALLOC (nzumaxPr * lword); while ( !lsubPr || !usubPr ) { if ( lsubPr ) SUPERLU_FREE( lsubPr ); if ( usubPr ) SUPERLU_FREE( usubPr ); nzlmaxPr /= 2; nzlmaxPr = alpha * (float) nzlmaxPr; nzumaxPr /= 2; nzumaxPr = alpha * (float) nzumaxPr; if ( nzumaxPr < x_sz ) { fprintf(stderr, "Not enough memory to perform factorization.\n"); return (PS->allocMem); } lsubPr = (int_t *) SUPERLU_MALLOC(nzlmaxPr * lword); usubPr = (int_t *) SUPERLU_MALLOC(nzumaxPr * lword); ++no_expand_pr; } } else { xlsubPr = NULL; lsubPr = NULL; xusubPr = NULL; usubPr = NULL; nzlmaxPr = 0; nzumaxPr = 0; } if (VInfo->maxNvtcsNds_loc) Llu_symbfact->cntelt_vtcsA_lvl = (int_t *) SUPERLU_MALLOC (VInfo->maxNvtcsNds_loc * lword); if (PS->maxSzLPr < Llu_symbfact->indLsubPr) PS->maxSzLPr = Llu_symbfact->indLsubPr; if (PS->maxSzUPr < Llu_symbfact->indUsubPr) PS->maxSzUPr = Llu_symbfact->indUsubPr; Llu_symbfact->lsubPr = lsubPr; Llu_symbfact->xlsubPr = xlsubPr; Llu_symbfact->usubPr = usubPr; Llu_symbfact->xusubPr = xusubPr; Llu_symbfact->szLsubPr = nzlmaxPr; Llu_symbfact->szUsubPr = nzumaxPr; Llu_symbfact->indLsubPr = 0; Llu_symbfact->indUsubPr = 0; Llu_symbfact->no_expand_pr += no_expand_pr; return 0;}static float allocPrune_domain( int_t fstVtx, /* Input - first vertex of current node */ int_t lstVtx, /* Input - last vertex of current node */ Llu_symbfact_t *Llu_symbfact, /* Output - local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* Input -local info on vertices distribution */ psymbfact_stat_t *PS /* Input -statistics */ )/* * Allocate storage for data structures necessary for pruned graphs. * For those unpredictable size, make a guess as FILL * n. * Return value: * 0 if enough memory was available; * otherwise, return the amount of space intended to allocate * when memory allocation failure occurred. */{ int_t lword; int_t nzlmaxPr, nzumaxPr, *xlsubPr, *xusubPr, *lsubPr, *usubPr; int_t nvtcs_loc, no_expand_pr, x_sz; float alpha = 1.5; int_t FILL = 2 * sp_ienv_dist(6); nvtcs_loc = VInfo->nvtcs_loc; no_expand_pr = 0; lword = (int_t) sizeof(int_t); /* allocate memory to use during domain_symbolic routine */ /* Guess for prune graph */ x_sz = lstVtx - fstVtx; nzlmaxPr = nzumaxPr = 2*FILL * x_sz; /* Integer pointers for L\U factors */ if (x_sz != 0) { xlsubPr = intMalloc_symbfact(x_sz+1); xusubPr = intMalloc_symbfact(x_sz+1); lsubPr = (int_t *) SUPERLU_MALLOC (nzlmaxPr * lword); usubPr = (int_t *) SUPERLU_MALLOC (nzumaxPr * lword); while ( !lsubPr || !usubPr ) { if ( lsubPr ) SUPERLU_FREE(lsubPr); if ( usubPr ) SUPERLU_FREE(usubPr); nzlmaxPr /= 2; nzlmaxPr = alpha * (float) nzlmaxPr; nzumaxPr /= 2; nzumaxPr = alpha * (float) nzumaxPr; if ( nzumaxPr < x_sz ) { fprintf(stderr, "Not enough memory to perform factorization.\n"); return (PS->allocMem); } lsubPr = (void *) SUPERLU_MALLOC(nzlmaxPr * lword); usubPr = (void *) SUPERLU_MALLOC(nzumaxPr * lword); ++no_expand_pr; } } else { xlsubPr = NULL; xusubPr = NULL; } Llu_symbfact->lsubPr = lsubPr; Llu_symbfact->xlsubPr = xlsubPr; Llu_symbfact->usubPr = usubPr; Llu_symbfact->xusubPr = xusubPr; Llu_symbfact->szLsubPr = nzlmaxPr; Llu_symbfact->szUsubPr = nzumaxPr; Llu_symbfact->indLsubPr = 0; Llu_symbfact->indUsubPr = 0; Llu_symbfact->xlsub_rcvd = NULL; Llu_symbfact->xusub_rcvd = NULL; Llu_symbfact->cntelt_vtcsA_lvl = NULL; PS->maxSzLPr = Llu_symbfact->indLsubPr; PS->maxSzUPr = Llu_symbfact->indUsubPr; Llu_symbfact->no_expand_pr = no_expand_pr; Llu_symbfact->no_expcp = 0; return 0;}/************************************************************************/staticint symbfact_alloc/************************************************************************/( int_t n, /* Input - order of the matrix */ int nprocs, /* Input - number of processors for the symbolic factorization */ Pslu_freeable_t *Pslu_freeable, Llu_symbfact_t *Llu_symbfact, /* Output - local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* Input - local info on vertices distribution */ comm_symbfact_t *CS, /* Input -information on communication */ psymbfact_stat_t *PS /* Input -statistics */ )/* * Allocate storage for the data structures common to symbolic factorization * routines. For those unpredictable size, make a guess as FILL * nnz(A). * Return value: * 0 if enough memory was available; * otherwise, return the amount of space intended to allocate * when memory allocation failure occurred. */{ int nlvls, p; /* no of levels in the separator tree */ int_t lword, no_expand; int_t *xsup, *supno; int_t *lsub, *xlsub; int_t *usub, *xusub; int_t nzlmax, nzumax, nnz_a_loc; int_t nvtcs_loc, *cntelt_vtcs; float alpha = 1.5; int_t FILL = sp_ienv_dist(6); nvtcs_loc = VInfo->nvtcs_loc; nnz_a_loc = VInfo->nnz_ainf_loc + VInfo->nnz_asup_loc; nlvls = (int) log2( (double) nprocs ) + 1; no_expand = 0; lword = sizeof(int_t); /* Guess for L\U factors */ nzlmax = nzumax = FILL * nnz_a_loc; /* Integer pointers for L\U factors */ supno = intMalloc_symbfact(nvtcs_loc+1); xlsub = intMalloc_symbfact(nvtcs_loc+1); xusub = intMalloc_symbfact(nvtcs_loc+1); lsub = (void *) SUPERLU_MALLOC(nzlmax * lword); usub = (void *) SUPERLU_MALLOC(nzumax * lword); while ( !lsub || !usub ) { if (!lsub) SUPERLU_FREE(lsub); if (!usub) SUPERLU_FREE(usub); nzlmax /= 2; nzlmax = alpha * nzlmax; nzumax /= 2; nzumax = alpha * nzumax; if ( nzumax < nnz_a_loc/2 ) { fprintf(stderr, "Not enough memory to perform factorization.\n"); return (PS->allocMem); } lsub = (void *) SUPERLU_MALLOC(nzlmax * lword); usub = (void *) SUPERLU_MALLOC(nzumax * lword); ++no_expand; } if (nprocs == 1) cntelt_vtcs = NULL; else cntelt_vtcs = intMalloc_symbfact (nvtcs_loc+1); /* allocate memory for communication data structures */ CS->rcv_interLvl = intMalloc_symbfact (2 * (int_t) nprocs + 1); CS->snd_interLvl = intMalloc_symbfact (2 * (int_t) nprocs + 1); CS->ptr_rcvBuf = intMalloc_symbfact (2 * (int_t) nprocs ); CS->rcv_intraLvl = intMalloc_symbfact ((int_t) nprocs + 1); CS->snd_intraLvl = intMalloc_symbfact ((int_t) nprocs + 1); CS->snd_interSz = intMalloc_symbfact ((int_t) nlvls + 1); CS->snd_LinterSz = intMalloc_symbfact ((int_t) nlvls + 1); CS->snd_vtxinter = intMalloc_symbfact ((int_t) nlvls + 1); CS->rcv_bufSz = 0; CS->rcv_buf = NULL; CS->snd_bufSz = 0; CS->snd_buf = NULL; for (p = 0; p < nprocs; p++) { CS->rcv_interLvl[p] = EMPTY; CS->snd_interLvl[p] = EMPTY; CS->rcv_intraLvl[p] = EMPTY; CS->snd_intraLvl[p] = EMPTY; } for (p = 0; p <= nlvls; p++) { CS->snd_vtxinter[p] = EMPTY; CS->snd_interSz[p] = 0; CS->snd_LinterSz[p] = 0; } Pslu_freeable->supno_loc = supno; Llu_symbfact->lsub = lsub; Llu_symbfact->xlsub = xlsub; Llu_symbfact->usub = usub; Llu_symbfact->xusub = xusub; Llu_symbfact->szLsub = nzlmax; Llu_symbfact->szUsub = nzumax; Llu_symbfact->cntelt_vtcs = cntelt_vtcs; Llu_symbfact->no_expand = no_expand; return SUCCES_RET;} /* SYMBFACT_ALLOC */static int_t symbfact_vtx( int_t n, /* Input - order of the matrix */ int iam, /* Input - my processor number */ int_t vtx, /* Input - vertex number to perform symbolic factorization */ int_t vtx_lid, /* Input - local vertex number */ int_t vtx_prid, /* Input - */ int_t computeL, /* Input - TRUE when compute column L(:,vtx) otheriwse compute row U(vtx, :) */ int domain_symb, /* Input - if TRUE, computation corresponds to the independent domain at the bottom of the separator tree */ int_t fstVtx, /* Input - first vertex of current node */ int_t lstVtx, /* Input - last vertex of current node */ int_t snrep_lid, /* local index of current supernode reprezentative */ int_t szSn, /* size of supernode with snrep_lid reprezentative */ int_t *p_next, /* next element in sub structure */ int_t *marker, int_t *sub_rcvd, /* elements of node */ int_t sub_rcvd_sz, /* size of sub to be explored */ Pslu_freeable_t *Pslu_freeable, Llu_symbfact_t *Llu_symbfact, /* Input/Output - local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* Input/Output - local info on vertices distribution */ psymbfact_stat_t *PS, int_t *p_neltsVtxInit, int_t *p_neltsVtx, int_t *p_neltsVtx_CSep, int_t *p_neltsZrVtx, int_t *p_neltsMatched, int_t mark_vtx, int_t *p_prval_curvtx, int_t vtx_bel_othSn, int_t *p_vtx_bel_mySn ){ int_t x_aind_beg, x_aind_end, upd_lstSn; int_t k, vtx_elt, ind, pr, pr_lid, mem_error, ii, jj, compRcvd; int_t *xsub, *sub, *xsubPr, *subPr, *xsub_rcvd, *xsub_src, *sub_src; int_t pr_elt, next, prval_curvtx, maxNvtcsPProc; int_t neltsVtx, neltsMatched, neltsZrVtx, neltsZrSn, neltsVtx_CSep; int_t neltsVtxInit, kk; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; upd_lstSn = FALSE; prval_curvtx = *p_prval_curvtx; neltsVtx_CSep = 0; next = *p_next; if (computeL) { xsub = Llu_symbfact->xlsub; sub = Llu_symbfact->lsub; xsub_rcvd = Llu_symbfact->xlsub_rcvd; xsubPr = Llu_symbfact->xusubPr; subPr = Llu_symbfact->usubPr; } else { xsub = Llu_symbfact->xusub; sub = Llu_symbfact->usub; xsub_rcvd = Llu_symbfact->xusub_rcvd; xsubPr = Llu_symbfact->xlsubPr; subPr = Llu_symbfact->lsubPr; } x_aind_beg = xsub[vtx_lid]; x_aind_end = xsub[vtx_lid + 1]; xsub[vtx_lid] = next; k = x_aind_beg; /* while (sub[k] != EMPTY && k < x_aind_end) { */ while (k < x_aind_end) { if (sub[k] == EMPTY) k = x_aind_end; else { vtx_elt = sub[k]; if (!computeL) if (marker[vtx_elt] == mark_vtx - 2) if (vtx_elt < prval_curvtx) prval_curvtx = vtx_elt; marker[vtx_elt] = mark_vtx; if (!computeL && vtx_elt == vtx) printf ("Pe[%d] ERROR diag elt in U part vtx %d dom_s %d fstV %d lstV %d\n", iam, vtx, domain_symb, fstVtx, lstVtx); else { sub[next] = vtx_elt; next ++; } if (vtx_elt < lstVtx) neltsVtx_CSep ++; k++; } } neltsVtxInit = k - x_aind_beg; PS->nops += neltsVtxInit; if (domain_symb) { if (computeL) VInfo->nnz_ainf_loc -= x_aind_end - x_aind_beg; else VInfo->nnz_asup_loc -= x_aind_end - x_aind_beg; }#ifdef TEST_SYMB printf ("compL %d vtx %d vtx_lid %d vtx_prid %d vtx_bel_othSn %d\n", computeL, vtx, vtx_lid, vtx_prid, vtx_bel_othSn); PrintInt10 ("A(:, v)", x_aind_end - x_aind_beg, &(sub[xsub[vtx_lid]]));#endif ind = xsubPr[vtx_prid]; if (vtx_bel_othSn == vtx) upd_lstSn = TRUE; while (ind != EMPTY || upd_lstSn) { if (upd_lstSn ) { upd_lstSn = FALSE; pr_lid = snrep_lid; } else { pr_lid = subPr[ind]; ind = subPr[ind - 1]; } if (!computeL) marker[vtx] = mark_vtx; if (pr_lid >= VInfo->nvtcs_loc) { compRcvd = TRUE; xsub_src = xsub_rcvd; sub_src = sub_rcvd; pr_lid -= VInfo->nvtcs_loc; k = xsub_src[pr_lid] + RCVD_IND; } else { compRcvd = FALSE; xsub_src = xsub; sub_src = sub; k = xsub_src[pr_lid]; } PS->nops += xsub_src[pr_lid+1] - xsub_src[pr_lid]; for (; k < xsub_src[pr_lid+1]; k++) { pr_elt = sub_src[k]; if (pr_elt >= vtx && marker[pr_elt] != mark_vtx) { /* TEST available memory */ if (next >= x_aind_end) { if (domain_symb) { if (mem_error = psymbfact_LUXpandMem (iam, n, vtx, next, 0, computeL, DOMAIN_SYMB, 1, Pslu_freeable, Llu_symbfact, VInfo, PS)) return (mem_error); } else if (mem_error = psymbfact_LUXpand (iam, n, EMPTY, vtx, &next, 0, computeL, LL_SYMB, 1, Pslu_freeable, Llu_symbfact, VInfo, PS)) return (mem
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -