📄 #psymbfact.c#
字号:
&Llu_symbfact, &VInfo, &PS)) > 0) return (flinfo); if (fstVtx < lstVtx) VInfo.fstVtx_nextLvl = VInfo.begEndBlks_loc[2]; domain_symbfact (A, iam, lvl, szSep, iSep, jSep, sizes, fstVtxSep, fstVtx, lstVtx, Pslu_freeable, &Llu_symbfact, &VInfo, &CS, &PS, tempArray, &mark, &nextl, &nextu, &neltsZr, &neltsTotal, &nsuper_loc); PS.estimLSz = nextl; PS.estimUSz = nextu; if (nprocs_symb != 1) if((flinfo = allocPrune_lvl (&Llu_symbfact, &VInfo, &PS)) > 0) return (flinfo);#if ( PROFlevel>=1 ) t2 = SuperLU_timer_(); time_lvls[lvl] = 0.; time_lvls[lvl+1] = 0.; time_lvls[lvl + 2] = t2 - t1;#endif } } else { lstP = fstP + npNode; if (fstP <= iam && iam < lstP) {#if ( PROFlevel>=1 ) t1 = SuperLU_timer_(); #endif if (VInfo.filledSep != FILLED_SEPS) initLvl_symbfact(n, iam, fstVtx, lstVtx, Pslu_freeable, &Llu_symbfact, &VInfo, &PS, commLvls[jSep], tempArray, nextl, nextu);#if ( PROFlevel>=1 ) t2 = SuperLU_timer_(); time_lvls[3*lvl] = t2 - t1;#endif interLvl_symbfact (A, iam, lvl, szSep, fstP, lstP, iSep, jSep, sizes, fstVtxSep, &nextl, &nextu, &nsuper_loc, &mark, tempArray, &Llu_symbfact, Pslu_freeable, &CS, &VInfo, &PS, commLvls[jSep], symb_comm);#if ( PROFlevel>=1 ) t1 = SuperLU_timer_(); time_lvls[3*lvl+1] = t1 - t2;#endif if (VInfo.filledSep != FILLED_SEPS) intraLvl_symbfact (A, iam, lvl, szSep, iSep, jSep, sizes, fstVtxSep, fstP, lstP, fstVtx, lstVtx, Pslu_freeable, &Llu_symbfact, &VInfo, &CS, &PS, tempArray, &mark, &nextl, &nextu, &neltsZr, &neltsTotal, &nsuper_loc, commLvls[jSep], symb_comm);#if ( PROFlevel>=1 ) t2 = SuperLU_timer_(); time_lvls[3*lvl+2] = t2 - t1; #endif } } fstP += npNode; } iSep += szSep; szSep = szSep / 2; lvl ++; } SUPERLU_FREE( tempArray ); if ( commLvls ) SUPERLU_FREE( commLvls ); /* Set up global information and collect statistics */ if (PS.maxSzLPr < Llu_symbfact.indLsubPr) PS.maxSzLPr = Llu_symbfact.indLsubPr; if (PS.maxSzUPr < Llu_symbfact.indUsubPr) PS.maxSzUPr = Llu_symbfact.indUsubPr; Llu_symbfact.xlsub[VInfo.nvtcs_loc] = nextl; Llu_symbfact.xusub[VInfo.nvtcs_loc] = nextu; fill_rcmd = SUPERLU_MAX( nextl / nnz_ainf_loc, nextu / nnz_asup_loc) + 1; Pslu_freeable->xsup_beg_loc = intMalloc_dist (nsuper_loc+1); Pslu_freeable->xsup_end_loc = intMalloc_dist (nsuper_loc+1); if (!Pslu_freeable->xsup_beg_loc || !Pslu_freeable->xsup_end_loc) { fprintf (stderr, "Malloc fails for xsup_beg_loc, xsup_end_loc."); return (PS.allocMem); } PS.allocMem += 2 * (nsuper_loc+1) * sizeof(int_t); maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; nnzL = 0; nnzU = 0; i = 0; nsuper = 0; ind_blk = 0; for (ind_blk = 0; ind_blk < VInfo.nblks_loc; ind_blk ++) { fstVtx = VInfo.begEndBlks_loc[2 * ind_blk]; lstVtx = VInfo.begEndBlks_loc[2 * ind_blk + 1]; fstVtx_lid = LOCAL_IND( Pslu_freeable->globToLoc[fstVtx] ); nsuper = Pslu_freeable->supno_loc[fstVtx_lid]; Pslu_freeable->xsup_beg_loc[nsuper] = fstVtx; szsn = 1; if (INT_MAX - nnzL <= Llu_symbfact.xlsub[fstVtx_lid + 1] - Llu_symbfact.xlsub[fstVtx_lid]) printf ("PE[%d] ERR nnzL %d\n", iam, nnzL); if (INT_MAX - nnzU <= Llu_symbfact.xusub[fstVtx_lid + 1] - Llu_symbfact.xusub[fstVtx_lid]) printf ("PE[%d] ERR nnzU %d\n", iam, nnzU); j = Llu_symbfact.xlsub[fstVtx_lid + 1] - Llu_symbfact.xlsub[fstVtx_lid]; k = Llu_symbfact.xusub[fstVtx_lid + 1] - Llu_symbfact.xusub[fstVtx_lid]; nnzL += j; nnzU += k; for (vtx = fstVtx + 1, vtx_lid = fstVtx_lid + 1; vtx < lstVtx; vtx++, vtx_lid ++) { if (Pslu_freeable->supno_loc[vtx_lid] != nsuper) { nsuper = Pslu_freeable->supno_loc[vtx_lid]; Pslu_freeable->xsup_end_loc[nsuper-1] = vtx; Pslu_freeable->xsup_beg_loc[nsuper] = vtx; szsn = 1; j = Llu_symbfact.xlsub[vtx_lid + 1] - Llu_symbfact.xlsub[vtx_lid]; k = Llu_symbfact.xusub[vtx_lid + 1] - Llu_symbfact.xusub[vtx_lid]; } else { szsn ++; } nnzL += j - szsn + 1; nnzU += k - szsn + 1; } Pslu_freeable->xsup_end_loc[nsuper] = lstVtx; } Pslu_freeable->supno_loc[VInfo.nvtcs_loc] = nsuper_loc; Pslu_freeable->nvtcs_loc = VInfo.nvtcs_loc; /* set up xsup data */ Pslu_freeable->lsub = Llu_symbfact.lsub; Pslu_freeable->xlsub = Llu_symbfact.xlsub; Pslu_freeable->usub = Llu_symbfact.usub; Pslu_freeable->xusub = Llu_symbfact.xusub; Pslu_freeable->szLsub = Llu_symbfact.szLsub; Pslu_freeable->szUsub = Llu_symbfact.szUsub; #if ( PROFlevel>=1 ) t_symbFact_loc[1] = SuperLU_timer_() - t_symbFact_loc[1];#endif #if ( PRNTlevel>=1 ) estimate_memUsage (n, iam, symb_mem_usage, &totalMemLU, &overestimMem, Pslu_freeable, &Llu_symbfact, &VInfo, &CS, &PS); stat_loc[0] = (float) nnzL; stat_loc[1] = (float) nnzU; stat_loc[2] = (float) nsuper_loc; stat_loc[3] = (float) Pslu_freeable->xlsub[VInfo.nvtcs_loc]; stat_loc[4] = (float) Pslu_freeable->xusub[VInfo.nvtcs_loc]; stat_loc[5] = totalMemLU; stat_loc[6] = overestimMem; stat_loc[7] = totalMemLU - overestimMem; stat_loc[8] = (float) PS.maxSzBuf; stat_loc[9] = (float) PS.nDnsUpSeps; stat_loc[10] = (float) PS.nDnsCurSep; stat_loc[11] = (float) (Llu_symbfact.no_expand + Llu_symbfact.no_expcp + Llu_symbfact.no_expand_pr); stat_loc[12] = (float) Llu_symbfact.no_expand; stat_loc[13] = (float) Llu_symbfact.no_expcp; stat_loc[14] = (float) Llu_symbfact.no_expand_pr; stat_loc[15] = (float) fill_rcmd; stat_loc[16] = PS.nops; stat_loc[17] = PS.fill_pelt[1]; stat_loc[18] = PS.fill_pelt[4]; stat_loc[19] = PS.fill_pelt[0]; stat_loc[20] = PS.fill_pelt[2]; stat_loc[21] = PS.fill_pelt[3]; stat_loc[22] = PS.fill_pelt[5]; MPI_Reduce (stat_loc, stat_glob, 23, MPI_FLOAT, MPI_SUM, 0, (*symb_comm)); MPI_Reduce (&(stat_loc[5]), mem_glob, 14, MPI_FLOAT, MPI_MAX, 0, (*symb_comm)); fill_rcmd = (int_t) mem_glob[10]; PS.fill_pelt[0] = stat_glob[19]; PS.fill_pelt[1] = mem_glob[12]; PS.fill_pelt[2] = stat_glob[20]; PS.fill_pelt[3] = stat_glob[21]; PS.fill_pelt[4] = mem_glob[13]; PS.fill_pelt[5] = stat_glob[22]; if (PS.fill_pelt[2] == 0.) PS.fill_pelt[2] = 1.; if (PS.fill_pelt[5] == 0.) PS.fill_pelt[5] = 1.; #if ( PROFlevel>=1 ) MPI_Reduce (t_symbFact_loc, t_symbFact, 3, MPI_DOUBLE, MPI_MAX, 0, (*symb_comm)); MPI_Gather (time_lvls, 3 * nlvls, MPI_DOUBLE, time_lvlsT, 3 * nlvls , MPI_DOUBLE, 0, (*symb_comm));#endif stat_msgs_l[0] = (float) PS.maxsz_msgSnd; stat_msgs_l[1] = (float) PS.maxsz_msgSnd; if (PS.maxsz_msgSnd < PS.maxsz_msgCol) stat_msgs_l[1] = PS.maxsz_msgCol; stat_msgs_l[2] = PS.no_shmSnd + PS.no_msgsSnd + PS.no_shmRcvd + PS.no_msgsRcvd; stat_msgs_l[3] = stat_msgs_l[2] + PS.no_msgsCol; stat_msgs_l[4] = stat_msgs_l[2]; stat_msgs_l[5] = stat_msgs_l[3]; stat_msgs_l[6] = PS.no_msgsSnd; stat_msgs_l[7] = PS.no_msgsSnd + PS.no_msgsCol; stat_msgs_l[8] = PS.sz_msgsSnd; stat_msgs_l[9] = PS.sz_msgsSnd + PS.sz_msgsCol; MPI_Reduce (stat_msgs_l, stat_msgs_g, 4, MPI_FLOAT, MPI_MAX, 0, (*symb_comm)); MPI_Reduce (&(stat_msgs_l[4]), &(stat_msgs_g[4]), 6, MPI_FLOAT, MPI_SUM, 0, (*symb_comm)); if (stat_msgs_g[6] == 0) stat_msgs_g[6] = 1; if (stat_msgs_g[7] == 0) stat_msgs_g[7] = 1; if (!iam) { nnzL = (int_t) stat_glob[0]; nnzU = (int_t) stat_glob[1]; nsuper = (int_t) stat_glob[2]; szLGr = (int_t) stat_glob[3]; szUGr = (int_t) stat_glob[4]; printf("\tMax szBlk %ld\n", VInfo.maxSzBlk);#if ( PRNTlevel>=2 ) printf("\t relax_gen %.2f, relax_curSep %.2f, relax_seps %.2f\n", PS.relax_gen, PS.relax_curSep, PS.relax_seps);#endif printf("\tParameters: fill mem %ld fill pelt %ld\n", sp_ienv_dist(6), PS.fill_par); printf("\tNonzeros in L %ld\n", nnzL); printf("\tNonzeros in U %ld\n", nnzU); printf("\tnonzeros in L+U %ld\n", nnzL + nnzU); printf("\tNo of supers %ld\n", nsuper); printf("\tSize of G(L) %ld\n", szLGr); printf("\tSize of G(U) %ld\n", szUGr); printf("\tSize of G(L+U) %ld\n", szLGr+szUGr); printf("\tParSYMBfact (MB) :\tL\\U MAX %.2f\tAVG %.2f\n", mem_glob[0]*1e-6, stat_glob[5]/nprocs_symb*1e-6);#if ( PRNTlevel>=2 ) printf("\tRL overestim (MB):\tL\\U MAX %.2f\tAVG %.2f\n", mem_glob[1]*1e-6, stat_glob[6]/nprocs_symb*1e-6); printf("\tsnd/rcv buffers (MB):\tL\\U MAX %.2f\tAVG %.2f\n", mem_glob[3]*1e-6, stat_glob[8]/nprocs_symb*1e-6); printf("\tSYMBfact 2*n+4*nvtcs_loc+2*maxNvtcsNds_loc:\tL\\U %.2f\n", (float) (2 * n * sizeof(int_t)) *1e-6); printf("\tint_t %d, int %d, long int %d, short %d, float %d, double %d\n", sizeof(int_t), sizeof(int), sizeof(long int), sizeof(short), sizeof(float), sizeof(double)); printf("\tDNS ALLSEPS:\t MAX %d\tAVG %.2f\n", (int_t) mem_glob[4], stat_glob[9]/nprocs_symb); printf("\tDNS CURSEP:\t MAX %d\tAVG %.2f\n\n", (int_t) mem_glob[5], stat_glob[10]/nprocs_symb); printf("\t MAX FILL Mem(L+U) / Mem(A) per processor %ld\n", fill_rcmd); printf("\t Per elt MAX %ld AVG %ld\n", (int_t) PS.fill_pelt[4], (int_t)(PS.fill_pelt[3]/PS.fill_pelt[5])); printf("\t Per elt RL MAX %ld AVG %ld\n", (int_t) PS.fill_pelt[1], (int_t)(PS.fill_pelt[0]/PS.fill_pelt[2])); printf("\tM Nops:\t MAX %.2f\tAVG %.2f\n", mem_glob[11]*1e-6, (stat_glob[16]/nprocs_symb)*1e-6); printf("\tEXPANSIONS: MAX/AVG\n"); printf("\tTOTAL: %d / %.2f\n", (int_t) mem_glob[6], stat_glob[11]/nprocs_symb); printf("\tREALLOC: %.f / %.2f RL_CP %.f / %.2f PR_CP %.f / %.2f\n", mem_glob[7], stat_glob[12]/nprocs_symb, mem_glob[8], stat_glob[13]/nprocs_symb, mem_glob[9], stat_glob[14]/nprocs_symb); printf ("\n\tDATA MSGS noMsgs*10^3 %.3f/%.3f size (MB) %.3f/%.3f \n", stat_msgs_g[2]*1e-3, stat_msgs_g[4]/nprocs_symb*1e-3, stat_msgs_g[0]*1e-6, stat_msgs_g[8] / stat_msgs_g[6]*1e-6); printf ("\tTOTAL MSGS noMsgs*10^3 %.3f/%.3f size (MB) %.3f/%.3f \n", stat_msgs_g[3]*1e-3, stat_msgs_g[5]/nprocs_symb*1e-3, stat_msgs_g[1]*1e-6, stat_msgs_g[9]/stat_msgs_g[7]*1e-6);#endif #if ( PROFlevel>=1 ) printf("Distribute matrix time = %8.3f\n", t_symbFact[0]); printf("Count vertices time = %8.3f\n", t_symbFact[2]); printf("Symbfact DIST time = %8.3f\n", t_symbFact[1]); printf("\nLvl\t Time\t Init\t Inter\t Intra\n"); time_lvlsg[0] = 0.; for (i = 0; i < nlvls; i++) { for (j = 1; j < 9; j++) time_lvlsg[j] = 0.; for (p = 0; p < nprocs_symb; p++) { k = p * 3 * nlvls; t = time_lvlsT[i*3+k] + time_lvlsT[i*3+k+1] + time_lvlsT[i*3+k+2]; if (t > time_lvlsg[1]) { time_lvlsg[1] = t; j = p; } time_lvlsg[2] += t; if (time_lvlsT[i*3+k] > time_lvlsg[3]) time_lvlsg[3] = time_lvlsT[i*3+k]; time_lvlsg[4] += time_lvlsT[i*3+k]; if (time_lvlsT[i*3+k+1] > time_lvlsg[5]) time_lvlsg[5] = time_lvlsT[i*3+k+1]; time_lvlsg[6] += time_lvlsT[i*3+k+1]; if (time_lvlsT[i*3+k+2] > time_lvlsg[7]) time_lvlsg[7] = time_lvlsT[i*3+k+2]; time_lvlsg[8] += time_lvlsT[i*3+k+2]; } time_lvlsg[0] += time_lvlsg[1]; printf ("%d \t%.3f/%.3f\t%.3f/%.3f\t%.3f/%.3f\t%.3f/%.3f\n", i, time_lvlsg[1], time_lvlsg[2] / nprocs_symb, time_lvlsg[3], time_lvlsg[4] / nprocs_symb, time_lvlsg[5], time_lvlsg[6] /nprocs_symb, time_lvlsg[7], time_lvlsg[8] / nprocs_symb); } printf("\t %8.3f \n", time_lvlsg[0]); #endif }#endif#if ( PROFlevel>=1 ) SUPERLU_FREE (time_lvls); SUPERLU_FREE (time_lvlsT);#endif symbfact_free (iam, nprocs_symb, &Llu_symbfact, &VInfo, &CS); } /* if (iam < nprocs_symb) */ else { /* update Pslu_freeable before returning */ Pslu_freeable->nvtcs_loc = 0; Pslu_freeable->xlsub = NULL; Pslu_freeable->lsub = NULL; Pslu_freeable->xusub = NULL; Pslu_freeable->usub = NULL; Pslu_freeable->supno_loc = NULL; Pslu_freeable->xsup_beg_loc = NULL; Pslu_freeable->xsup_end_loc = NULL; SUPERLU_FREE( tempArray ); PS.allocMem -= n * sizeof(int_t); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit psymbfact()");#endif return (- PS.allocMem);} /* SYMBFACT_DIST */static int_tinitParmsAndStats( psymbfact_stat_t *PS /* Output -statistics*/)/* * Purpose * ======= * Initialize relaxation parameters and statistics variables */{ int i; PS->nDnsCurSep = 0; PS->nDnsUpSeps = 0; PS->relax_gen = 1.0; PS->relax_curSep = 1.0; PS->relax_seps = 1.0; PS->fill_par = sp_ienv_dist(6); PS->nops = 0.; PS->no_shmSnd = 0.; PS->no_msgsSnd = 0.; PS->maxsz_msgSnd = 0; PS->sz_msgsSnd = 0.; PS->no_shmRcvd = 0.; PS->no_msgsRcvd = 0.; PS->maxsz_msgRcvd = 0; PS->sz_msgsRcvd = 0.; PS->no_msgsCol = 0.; PS->maxsz_msgCol = 0; PS->sz_msgsCol = 0.; for (i = 0; i < 6; i++) PS->fill_pelt[i] = 0.; PS->estimUSz = 0; PS->estimLSz = 0; PS->maxSzLPr = 0; PS->maxSzUPr = 0; PS->maxSzBuf = 0; PS->szDnsSep = 0; PS->allocMem = 0;}static floatcntsVtcs ( int_t n, /* Input - order of the input matrix */ int iam, /* Input - my processor number */ int nprocs_symb, /* Input - no of processors for symbolic factorization */ Pslu_freeable_t *Pslu_freeable, /* Input -globToLoc and maxNvtcsPProc */ Llu_symbfact_t *Llu_symbfact, /* Input/Output -local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* Input - local info on vertices distribution */ int_t *tempArray, /* Input - temporary storage */ int_t *fstVtxSep, /* Input - first vertex of each node in the tree */ int_t *sizes, /* Input - sizes of each node in the tree */ psymbfact_stat_t *PS, /* Input/Output -statistics */ MPI_Comm *commLvls )/* * Purpose
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -