⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 #psymbfact.c#

📁 SuperLU 2.2版本。对大型、稀疏、非对称的线性系统的直接求解
💻 C
📖 第 1 页 / 共 5 页
字号:
					     &Llu_symbfact, &VInfo, &PS)) > 0)	      return (flinfo);	    if (fstVtx < lstVtx)	      VInfo.fstVtx_nextLvl = VInfo.begEndBlks_loc[2];	    	    domain_symbfact 	      (A, iam, lvl, szSep, iSep, jSep, sizes, fstVtxSep, fstVtx, lstVtx, 	       Pslu_freeable, &Llu_symbfact, &VInfo, &CS, &PS, tempArray, 	       &mark, &nextl, &nextu, &neltsZr, &neltsTotal, &nsuper_loc);	    	    PS.estimLSz = nextl;	    PS.estimUSz = nextu;	    if (nprocs_symb != 1) 	      if((flinfo = allocPrune_lvl (&Llu_symbfact, &VInfo, &PS)) > 0)		return (flinfo);#if ( PROFlevel>=1 )	    t2 = SuperLU_timer_();	    time_lvls[lvl] = 0.; time_lvls[lvl+1] = 0.;	    time_lvls[lvl + 2] = t2 - t1;#endif	  }	}	else {	  lstP = fstP + npNode;	  if (fstP <= iam && iam < lstP) {#if ( PROFlevel>=1 )	    t1 = SuperLU_timer_();	  #endif	    if (VInfo.filledSep != FILLED_SEPS)	      initLvl_symbfact(n, iam, fstVtx, lstVtx,			       Pslu_freeable, &Llu_symbfact, &VInfo, &PS, commLvls[jSep], 			       tempArray, nextl, nextu);#if ( PROFlevel>=1 )	    t2 = SuperLU_timer_();	    time_lvls[3*lvl] = t2 - t1;#endif	    interLvl_symbfact (A, iam, lvl, szSep, fstP, lstP,			       iSep, jSep, sizes, fstVtxSep, 			       &nextl, &nextu, &nsuper_loc, &mark, tempArray,			       &Llu_symbfact, Pslu_freeable, &CS, &VInfo, &PS,			       commLvls[jSep], symb_comm);#if ( PROFlevel>=1 )	    t1 = SuperLU_timer_();	    time_lvls[3*lvl+1] = t1 - t2;#endif	    if (VInfo.filledSep != FILLED_SEPS)	      intraLvl_symbfact 		(A, iam, lvl, szSep, iSep, jSep, sizes, fstVtxSep, fstP, lstP, 		 fstVtx, lstVtx, Pslu_freeable, &Llu_symbfact, &VInfo, &CS, &PS,		 tempArray, &mark, &nextl, &nextu, &neltsZr, &neltsTotal, 		 &nsuper_loc, commLvls[jSep], symb_comm);#if ( PROFlevel>=1 )	    t2 = SuperLU_timer_();	    time_lvls[3*lvl+2] = t2 - t1;		 #endif	  }	}	fstP += npNode;      }      iSep += szSep;      szSep = szSep / 2;      lvl ++;    }      SUPERLU_FREE( tempArray );    if ( commLvls ) SUPERLU_FREE( commLvls );        /* Set up 
global information and collect statistics */    if (PS.maxSzLPr < Llu_symbfact.indLsubPr)      PS.maxSzLPr = Llu_symbfact.indLsubPr;    if (PS.maxSzUPr < Llu_symbfact.indUsubPr)      PS.maxSzUPr = Llu_symbfact.indUsubPr;        Llu_symbfact.xlsub[VInfo.nvtcs_loc] = nextl;    Llu_symbfact.xusub[VInfo.nvtcs_loc] = nextu;    fill_rcmd = SUPERLU_MAX( nextl / nnz_ainf_loc, nextu / nnz_asup_loc) + 1;    Pslu_freeable->xsup_beg_loc = intMalloc_dist (nsuper_loc+1);    Pslu_freeable->xsup_end_loc = intMalloc_dist (nsuper_loc+1);    if (!Pslu_freeable->xsup_beg_loc || !Pslu_freeable->xsup_end_loc) {      fprintf (stderr, "Malloc fails for xsup_beg_loc, xsup_end_loc.");      return (PS.allocMem);    }    PS.allocMem += 2 * (nsuper_loc+1) * sizeof(int_t);    maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc;    nnzL = 0; nnzU = 0;        i = 0;    nsuper = 0;    ind_blk = 0;    for (ind_blk = 0; ind_blk < VInfo.nblks_loc; ind_blk ++) {      fstVtx = VInfo.begEndBlks_loc[2 * ind_blk];      lstVtx = VInfo.begEndBlks_loc[2 * ind_blk + 1];      fstVtx_lid = LOCAL_IND( Pslu_freeable->globToLoc[fstVtx] );      nsuper = Pslu_freeable->supno_loc[fstVtx_lid];      Pslu_freeable->xsup_beg_loc[nsuper] = fstVtx;      szsn = 1;      if (INT_MAX - nnzL <= Llu_symbfact.xlsub[fstVtx_lid + 1] - 	  Llu_symbfact.xlsub[fstVtx_lid])	printf ("PE[%d] ERR nnzL %d\n", iam, nnzL);       if (INT_MAX - nnzU <= Llu_symbfact.xusub[fstVtx_lid + 1] - 	  Llu_symbfact.xusub[fstVtx_lid])	printf ("PE[%d] ERR nnzU %d\n", iam, nnzU);            j = Llu_symbfact.xlsub[fstVtx_lid + 1] - Llu_symbfact.xlsub[fstVtx_lid];      k = Llu_symbfact.xusub[fstVtx_lid + 1] - Llu_symbfact.xusub[fstVtx_lid];      nnzL += j;      nnzU += k;      for (vtx = fstVtx + 1, vtx_lid = fstVtx_lid + 1; 	   vtx < lstVtx; vtx++, vtx_lid ++) {	if (Pslu_freeable->supno_loc[vtx_lid] != nsuper) {	  nsuper = Pslu_freeable->supno_loc[vtx_lid];	  Pslu_freeable->xsup_end_loc[nsuper-1] = vtx;	  Pslu_freeable->xsup_beg_loc[nsuper] = vtx;	  szsn = 1;	  j = 
Llu_symbfact.xlsub[vtx_lid + 1] - Llu_symbfact.xlsub[vtx_lid];	  k = Llu_symbfact.xusub[vtx_lid + 1] - Llu_symbfact.xusub[vtx_lid];	}	else {	  szsn ++;	}	nnzL += j - szsn + 1;	nnzU += k - szsn + 1;      }      Pslu_freeable->xsup_end_loc[nsuper] = lstVtx;    }    Pslu_freeable->supno_loc[VInfo.nvtcs_loc] = nsuper_loc;    Pslu_freeable->nvtcs_loc = VInfo.nvtcs_loc;     /* set up xsup data */    Pslu_freeable->lsub = Llu_symbfact.lsub;    Pslu_freeable->xlsub = Llu_symbfact.xlsub;    Pslu_freeable->usub = Llu_symbfact.usub;    Pslu_freeable->xusub = Llu_symbfact.xusub;    Pslu_freeable->szLsub = Llu_symbfact.szLsub;    Pslu_freeable->szUsub = Llu_symbfact.szUsub;    #if ( PROFlevel>=1 )    t_symbFact_loc[1] = SuperLU_timer_() - t_symbFact_loc[1];#endif  #if ( PRNTlevel>=1 )    estimate_memUsage (n, iam,  symb_mem_usage, 		       &totalMemLU, &overestimMem, 		       Pslu_freeable, &Llu_symbfact, &VInfo, &CS, &PS);    stat_loc[0] = (float) nnzL;    stat_loc[1] = (float) nnzU;      stat_loc[2] = (float) nsuper_loc;    stat_loc[3] = (float) Pslu_freeable->xlsub[VInfo.nvtcs_loc];    stat_loc[4] = (float) Pslu_freeable->xusub[VInfo.nvtcs_loc];    stat_loc[5] = totalMemLU;    stat_loc[6] = overestimMem;    stat_loc[7] = totalMemLU - overestimMem;    stat_loc[8] = (float) PS.maxSzBuf;    stat_loc[9] = (float) PS.nDnsUpSeps;    stat_loc[10] = (float) PS.nDnsCurSep;    stat_loc[11] = (float) (Llu_symbfact.no_expand + Llu_symbfact.no_expcp +			    Llu_symbfact.no_expand_pr);    stat_loc[12] = (float) Llu_symbfact.no_expand;    stat_loc[13] = (float) Llu_symbfact.no_expcp;    stat_loc[14] = (float) Llu_symbfact.no_expand_pr;    stat_loc[15] = (float) fill_rcmd;    stat_loc[16] = PS.nops;    stat_loc[17] = PS.fill_pelt[1];    stat_loc[18] = PS.fill_pelt[4];    stat_loc[19] = PS.fill_pelt[0];    stat_loc[20] = PS.fill_pelt[2];    stat_loc[21] = PS.fill_pelt[3];    stat_loc[22] = PS.fill_pelt[5];        MPI_Reduce (stat_loc, stat_glob, 23, MPI_FLOAT, 		MPI_SUM, 0, (*symb_comm));    
MPI_Reduce (&(stat_loc[5]), mem_glob, 14, MPI_FLOAT, 		MPI_MAX, 0, (*symb_comm));    fill_rcmd = (int_t) mem_glob[10];    PS.fill_pelt[0] = stat_glob[19];    PS.fill_pelt[1] = mem_glob[12];    PS.fill_pelt[2] = stat_glob[20];    PS.fill_pelt[3] = stat_glob[21];    PS.fill_pelt[4] = mem_glob[13];    PS.fill_pelt[5] = stat_glob[22];    if (PS.fill_pelt[2] == 0.) PS.fill_pelt[2] = 1.;    if (PS.fill_pelt[5] == 0.) PS.fill_pelt[5] = 1.;    #if ( PROFlevel>=1 )    MPI_Reduce (t_symbFact_loc, t_symbFact, 3, MPI_DOUBLE,		MPI_MAX, 0, (*symb_comm));    MPI_Gather (time_lvls, 3 * nlvls, MPI_DOUBLE,		time_lvlsT, 3 * nlvls , MPI_DOUBLE,		0, (*symb_comm));#endif        stat_msgs_l[0] = (float) PS.maxsz_msgSnd;    stat_msgs_l[1] = (float) PS.maxsz_msgSnd;    if (PS.maxsz_msgSnd < PS.maxsz_msgCol)      stat_msgs_l[1] = PS.maxsz_msgCol;    stat_msgs_l[2] = PS.no_shmSnd + PS.no_msgsSnd +       PS.no_shmRcvd + PS.no_msgsRcvd;    stat_msgs_l[3] = stat_msgs_l[2] + PS.no_msgsCol;    stat_msgs_l[4] = stat_msgs_l[2];    stat_msgs_l[5] = stat_msgs_l[3];     stat_msgs_l[6] = PS.no_msgsSnd;    stat_msgs_l[7] = PS.no_msgsSnd + PS.no_msgsCol;      stat_msgs_l[8] = PS.sz_msgsSnd;    stat_msgs_l[9] = PS.sz_msgsSnd + PS.sz_msgsCol;    MPI_Reduce (stat_msgs_l, stat_msgs_g, 4, MPI_FLOAT,		MPI_MAX, 0, (*symb_comm));    MPI_Reduce (&(stat_msgs_l[4]), &(stat_msgs_g[4]), 6, MPI_FLOAT,		MPI_SUM, 0, (*symb_comm));    if (stat_msgs_g[6] == 0) stat_msgs_g[6] = 1;    if (stat_msgs_g[7] == 0) stat_msgs_g[7] = 1;        if (!iam) {      nnzL   = (int_t) stat_glob[0]; nnzU  = (int_t) stat_glob[1];      nsuper = (int_t) stat_glob[2];      szLGr  = (int_t) stat_glob[3]; szUGr = (int_t) stat_glob[4];      printf("\tMax szBlk          %ld\n", VInfo.maxSzBlk);#if ( PRNTlevel>=2 )      printf("\t relax_gen %.2f, relax_curSep %.2f, relax_seps %.2f\n",	     PS.relax_gen, PS.relax_curSep, PS.relax_seps);#endif      printf("\tParameters: fill mem %ld fill pelt %ld\n",	     sp_ienv_dist(6), PS.fill_par);      
printf("\tNonzeros in L       %ld\n", nnzL);      printf("\tNonzeros in U       %ld\n", nnzU);      printf("\tnonzeros in L+U     %ld\n", 	     nnzL + nnzU);      printf("\tNo of supers   %ld\n", nsuper);      printf("\tSize of G(L)   %ld\n", szLGr);      printf("\tSize of G(U)   %ld\n", szUGr);      printf("\tSize of G(L+U) %ld\n", szLGr+szUGr);      printf("\tParSYMBfact (MB)      :\tL\\U MAX %.2f\tAVG %.2f\n",	     mem_glob[0]*1e-6, 	     stat_glob[5]/nprocs_symb*1e-6);#if ( PRNTlevel>=2 )      printf("\tRL overestim (MB):\tL\\U MAX %.2f\tAVG %.2f\n",	     mem_glob[1]*1e-6, 	     stat_glob[6]/nprocs_symb*1e-6);      printf("\tsnd/rcv buffers (MB):\tL\\U MAX %.2f\tAVG %.2f\n",	     mem_glob[3]*1e-6, 	     stat_glob[8]/nprocs_symb*1e-6);      printf("\tSYMBfact 2*n+4*nvtcs_loc+2*maxNvtcsNds_loc:\tL\\U %.2f\n",	     (float) (2 * n * sizeof(int_t)) *1e-6);      printf("\tint_t %d, int %d, long int %d, short %d, float %d, double %d\n", 	     sizeof(int_t), sizeof(int),  sizeof(long int), sizeof(short), sizeof(float),	     sizeof(double));      printf("\tDNS ALLSEPS:\t MAX %d\tAVG %.2f\n",	     (int_t) mem_glob[4], stat_glob[9]/nprocs_symb);      printf("\tDNS CURSEP:\t MAX %d\tAVG %.2f\n\n",	     (int_t) mem_glob[5], stat_glob[10]/nprocs_symb);      printf("\t MAX FILL Mem(L+U) / Mem(A) per processor %ld\n", fill_rcmd);          printf("\t      Per elt MAX %ld AVG %ld\n", 	     (int_t) PS.fill_pelt[4], (int_t)(PS.fill_pelt[3]/PS.fill_pelt[5]));      printf("\t      Per elt RL MAX %ld AVG %ld\n",	     (int_t) PS.fill_pelt[1], (int_t)(PS.fill_pelt[0]/PS.fill_pelt[2]));      printf("\tM Nops:\t MAX %.2f\tAVG %.2f\n",	     mem_glob[11]*1e-6, (stat_glob[16]/nprocs_symb)*1e-6);                  printf("\tEXPANSIONS: MAX/AVG\n");      printf("\tTOTAL: %d / %.2f\n",	     (int_t) mem_glob[6], stat_glob[11]/nprocs_symb);      printf("\tREALLOC: %.f / %.2f RL_CP %.f / %.2f PR_CP %.f / %.2f\n",	     mem_glob[7], stat_glob[12]/nprocs_symb,	     mem_glob[8], 
stat_glob[13]/nprocs_symb,	     mem_glob[9], stat_glob[14]/nprocs_symb);            printf ("\n\tDATA MSGS  noMsgs*10^3 %.3f/%.3f size (MB) %.3f/%.3f \n",	      stat_msgs_g[2]*1e-3, stat_msgs_g[4]/nprocs_symb*1e-3,	      stat_msgs_g[0]*1e-6, stat_msgs_g[8] / stat_msgs_g[6]*1e-6);      printf ("\tTOTAL MSGS noMsgs*10^3 %.3f/%.3f size (MB) %.3f/%.3f \n",	      stat_msgs_g[3]*1e-3, stat_msgs_g[5]/nprocs_symb*1e-3,	      stat_msgs_g[1]*1e-6, stat_msgs_g[9]/stat_msgs_g[7]*1e-6);#endif      #if ( PROFlevel>=1 )      printf("Distribute matrix time = %8.3f\n", t_symbFact[0]);      printf("Count vertices time    = %8.3f\n", t_symbFact[2]);      printf("Symbfact DIST time     = %8.3f\n", t_symbFact[1]);            printf("\nLvl\t    Time\t    Init\t   Inter\t    Intra\n");      time_lvlsg[0] = 0.;      for (i = 0; i < nlvls; i++) {	for (j = 1; j < 9; j++)	  time_lvlsg[j] = 0.;	for (p = 0; p < nprocs_symb; p++) {	  k = p * 3 * nlvls;	  t = time_lvlsT[i*3+k] + time_lvlsT[i*3+k+1] + time_lvlsT[i*3+k+2];	  if (t > time_lvlsg[1]) {	    time_lvlsg[1] = t; j = p;	  }	  time_lvlsg[2] += t;	  if (time_lvlsT[i*3+k] > time_lvlsg[3])	    time_lvlsg[3] = time_lvlsT[i*3+k];	  time_lvlsg[4] += time_lvlsT[i*3+k];	  if (time_lvlsT[i*3+k+1] > time_lvlsg[5])	    time_lvlsg[5] = time_lvlsT[i*3+k+1];	  time_lvlsg[6] += time_lvlsT[i*3+k+1];	  if (time_lvlsT[i*3+k+2] > time_lvlsg[7])	    time_lvlsg[7] = time_lvlsT[i*3+k+2];	  time_lvlsg[8] += time_lvlsT[i*3+k+2];	}	time_lvlsg[0] += time_lvlsg[1];	printf ("%d \t%.3f/%.3f\t%.3f/%.3f\t%.3f/%.3f\t%.3f/%.3f\n", i,		time_lvlsg[1], time_lvlsg[2] / nprocs_symb,		time_lvlsg[3], time_lvlsg[4] / nprocs_symb,		time_lvlsg[5], time_lvlsg[6] /nprocs_symb,		time_lvlsg[7], time_lvlsg[8] / nprocs_symb);       }      printf("\t   %8.3f \n", time_lvlsg[0]);    #endif    }#endif#if ( PROFlevel>=1 )    SUPERLU_FREE (time_lvls);    SUPERLU_FREE (time_lvlsT);#endif    symbfact_free (iam, nprocs_symb, &Llu_symbfact, &VInfo, &CS);  } /* if (iam < nprocs_symb) */    else {  
  /* update Pslu_freeable before returning */    Pslu_freeable->nvtcs_loc = 0;     Pslu_freeable->xlsub = NULL; Pslu_freeable->lsub = NULL;     Pslu_freeable->xusub = NULL; Pslu_freeable->usub = NULL;     Pslu_freeable->supno_loc = NULL;    Pslu_freeable->xsup_beg_loc = NULL;         Pslu_freeable->xsup_end_loc = NULL;        SUPERLU_FREE( tempArray );    PS.allocMem -= n * sizeof(int_t);  }  #if ( DEBUGlevel>=1 )  CHECK_MALLOC(iam, "Exit psymbfact()");#endif  return (- PS.allocMem);} /* SYMBFACT_DIST */static int_tinitParmsAndStats( psymbfact_stat_t *PS /* Output -statistics*/)/*  * Purpose * ======= * Initialize relaxation parameters and statistics variables */{  int  i;  PS->nDnsCurSep = 0;  PS->nDnsUpSeps = 0;    PS->relax_gen = 1.0;  PS->relax_curSep = 1.0;  PS->relax_seps = 1.0;  PS->fill_par = sp_ienv_dist(6);  PS->nops = 0.;  PS->no_shmSnd = 0.;  PS->no_msgsSnd = 0.;  PS->maxsz_msgSnd = 0;  PS->sz_msgsSnd = 0.;  PS->no_shmRcvd = 0.;  PS->no_msgsRcvd = 0.;  PS->maxsz_msgRcvd = 0;  PS->sz_msgsRcvd = 0.;  PS->no_msgsCol = 0.;  PS->maxsz_msgCol = 0;  PS->sz_msgsCol = 0.;  for (i = 0; i < 6; i++)    PS->fill_pelt[i] = 0.;  PS->estimUSz = 0;  PS->estimLSz = 0;  PS->maxSzLPr = 0;  PS->maxSzUPr = 0;  PS->maxSzBuf = 0;  PS->szDnsSep = 0;    PS->allocMem = 0;}static floatcntsVtcs ( int_t  n,           /* Input - order of the input matrix */ int    iam,         /* Input - my processor number */ int    nprocs_symb, /* Input - no of processors for symbolic factorization */ Pslu_freeable_t *Pslu_freeable, /* Input -globToLoc and maxNvtcsPProc */ Llu_symbfact_t  *Llu_symbfact, /* Input/Output -local L, U data structures */ vtcsInfo_symbfact_t *VInfo,  /* Input - local info on vertices distribution */ int_t            *tempArray, /* Input - temporary storage */ int_t            *fstVtxSep, /* Input - first vertex of each node in the tree */ int_t            *sizes,     /* Input - sizes of each node in the tree */ psymbfact_stat_t *PS,  /* Input/Output -statistics */ 
MPI_Comm         *commLvls )/*  * Purpose

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -