
📄 pzsymbfact_distdata.c

📁 SuperLU version 2.2. Direct solution of large, sparse, nonsymmetric linear systems
💻 C
📖 Page 1 of 5
/*
 * -- Parallel symbolic factorization auxiliary routine (version 2.2) --
 * -- Distributes the data from parallel symbolic factorization
 * -- to numeric factorization
 * INRIA France - July 1, 2004
 * Laura Grigori
 *
 * November 1, 2007
 * February 20, 2008
 */

/* limits.h: the largest positive integer (INT_MAX) */
#include <limits.h>

#include "superlu_zdefs.h"
#include "psymbfact.h"

static float
dist_symbLU (int_t n, Pslu_freeable_t *Pslu_freeable,
	     Glu_persist_t *Glu_persist,
	     int_t **p_xlsub, int_t **p_lsub, int_t **p_xusub, int_t **p_usub,
	     gridinfo_t *grid
	     )
/*
 * Purpose
 * =======
 *
 * Redistribute the symbolic structure of L and U from the distribution
 * used in the parallel symbolic factorization step to the distribution
 * used in the parallel numeric factorization step.  On exit, the L and U
 * structure for the 2D distribution used in the numeric factorization step
 * is stored in p_xlsub, p_lsub, p_xusub, p_usub.  The global supernodal
 * information is also computed and stored in Glu_persist->supno
 * and Glu_persist->xsup.
 *
 * This routine allocates memory for storing the structure of L and U
 * and the supernodes information.  This represents the arrays:
 * p_xlsub, p_lsub, p_xusub, p_usub,
 * Glu_persist->supno, Glu_persist->xsup.
 *
 * This routine also deallocates memory allocated during the symbolic
 * factorization routine.  That is, the following arrays are freed:
 * Pslu_freeable->xlsub, Pslu_freeable->lsub,
 * Pslu_freeable->xusub, Pslu_freeable->usub,
 * Pslu_freeable->globToLoc, Pslu_freeable->supno_loc,
 * Pslu_freeable->xsup_beg_loc, Pslu_freeable->xsup_end_loc.
 *
 * Arguments
 * =========
 *
 * n      (Input) int_t
 *        Order of the input matrix.
 *
 * Pslu_freeable (Input) Pslu_freeable_t *
 *        Local L and U structure,
 *        global to local indexing information.
 *
 * Glu_persist (Output) Glu_persist_t *
 *        Stores on output the information on supernodes mapping.
 *
 * p_xlsub (Output) int_t **
 *         Pointer to structure of L distributed on a 2D grid
 *         of processors, stored by columns.
 *
 * p_lsub  (Output) int_t **
 *         Structure of L distributed on a 2D grid of processors,
 *         stored by columns.
 *
 * p_xusub (Output) int_t **
 *         Pointer to structure of U distributed on a 2D grid
 *         of processors, stored by rows.
 *
 * p_usub  (Output) int_t **
 *         Structure of U distributed on a 2D grid of processors,
 *         stored by rows.
 *
 * grid   (Input) gridinfo_t*
 *        The 2D process mesh.
 *
 * Return value
 * ============
 *   < 0, number of bytes allocated on return from dist_symbLU.
 *   > 0, number of bytes allocated in this routine when out of memory
 *        (an approximation).
 */
{
  int   iam, nprocs, pc, pr, p, np, p_diag;
  int_t *nnzToSend, *nnzToRecv, *nnzToSend_l, *nnzToSend_u,
    *tmp_ptrToSend, *mem;
  int_t *nnzToRecv_l, *nnzToRecv_u;
  int_t *send_1, *send_2, nsend_1, nsend_2;
  int_t *ptrToSend, *ptrToRecv, sendL, sendU, *snd_luind, *rcv_luind;
  int_t nsupers, nsupers_i, nsupers_j;
  int *nvtcs, *intBuf1, *intBuf2, *intBuf3, *intBuf4, intNvtcs_loc;
  int_t maxszsn, maxNvtcsPProc;
  int_t *xsup_n, *supno_n, *temp, *xsup_beg_s, *xsup_end_s, *supno_s;
  int_t *xlsub_s, *lsub_s, *xusub_s, *usub_s;
  int_t *xlsub_n, *lsub_n, *xusub_n, *usub_n;
  int_t *xsub_s, *sub_s, *xsub_n, *sub_n;
  int_t *globToLoc, nvtcs_loc;
  int_t SendCnt_l, SendCnt_u, nnz_loc_l, nnz_loc_u, nnz_loc,
    RecvCnt_l, RecvCnt_u, ind_loc;
  int_t i, k, j, gb, szsn, gb_n, gb_s, gb_l, fst_s, fst_s_l, lst_s, i_loc;
  int_t nelts, isize;
  float memAux;  /* Memory used during this routine and freed on return */
  float memRet;  /* Memory allocated and not freed on return */
  int_t iword, dword;

  /* ------------------------------------------------------------
     INITIALIZATION.
     ------------------------------------------------------------*/
  iam = grid->iam;
#if ( DEBUGlevel>=1 )
  CHECK_MALLOC(iam, "Enter dist_symbLU()");
#endif
  nprocs = (int) grid->nprow * grid->npcol;
  xlsub_s = Pslu_freeable->xlsub; lsub_s = Pslu_freeable->lsub;
  xusub_s = Pslu_freeable->xusub; usub_s = Pslu_freeable->usub;
  maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc;
  globToLoc     = Pslu_freeable->globToLoc;
  nvtcs_loc     = Pslu_freeable->nvtcs_loc;
  xsup_beg_s    = Pslu_freeable->xsup_beg_loc;
  xsup_end_s    = Pslu_freeable->xsup_end_loc;
  supno_s       = Pslu_freeable->supno_loc;
  rcv_luind     = NULL;
  iword = sizeof(int_t);
  dword = sizeof(doublecomplex);
  memAux = 0.; memRet = 0.;

  mem = intCalloc_dist(12 * nprocs);
  if (!mem)
    return (ERROR_RET);
  memAux        = (float) (12 * nprocs * sizeof(int_t));
  nnzToRecv     = mem;
  nnzToSend     = nnzToRecv + 2*nprocs;
  nnzToSend_l   = nnzToSend + 2 * nprocs;
  nnzToSend_u   = nnzToSend_l + nprocs;
  send_1        = nnzToSend_u + nprocs;
  send_2        = send_1 + nprocs;
  tmp_ptrToSend = send_2 + nprocs;
  nnzToRecv_l   = tmp_ptrToSend + nprocs;
  nnzToRecv_u   = nnzToRecv_l + nprocs;

  ptrToSend = nnzToSend;
  ptrToRecv = nnzToSend + nprocs;

  nvtcs = (int *) SUPERLU_MALLOC(5 * nprocs * sizeof(int));
  intBuf1 = nvtcs + nprocs;
  intBuf2 = nvtcs + 2 * nprocs;
  intBuf3 = nvtcs + 3 * nprocs;
  intBuf4 = nvtcs + 4 * nprocs;
  memAux += 5 * nprocs * sizeof(int);

  maxszsn = sp_ienv_dist(3);  /* maximum size of a supernode */
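  /*
   * The global supernode partition for the numeric factorization is
   * computed centrally: each process contributes its local supernode
   * numbers (supno_s) through MPI_Gather/MPI_Gatherv to process 0,
   * which merges them into supno_n[] and broadcasts the result.
   */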
  /* Allocate space for storing Glu_persist_n. */
  if ( !(supno_n = intMalloc_dist(n+1)) ) {
    fprintf (stderr, "Malloc fails for supno_n[].");
    return (memAux);
  }
  memRet += (float) ((n+1) * sizeof(int_t));

  /* ------------------------------------------------------------
     DETERMINE SUPERNODES FOR NUMERICAL FACTORIZATION.
     ------------------------------------------------------------*/
  if (nvtcs_loc > INT_MAX)
    ABORT("ERROR in dist_symbLU nvtcs_loc > INT_MAX\n");
  intNvtcs_loc = (int) nvtcs_loc;
  MPI_Gather (&intNvtcs_loc, 1, MPI_INT, nvtcs, 1, MPI_INT,
	      0, grid->comm);
  if (!iam) {
    /* Set ptrToRecv to point to the beginning of the data for
       each processor. */
    for (k = 0, p = 0; p < nprocs; p++) {
      ptrToRecv[p] = k;
      k += nvtcs[p];
    }
  }

  if (nprocs > 1) {
    temp = NULL;
    if (!iam) {
      if ( !(temp = intMalloc_dist (n+1)) ) {
	fprintf (stderr, "Malloc fails for temp[].");
	return (memAux + memRet);
      }
      memAux += (float) (n+1) * iword;
    }
#if defined (_LONGINT)
    for (p = 0; p < nprocs; p++) {
      if (ptrToRecv[p] > INT_MAX)
	ABORT("ERROR in dist_symbLU size to send > INT_MAX\n");
      intBuf1[p] = (int) ptrToRecv[p];
    }
#else  /* Default */
    intBuf1 = ptrToRecv;
#endif
    MPI_Gatherv (supno_s, (int) nvtcs_loc, mpi_int_t,
		 temp, nvtcs, intBuf1, mpi_int_t, 0, grid->comm);
  }
  else
    temp = supno_s;

  if (!iam) {
    nsupers = 0;
    p = (int) OWNER( globToLoc[0] );
    gb = temp[ptrToRecv[p]];
    supno_n[0] = nsupers;
    ptrToRecv[p] ++;
    szsn = 1;
    for (j = 1; j < n; j ++) {
      if (p != (int) OWNER( globToLoc[j] ) || szsn >= maxszsn
	  || gb != temp[ptrToRecv[p]]) {
	/* a new supernode starts */
	nsupers ++;
	p  = (int) OWNER( globToLoc[j] );
	gb = temp[ptrToRecv[p]];
	szsn = 1;
      }
      else {
	szsn ++;
      }
      ptrToRecv[p] ++;
      supno_n[j] = nsupers;
    }
    nsupers++;
    if (nprocs > 1) {
      SUPERLU_FREE (temp);
      memAux -= (float) (n+1) * iword;
    }
    supno_n[n] = nsupers;
  }

  /* Reset nnzToSend to 0. */
  for (p = 0; p < 2 * nprocs; p++)
    nnzToSend[p] = 0;

  MPI_Bcast (supno_n, n+1, mpi_int_t, 0, grid->comm);
  nsupers = supno_n[n];

  /* Allocate space for storing Glu_persist_n. */
  if ( !(xsup_n = intMalloc_dist(nsupers+1)) ) {
    fprintf (stderr, "Malloc fails for xsup_n[].");
    return (memAux + memRet);
  }
  memRet += (float) (nsupers+1) * iword;
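  /*
   * At this point supno_n[0..n-1] holds the global supernode number of
   * every column for the numeric factorization.  A column j starts a
   * new supernode whenever its owner process changes, the current
   * supernode already contains maxszsn columns, or the symbolic-
   * factorization supernode number changes.
   */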
  /* ------------------------------------------------------------
     COUNT THE NUMBER OF NONZEROS TO BE SENT TO EACH PROCESS,
     THEN ALLOCATE SPACE.
     THIS ACCOUNTS FOR THE FIRST PASS OF L AND U.
     ------------------------------------------------------------*/
  gb = EMPTY;
  for (i = 0; i < n; i++) {
    if (gb != supno_n[i]) {
      /* a new supernode starts */
      gb = supno_n[i];
      xsup_n[gb] = i;
    }
  }
  xsup_n[nsupers] = n;

  for (p = 0; p < nprocs; p++) {
    send_1[p] = FALSE;
    send_2[p] = FALSE;
  }
  for (gb_n = 0; gb_n < nsupers; gb_n ++) {
    i = xsup_n[gb_n];
    if (iam == (int) OWNER( globToLoc[i] )) {
      pc = PCOL( gb_n, grid );
      pr = PROW( gb_n, grid );
      p_diag = PNUM( pr, pc, grid );

      i_loc = LOCAL_IND( globToLoc[i] );
      gb_s  = supno_s[i_loc];
      fst_s = xsup_beg_s[gb_s];
      lst_s = xsup_end_s[gb_s];
      fst_s_l = LOCAL_IND( globToLoc[fst_s] );
      for (j = xlsub_s[fst_s_l]; j < xlsub_s[fst_s_l+1]; j++) {
	k = lsub_s[j];
	if (k >= i) {
	  gb = supno_n[k];
	  p = (int) PNUM( PROW(gb, grid), pc, grid );
	  nnzToSend[2*p] ++;
	  send_1[p] = TRUE;
	}
      }
      for (j = xusub_s[fst_s_l]; j < xusub_s[fst_s_l+1]; j++) {
	k = usub_s[j];
	if (k >= i + xsup_n[gb_n+1] - xsup_n[gb_n]) {
	  gb = supno_n[k];
	  p = PNUM( pr, PCOL(gb, grid), grid );
	  nnzToSend[2*p+1] ++;
	  send_2[p] = TRUE;
	}
      }

      nsend_2 = 0;
      for (p = pr * grid->npcol; p < (pr + 1) * grid->npcol; p++) {
	nnzToSend[2*p+1] += 2;
	if (send_2[p])  nsend_2 ++;
      }
      for (p = pr * grid->npcol; p < (pr + 1) * grid->npcol; p++)
	if (send_2[p] || p == p_diag) {
	  if (p == p_diag && !send_2[p])
	    nnzToSend[2*p+1] += nsend_2;
	  else
	    nnzToSend[2*p+1] += nsend_2-1;
	  send_2[p] = FALSE;
	}
      nsend_1 = 0;
      for (p = pc; p < nprocs; p += grid->npcol) {
	nnzToSend[2*p] += 2;
	if (send_1[p]) nsend_1 ++;
      }
      for (p = pc; p < nprocs; p += grid->npcol)
	if (send_1[p]) {
	  nnzToSend[2*p] += nsend_1-1;
	  send_1[p] = FALSE;
	}
	else
	  nnzToSend[2*p] += nsend_1;
    }
  }

  /* All-to-all communication */
  MPI_Alltoall( nnzToSend, 2, mpi_int_t, nnzToRecv, 2, mpi_int_t,
		grid->comm);

  nnz_loc_l = nnz_loc_u = 0;
  SendCnt_l = SendCnt_u = RecvCnt_l = RecvCnt_u = 0;
  for (p = 0; p < nprocs; p++) {
    if ( p != iam ) {
      SendCnt_l += nnzToSend[2*p];   nnzToSend_l[p] = nnzToSend[2*p];
      SendCnt_u += nnzToSend[2*p+1]; nnzToSend_u[p] = nnzToSend[2*p+1];
      RecvCnt_l += nnzToRecv[2*p];   nnzToRecv_l[p] = nnzToRecv[2*p];
      RecvCnt_u += nnzToRecv[2*p+1]; nnzToRecv_u[p] = nnzToRecv[2*p+1];
    } else {
      nnz_loc_l += nnzToRecv[2*p];
      nnz_loc_u += nnzToRecv[2*p+1];
      nnzToSend_l[p] = 0; nnzToSend_u[p] = 0;
      nnzToRecv_l[p] = nnzToRecv[2*p];
      nnzToRecv_u[p] = nnzToRecv[2*p+1];
    }
  }

  /* Allocate space for storing the symbolic structure after redistribution. */
  nsupers_i = CEILING( nsupers, grid->nprow ); /* Number of local block rows */
  nsupers_j = CEILING( nsupers, grid->npcol ); /* Number of local block columns */
  if ( !(xlsub_n = intCalloc_dist(nsupers_j+1)) ) {
    fprintf (stderr, "Malloc fails for xlsub_n[].");
    return (memAux + memRet);
  }
  memRet += (float) (nsupers_j+1) * iword;

  if ( !(xusub_n = intCalloc_dist(nsupers_i+1)) ) {
    fprintf (stderr, "Malloc fails for xusub_n[].");
    return (memAux + memRet);
  }
  memRet += (float) (nsupers_i+1) * iword;
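  /*
   * The counts just computed reflect the target layout: the structure of
   * a block column of L is spread over the processes in one column of
   * the 2D grid, and the structure of a block row of U over the
   * processes in one row.  nnzToSend/nnzToRecv interleave, per
   * destination process p, the L count (entry 2*p) and the U count
   * (entry 2*p+1).
   */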
  /* Allocate temp storage for sending/receiving the L/U symbolic structure. */
  if ( (RecvCnt_l + nnz_loc_l) || (RecvCnt_u + nnz_loc_u) ) {
    if (!(rcv_luind =
	  intMalloc_dist(SUPERLU_MAX(RecvCnt_l+nnz_loc_l, RecvCnt_u+nnz_loc_u))) ) {
      fprintf (stderr, "Malloc fails for rcv_luind[].");
      return (memAux + memRet);
    }
    memAux += (float) SUPERLU_MAX(RecvCnt_l+nnz_loc_l, RecvCnt_u+nnz_loc_u)
      * iword;
  }
  if ( nprocs > 1 && (SendCnt_l || SendCnt_u) ) {
    if (!(snd_luind = intMalloc_dist(SUPERLU_MAX(SendCnt_l, SendCnt_u))) ) {
      fprintf (stderr, "Malloc fails for index[].");
      return (memAux + memRet);
    }
    memAux += (float) SUPERLU_MAX(SendCnt_l, SendCnt_u) * iword;
  }

  /* ------------------------------------------------------------------
     LOAD THE SYMBOLIC STRUCTURE OF L AND U INTO THE STRUCTURES TO SEND.
     THIS ACCOUNTS FOR THE SECOND PASS OF L AND U.
     ------------------------------------------------------------------*/
  sendL = TRUE;
  sendU = FALSE;
  while (sendL || sendU) {
    if (sendL) {
      xsub_s = xlsub_s; sub_s = lsub_s; xsub_n = xlsub_n;
      nnzToSend = nnzToSend_l; nnzToRecv = nnzToRecv_l;
    }
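The owner computations above (PCOL, PROW, PNUM) implement SuperLU_DIST's 2D block-cyclic mapping of supernode blocks onto the process grid. The following small, self-contained sketch illustrates that mapping; the helper functions are local stand-ins written on the assumption of the standard macro definitions (bnum % nprow, bnum % npcol, pr * npcol + pc), and are not part of pzsymbfact_distdata.c.

#include <stdio.h>

/* Local stand-ins for SuperLU_DIST's PROW/PCOL/PNUM macros (assumed
   definitions; for illustration only). */
static int prow_of(int bnum, int nprow) { return bnum % nprow; }
static int pcol_of(int bnum, int npcol) { return bnum % npcol; }
static int pnum_of(int pr, int pc, int npcol) { return pr * npcol + pc; }

int main(void)
{
    int nprow = 2, npcol = 3;   /* a 2 x 3 process grid, 6 ranks */
    int nsupers = 7;            /* number of global supernodes */

    /* For each supernode gb: the grid column that stores block column
       gb of L, the grid row that stores block row gb of U, and the rank
       holding the diagonal block -- the same quantities dist_symbLU
       computes in its first pass. */
    for (int gb = 0; gb < nsupers; gb++) {
        int pc = pcol_of(gb, npcol);
        int pr = prow_of(gb, nprow);
        printf("supernode %d: L column -> grid column %d, "
               "U row -> grid row %d, diagonal block -> rank %d\n",
               gb, pc, pr, pnum_of(pr, pc, npcol));
    }
    return 0;
}

For instance, on this 2 x 3 grid supernode 4 maps to grid column 4 % 3 = 1 and grid row 4 % 2 = 0, so its diagonal block lives on rank 0 * 3 + 1 = 1.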
