⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pdsymbfact_distdata.c.old

📁 SuperLU 2.2版本。对大型、稀疏、非对称的线性系统的直接求解
💻 OLD
📖 第 1 页 / 共 5 页
字号:
      xsub_s = xusub_s; sub_s = usub_s; xsub_n = xusub_n;      nnzToSend = nnzToSend_u; nnzToRecv = nnzToRecv_u;    }    for (i = 0, j = 0, p = 0; p < nprocs; p++) {      if ( p != iam ) {	ptrToSend[p] = i;  i += nnzToSend[p];      }      ptrToRecv[p] = j;  j += nnzToRecv[p];    }    nnzToRecv[iam] = 0;        ind_loc = ptrToRecv[iam];    for (gb_n = 0; gb_n < nsupers; gb_n++) {      nsend_2 = 0;          i = xsup_n[gb_n];      if (iam == OWNER( globToLoc[i] )) {	pc = PCOL( gb_n, grid );	pr = PROW( gb_n, grid );	p_diag = PNUM( pr, pc, grid );		i_loc = LOCAL_IND( globToLoc[i] );	gb_s  = supno_s[i_loc];	fst_s = xsup_beg_s[gb_s];	lst_s = xsup_end_s[gb_s];	fst_s_l = LOCAL_IND( globToLoc[fst_s] );	if (sendL) {	  p = pc;                np = grid->nprow;	  	} else {	  p = pr * grid->npcol;  np = grid->npcol;	}	for (j = 0; j < np; j++) {	  if (p == iam) {	    rcv_luind[ind_loc] = gb_n;	    rcv_luind[ind_loc+1] = 0;	    tmp_ptrToSend[p] = ind_loc + 1;	    ind_loc += 2;	 	  }	  else {	    snd_luind[ptrToSend[p]] = gb_n;	    snd_luind[ptrToSend[p]+1] = 0;	    tmp_ptrToSend[p] = ptrToSend[p] + 1;	    ptrToSend[p] += 2;	 	  }	  if (sendL) p += grid->npcol;	  if (sendU) p++;	}	for (j = xsub_s[fst_s_l]; j < xsub_s[fst_s_l+1]; j++) {	  k = sub_s[j];	  if ((sendL && k >= i) || (sendU && k >= i + xsup_n[gb_n+1] - xsup_n[gb_n])) {	    gb = supno_n[k];	    if (sendL)	      p = PNUM( PROW(gb, grid), pc, grid );	    else 	      p = PNUM( pr, PCOL(gb, grid), grid);	    if (send_1[p] == FALSE) {	      send_1[p] = TRUE;	      send_2[nsend_2] = k; nsend_2 ++;	    }	    if (p == iam) {	      rcv_luind[ind_loc] = k;  ind_loc++;	      if (sendL)		xsub_n[LBj( gb_n, grid )] ++;	      else		xsub_n[LBi( gb_n, grid )] ++;	    }	    else {	      snd_luind[ptrToSend[p]] = k;	      ptrToSend[p] ++; snd_luind[tmp_ptrToSend[p]] ++;	    }	  }	}	if (sendL)	  for (p = pc; p < nprocs; p += grid->npcol) {	      for (k = 0; k < nsend_2; k++) {		gb = supno_n[send_2[k]];		if (PNUM(PROW(gb, grid), pc, grid) != p) {		  if (p == iam) {		    rcv_luind[ind_loc] = send_2[k];  ind_loc++;		    xsub_n[LBj( gb_n, grid )] ++;		  }		  else {		    snd_luind[ptrToSend[p]] = send_2[k];		    ptrToSend[p] ++; snd_luind[tmp_ptrToSend[p]] ++;		  }		}	      }	      send_1[p] = FALSE;	  }  	if (sendU)	  for (p = pr * grid->npcol; p < (pr + 1) * grid->npcol; p++) {	    if (send_1[p] || p == p_diag) {	      	      for (k = 0; k < nsend_2; k++) {		gb = supno_n[send_2[k]];		if(PNUM( pr, PCOL(gb, grid), grid) != p) {		  if (p == iam) {		    rcv_luind[ind_loc] = send_2[k];  ind_loc++;		    xsub_n[LBi( gb_n, grid )] ++;		  }		  else {		    snd_luind[ptrToSend[p]] = send_2[k];		    ptrToSend[p] ++; snd_luind[tmp_ptrToSend[p]] ++;		  }	     		}	      } 	      send_1[p] = FALSE;	    }	  }      }    }        /* reset ptrToSnd to point to the beginning of the data for       each processor (structure needed in MPI_Alltoallv) */    for (i = 0, p = 0; p < nprocs; p++) {      ptrToSend[p] = i;  i += nnzToSend[p];    }    /* ------------------------------------------------------------       PERFORM REDISTRIBUTION. THIS INVOLVES ALL-TO-ALL COMMUNICATION.       Note: it uses MPI_Alltoallv.       ------------------------------------------------------------*/    if (nprocs > 1) {#if defined (_LONGINT)      for (p=0; p<nprocs; p++) {	if (nnzToSend[p] > INT_MAX || ptrToSend[p] > INT_MAX ||	    nnzToRecv[p] > INT_MAX || ptrToRecv[p] > INT_MAX)	  ABORT("ERROR in dist_symbLU size to send > INT_MAX\n");	intBuf1[p] = (int) nnzToSend[p];	intBuf2[p] = (int) ptrToSend[p];	intBuf3[p] = (int) nnzToRecv[p];	intBuf4[p] = (int) ptrToRecv[p];      }#else  /* Default */      intBuf1 = nnzToSend;  intBuf2 = ptrToSend;      intBuf3 = nnzToRecv;  intBuf4 = ptrToRecv;#endif      MPI_Alltoallv (snd_luind, intBuf1, intBuf2, mpi_int_t, 		     rcv_luind, intBuf3, intBuf4, mpi_int_t,		     grid->comm);    }    if (sendL)      nnzToRecv[iam] = nnz_loc_l;    else       nnzToRecv[iam] = nnz_loc_u;        /* ------------------------------------------------------------       DEALLOCATE TEMPORARY STORAGE.       -------------------------------------------------------------*/    if (sendU)       if ( nprocs > 1 && (SendCnt_l || SendCnt_u) ) {	SUPERLU_FREE (snd_luind);	memAux -= (float) SUPERLU_MAX(SendCnt_l, SendCnt_u) * iword;      }        /* ------------------------------------------------------------       CONVERT THE FORMAT.       ------------------------------------------------------------*/    /* Initialize the array of column of L/ row of U pointers */    k = 0;    for (p = 0; p < nprocs; p ++) {      if (p != iam) {	i = k;	while (i < k + nnzToRecv[p]) {	  gb = rcv_luind[i];	  nelts = rcv_luind[i+1];	  if (sendL)	    xsub_n[LBj( gb, grid )] = nelts;	  else	    xsub_n[LBi( gb, grid )] = nelts;	  i += nelts + 2;	}      }      k += nnzToRecv[p];    }    if (sendL) j = nsupers_j;    else j = nsupers_i;    k = 0;     isize = xsub_n[0];    xsub_n[0] = 0;     for (gb_l = 1; gb_l < j; gb_l++) {      k += isize;      isize = xsub_n[gb_l];      xsub_n[gb_l] = k;    }    xsub_n[gb_l] = k + isize;    nnz_loc = xsub_n[gb_l];    if (sendL) {      lsub_n = NULL;      if (nnz_loc) {	if ( !(lsub_n = intMalloc_dist(nnz_loc)) ) {	  fprintf (stderr, "Malloc fails for lsub_n[].");	  return (memAux + memRet);	}	memRet += (float) (nnz_loc * iword);      }      sub_n = lsub_n;    }    if (sendU) {      usub_n = NULL;      if (nnz_loc) {	if ( !(usub_n = intMalloc_dist(nnz_loc)) ) {	  fprintf (stderr, "Malloc fails for usub_n[].");	  return (memAux + memRet);	}	memRet += (float) (nnz_loc * iword);      }      sub_n = usub_n;    }        /* Copy the data into the L column / U row oriented storage */    k = 0;    for (p = 0; p < nprocs; p++) {      i = k;      while (i < k + nnzToRecv[p]) {	gb = rcv_luind[i];	if (gb >= nsupers)	  printf ("Pe[%d] p %d gb %d nsupers %d i %d i-k %d\n",		  iam, p, gb, nsupers, i, i-k);	i += 2;	if (sendL) gb_l = LBj( gb, grid );	if (sendU) gb_l = LBi( gb, grid );	for (j = xsub_n[gb_l]; j < xsub_n[gb_l+1]; i++, j++) {	  sub_n[j] = rcv_luind[i];	}      }            k += nnzToRecv[p];    }    if (sendL) {      sendL = FALSE;  sendU = TRUE;    }    else      sendU = FALSE;  }  /* deallocate memory allocated during symbolic factorization routine */  if (rcv_luind != NULL) {    SUPERLU_FREE (rcv_luind);    memAux -= (float) SUPERLU_MAX(RecvCnt_l+nnz_loc_l, RecvCnt_u+nnz_loc_u) * iword;  }  SUPERLU_FREE (mem);    memAux -= (float) (12 * nprocs * iword);  SUPERLU_FREE(nvtcs);  memAux -= (float) (5 * nprocs * sizeof(int));    if (xlsub_s != NULL) {    SUPERLU_FREE (xlsub_s); SUPERLU_FREE (lsub_s);  }  if (xusub_s != NULL) {    SUPERLU_FREE (xusub_s); SUPERLU_FREE (usub_s);  }  SUPERLU_FREE (globToLoc);   if (supno_s != NULL) {    SUPERLU_FREE (xsup_beg_s); SUPERLU_FREE (xsup_end_s);    SUPERLU_FREE (supno_s);  }    Glu_persist->supno = supno_n;  Glu_persist->xsup  = xsup_n;  *p_xlsub = xlsub_n; *p_lsub = lsub_n;  *p_xusub = xusub_n; *p_usub = usub_n;#if ( DEBUGlevel>=1 )  CHECK_MALLOC(iam, "Exit dist_symbLU()");#endif    return (-memRet);}  static floatddist_A(SuperMatrix *A, ScalePermstruct_t *ScalePermstruct,	Glu_persist_t *Glu_persist, gridinfo_t *grid, 	int_t **p_ainf_colptr, int_t **p_ainf_rowind, double **p_ainf_val,	int_t **p_asup_rowptr, int_t **p_asup_colind, double **p_asup_val,	int_t *ilsum_i, int_t *ilsum_j	){/* * * Purpose * ======= *   Re-distribute A on the 2D process mesh.  The lower part is *   stored using a column format and the upper part *   is stored using a row format. *  * Arguments * ========= *  * A      (Input) SuperMatrix* *	  The distributed input matrix A of dimension (A->nrow, A->ncol). *        The type of A can be: Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE. * * ScalePermstruct (Input) ScalePermstruct_t* *        The data structure to store the scaling and permutation vectors *        describing the transformations performed to the original matrix A. * * Glu_persist  (Input) Glu_persist_t * *        Information on supernodes mapping. *  * grid   (Input) gridinfo_t* *        The 2D process mesh. * * p_ainf_colptr (Output) int_t** *         Pointer to the lower part of A distributed on a 2D grid  *         of processors, stored by columns. * * p_ainf_rowind (Output) int_t** *         Structure of of the lower part of A distributed on a  *         2D grid of processors, stored by columns. * * p_ainf_val    (Output) double** *         Numerical values of the lower part of A, distributed on a  *         2D grid of processors, stored by columns. * * p_asup_rowptr (Output) int_t** *         Pointer to the upper part of A distributed on a 2D grid  *         of processors, stored by rows. * * p_asup_colind (Output) int_t** *         Structure of of the upper part of A distributed on a  *         2D grid of processors, stored by rows. * * p_asup_val    (Output) double** *         Numerical values of the upper part of A, distributed on a  *         2D grid of processors, stored by rows. * * ilsum_i  (Input) int_t * *       Starting position of each supernode in  *       the full array (local, block row wise). * * ilsum_j  (Input) int_t * *       Starting position of each supernode in  *       the full array (local, block column wise). * * Return value * ============ *   < 0, number of bytes allocated on return from the dist_symbLU *   > 0, number of bytes allocated when out of memory. *        (an approximation). * */  int    iam, p, procs;  NRformat_loc *Astore;  int_t  *perm_r; /* row permutation vector */  int_t  *perm_c; /* column permutation vector */  int_t  i, it, irow, fst_row, j, jcol, k, gbi, gbj, n, m_loc, jsize, isize;  int_t  nsupers, nsupers_i, nsupers_j;  int_t  nnz_loc, nnz_loc_ainf, nnz_loc_asup;    /* number of local nonzeros */  int_t  nnz_remote; /* number of remote nonzeros to be sent */  int_t  SendCnt; /* number of remote nonzeros to be sent */  int_t  RecvCnt; /* number of remote nonzeros to be sent */  int_t *ainf_colptr, *ainf_rowind, *asup_rowptr, *asup_colind;  double *asup_val, *ainf_val;  int_t  *nnzToSend, *nnzToRecv, maxnnzToRecv;  int_t  *ia, *ja, **ia_send, *index, *itemp;  int_t  *ptr_to_send;  double *aij, **aij_send, *nzval, *dtemp;  double *nzval_a;  MPI_Request *send_req;  MPI_Status  status;  int_t *xsup = Glu_persist->xsup;    /* supernode and column mapping */  int_t *supno = Glu_persist->supno;     float memAux;  /* Memory used during this routine and freed on return */  float memRet; /* Memory allocated and not freed on return */  int_t iword, dword, szbuf;  /* ------------------------------------------------------------     INITIALIZATION.     ------------------------------------------------------------*/  iam = grid->iam;#if ( DEBUGlevel>=1 )  CHECK_MALLOC(iam, "Enter ddist_A()");#endif  iword = sizeof(int_t);  dword = sizeof(double);    perm_r = ScalePermstruct->perm_r;  perm_c = ScalePermstruct->perm_c;  procs = grid->nprow * grid->npcol;  Astore = (NRformat_loc *) A->Store;  n = A->ncol;  m_loc = Astore->m_loc;  fst_row = Astore->fst_row;  if (!(nnzToRecv = intCalloc_dist(2*procs))) {    fprintf (stderr, "Malloc fails for nnzToRecv[].");    return (ERROR_RET);  }  memAux = (float) (2 * procs * iword);  memRet = 0.;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -