⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pzsymbfact_distdata.c

📁 SuperLU 2.2版本。对大型、稀疏、非对称的线性系统的直接求解
💻 C
📖 第 1 页 / 共 5 页
字号:
 */{  Glu_persist_t *Glu_persist = LUstruct->Glu_persist;  Glu_freeable_t Glu_freeable_n;  LocalLU_t *Llu = LUstruct->Llu;  int_t bnnz, fsupc, i, irow, istart, j, jb, jj, k,     len, len1, nsupc, nsupc_gb, ii, nprocs;  int_t ljb;  /* local block column number */  int_t nrbl; /* number of L blocks in current block column */  int_t nrbu; /* number of U blocks in current block column */  int_t gb;   /* global block number; 0 < gb <= nsuper */  int_t lb;   /* local block number; 0 < lb <= ceil(NSUPERS/Pr) */  int iam, jbrow, jbcol, jcol, kcol, mycol, myrow, pc, pr, ljb_i, ljb_j, p;  int_t mybufmax[NBUFFERS];  NRformat_loc *Astore;  doublecomplex *a;  int_t *asub, *xa;  int_t *ainf_colptr, *ainf_rowind, *asup_rowptr, *asup_colind;  doublecomplex *asup_val, *ainf_val;  int_t *xsup, *supno;    /* supernode and column mapping */  int_t *lsub, *xlsub, *usub, *xusub;  int_t nsupers, nsupers_i, nsupers_j, nsupers_ij;  int_t next_ind;      /* next available position in index[*] */  int_t next_val;      /* next available position in nzval[*] */  int_t *index;         /* indices consist of headers and row subscripts */  doublecomplex *lusup, *uval; /* nonzero values in L and U */  int_t *recvBuf;  int *ptrToRecv, *nnzToRecv, *ptrToSend, *nnzToSend;  doublecomplex **Lnzval_bc_ptr;  /* size ceil(NSUPERS/Pc) */  int_t  **Lrowind_bc_ptr; /* size ceil(NSUPERS/Pc) */  doublecomplex **Unzval_br_ptr;  /* size ceil(NSUPERS/Pr) */  int_t  **Ufstnz_br_ptr;  /* size ceil(NSUPERS/Pr) */    /*-- Counts to be used in factorization. --*/  int_t  *ToRecv, *ToSendD, **ToSendR;    /*-- Counts to be used in lower triangular solve. --*/  int_t  *fmod;          /* Modification count for L-solve.        */  int_t  **fsendx_plist; /* Column process list to send down Xk.   */  int_t  nfrecvx = 0;    /* Number of Xk I will receive.           */  int_t  nfsendx = 0;    /* Number of Xk I will send               */  int_t  kseen;    /*-- Counts to be used in upper triangular solve. --*/  int_t  *bmod;          /* Modification count for U-solve.        */  int_t  **bsendx_plist; /* Column process list to send down Xk.   */  int_t  nbrecvx = 0;    /* Number of Xk I will receive.           */  int_t  nbsendx = 0;    /* Number of Xk I will send               */    int_t  *ilsum;         /* starting position of each supernode in 			    the full array (local)                 */    int_t  *ilsum_j, ldaspa_j; /* starting position of each supernode in 				the full array (local, block column wise) */    /*-- Auxiliary arrays; freed on return --*/  int_t *Urb_marker;  /* block hit marker; size ceil(NSUPERS/Pr)           */  int_t *LUb_length; /* L,U block length; size nsupers_ij */  int_t *LUb_indptr; /* pointers to L,U index[]; size nsupers_ij */  int_t *LUb_number; /* global block number; size nsupers_ij */  int_t *LUb_valptr; /* pointers to U nzval[]; size ceil(NSUPERS/Pc)      */  int_t *Lrb_marker;  /* block hit marker; size ceil(NSUPERS/Pr)           */  doublecomplex *dense, *dense_col; /* SPA */  doublecomplex zero = {0.0, 0.0};  int_t ldaspa;     /* LDA of SPA */  int_t iword, dword;  float memStrLU, memA,    memDist = 0.; /* memory used for redistributing the data, which does		   not include the memory for the numerical values of L and U */  float  memNLU = 0.; /* memory allocated for storing the numerical values of 		    L and U, that will be used in the numeric factorization */#if ( PRNTlevel>=1 )  int_t nLblocks = 0, nUblocks = 0;#endif    /* Initialization. */  iam = grid->iam;#if ( DEBUGlevel>=1 )  CHECK_MALLOC(iam, "Enter dist_psymbtonum()");#endif  myrow = MYROW( iam, grid );  mycol = MYCOL( iam, grid );  nprocs = grid->npcol * grid->nprow;  for (i = 0; i < NBUFFERS; ++i) mybufmax[i] = 0;  Astore   = (NRformat_loc *) A->Store;    iword = sizeof(int_t);  dword = sizeof(doublecomplex);  if (fact == SamePattern_SameRowPerm) {    ABORT ("ERROR: call of dist_psymbtonum with fact equals SamePattern_SameRowPerm.");    }  if ((memStrLU =        dist_symbLU (n, Pslu_freeable, 		    Glu_persist, &xlsub, &lsub, &xusub, &usub,	grid)) > 0)    return (memStrLU);  memDist += (-memStrLU);  xsup  = Glu_persist->xsup;    /* supernode and column mapping */  supno = Glu_persist->supno;     nsupers  = supno[n-1] + 1;  nsupers_i = CEILING( nsupers, grid->nprow );/* No of local row blocks */  nsupers_j = CEILING( nsupers, grid->npcol );/* No of local column blocks */  nsupers_ij = SUPERLU_MAX(nsupers_i, nsupers_j);  if ( !(ilsum = intMalloc_dist(nsupers_i+1)) ) {    fprintf (stderr, "Malloc fails for ilsum[].");      return (memDist + memNLU);  }  memNLU += (nsupers_i+1) * iword;  if ( !(ilsum_j = intMalloc_dist(nsupers_j+1)) ) {    fprintf (stderr, "Malloc fails for ilsum_j[].");    return (memDist + memNLU);  }  memDist += (nsupers_j+1) * iword;  /* Compute ldaspa and ilsum[], ldaspa_j and ilsum_j[]. */  ilsum[0] = 0;  ldaspa = 0;  for (gb = 0; gb < nsupers; gb++)     if ( myrow == PROW( gb, grid ) ) {      i = SuperSize( gb );      ldaspa += i;      lb = LBi( gb, grid );      ilsum[lb + 1] = ilsum[lb] + i;    }  ilsum[nsupers_i] = ldaspa;  ldaspa_j = 0; ilsum_j[0] = 0;    for (gb = 0; gb < nsupers; gb++)     if (mycol == PCOL( gb, grid )) {      i = SuperSize( gb );      ldaspa_j += i;      lb = LBj( gb, grid );      ilsum_j[lb + 1] = ilsum_j[lb] + i;    }  ilsum_j[nsupers_j] = ldaspa_j;    if ((memA = zdist_A(A, ScalePermstruct, Glu_persist,		      grid, &ainf_colptr, &ainf_rowind, &ainf_val,		      &asup_rowptr, &asup_colind, &asup_val,		      ilsum, ilsum_j)) > 0)    return (memDist + memA + memNLU);  memDist += (-memA);  /* ------------------------------------------------------------     FIRST TIME CREATING THE L AND U DATA STRUCTURES.     ------------------------------------------------------------*/    /* We first need to set up the L and U data structures and then   * propagate the values of A into them.   */  if ( !(ToRecv = intCalloc_dist(nsupers)) ) {    fprintf(stderr, "Calloc fails for ToRecv[].");    return (memDist + memNLU);  }  memNLU += nsupers * iword;    k = CEILING( nsupers, grid->npcol ); /* Number of local column blocks */  if ( !(ToSendR = (int_t **) SUPERLU_MALLOC(k*sizeof(int_t*))) ) {    fprintf(stderr, "Malloc fails for ToSendR[].");    return (memDist + memNLU);  }  memNLU += k*sizeof(int_t*);  j = k * grid->npcol;  if ( !(index = intMalloc_dist(j)) ) {    fprintf(stderr, "Malloc fails for index[].");    return (memDist + memNLU);  }  memNLU += j*iword;    for (i = 0; i < j; ++i) index[i] = EMPTY;  for (i = 0,j = 0; i < k; ++i, j += grid->npcol) ToSendR[i] = &index[j];    /* Auxiliary arrays used to set up L and U block data structures.     They are freed on return. */  if ( !(LUb_length = intCalloc_dist(nsupers_ij)) ) {    fprintf(stderr, "Calloc fails for LUb_length[].");    return (memDist + memNLU);  }  if ( !(LUb_indptr = intMalloc_dist(nsupers_ij)) ) {    fprintf(stderr, "Malloc fails for LUb_indptr[].");    return (memDist + memNLU);  }  if ( !(LUb_number = intCalloc_dist(nsupers_ij)) ) {    fprintf(stderr, "Calloc fails for LUb_number[].");    return (memDist + memNLU);  }      if ( !(LUb_valptr = intCalloc_dist(nsupers_ij)) ) {    fprintf(stderr, "Calloc fails for LUb_valptr[].");    return (memDist + memNLU);  }  memDist += 4 * nsupers_ij * iword;    k = CEILING( nsupers, grid->nprow );   /* Pointers to the beginning of each block row of U. */  if ( !(Unzval_br_ptr = 	 (doublecomplex**)SUPERLU_MALLOC(nsupers_i * sizeof(doublecomplex*))) ) {    fprintf(stderr, "Malloc fails for Unzval_br_ptr[].");    return (memDist + memNLU);  }  if ( !(Ufstnz_br_ptr = (int_t**)SUPERLU_MALLOC(nsupers_i * sizeof(int_t*))) ) {    fprintf(stderr, "Malloc fails for Ufstnz_br_ptr[].");    return (memDist + memNLU);  }  memNLU += nsupers_i*sizeof(doublecomplex*) + nsupers_i*sizeof(int_t*);  Unzval_br_ptr[nsupers_i-1] = NULL;  Ufstnz_br_ptr[nsupers_i-1] = NULL;  if ( !(ToSendD = intCalloc_dist(nsupers_i)) ) {    fprintf(stderr, "Malloc fails for ToSendD[].");    return (memDist + memNLU);  }  memNLU += nsupers_i*iword;    if ( !(Urb_marker = intCalloc_dist(nsupers_j))) {    fprintf(stderr, "Calloc fails for rb_marker[].");    return (memDist + memNLU);  }  if ( !(Lrb_marker = intCalloc_dist( nsupers_i ))) {    fprintf(stderr, "Calloc fails for rb_marker[].");    return (memDist + memNLU);  }  memDist += (nsupers_i + nsupers_j)*iword;    /* Auxiliary arrays used to set up L, U block data structures.     They are freed on return.     k is the number of local row blocks.   */  if ( !(dense = doublecomplexCalloc_dist(SUPERLU_MAX(ldaspa, ldaspa_j) 				   * sp_ienv_dist(3))) ) {    fprintf(stderr, "Calloc fails for SPA dense[].");    return (memDist + memNLU);  }  /* These counts will be used for triangular solves. */  if ( !(fmod = intCalloc_dist(nsupers_i)) ) {    fprintf(stderr, "Calloc fails for fmod[].");    return (memDist + memNLU);  }  if ( !(bmod = intCalloc_dist(nsupers_i)) ) {    fprintf(stderr, "Calloc fails for bmod[].");    return (memDist + memNLU);  }  /* ------------------------------------------------ */  memNLU += 2*nsupers_i*iword +     SUPERLU_MAX(ldaspa, ldaspa_j)*sp_ienv_dist(3)*dword;     /* Pointers to the beginning of each block column of L. */  if ( !(Lnzval_bc_ptr = 	 (doublecomplex**)SUPERLU_MALLOC(nsupers_j * sizeof(doublecomplex*))) ) {    fprintf(stderr, "Malloc fails for Lnzval_bc_ptr[].");    return (memDist + memNLU);  }  if ( !(Lrowind_bc_ptr = (int_t**)SUPERLU_MALLOC(nsupers_j * sizeof(int_t*))) ) {    fprintf(stderr, "Malloc fails for Lrowind_bc_ptr[].");    return (memDist + memNLU);  }  memNLU += nsupers_j * sizeof(doublecomplex*) + nsupers_j * sizeof(int_t*);  Lnzval_bc_ptr[nsupers_j-1] = NULL;  Lrowind_bc_ptr[nsupers_j-1] = NULL;    /* These lists of processes will be used for triangular solves. */  if ( !(fsendx_plist = (int_t **) SUPERLU_MALLOC(nsupers_j*sizeof(int_t*))) ) {    fprintf(stderr, "Malloc fails for fsendx_plist[].");    return (memDist + memNLU);  }  len = nsupers_j * grid->nprow;  if ( !(index = intMalloc_dist(len)) ) {    fprintf(stderr, "Malloc fails for fsendx_plist[0]");    return (memDist + memNLU);  }  for (i = 0; i < len; ++i) index[i] = EMPTY;  for (i = 0, j = 0; i < nsupers_j; ++i, j += grid->nprow)    fsendx_plist[i] = &index[j];  if ( !(bsendx_plist = (int_t **) SUPERLU_MALLOC(nsupers_j*sizeof(int_t*))) ) {    fprintf(stderr, "Malloc fails for bsendx_plist[].");    return (memDist + memNLU);  }  if ( !(index = intMalloc_dist(len)) ) {    fprintf(stderr, "Malloc fails for bsendx_plist[0]");    return (memDist + memNLU);  }  for (i = 0; i < len; ++i) index[i] = EMPTY;  for (i = 0, j = 0; i < nsupers_j; ++i, j += grid->nprow)    bsendx_plist[i] = &index[j];  /* -------------------------------------------------------------- */  memNLU += 2*nsupers_j*sizeof(int_t*) + 2*len*iword;    /*------------------------------------------------------------    PROPAGATE ROW SUBSCRIPTS AND VALUES OF A INTO L AND U BLOCKS.    THIS ACCOUNTS FOR ONE-PASS PROCESSING OF A, L AND U.    ------------------------------------------------------------*/  for (jb = 0; jb < nsupers; jb++) {    jbcol = PCOL( jb, grid );    jbrow = PROW( jb, grid );    ljb_j = LBj( jb, grid ); /* Local block number column wise */    ljb_i = LBi( jb, grid);  /* Local block number row wise */    fsupc = FstBlockC( jb );    nsupc = SuperSize( jb );        if ( myrow == jbrow ) { /* Block row jb in my process row */      /* Scatter A into SPA. */      for (j = ilsum[ljb_i], dense_col = dense; j < ilsum[ljb_i]+nsupc; j++) {	for (i = asup_rowptr[j]; i < asup_rowptr[j+1]; i++) {	  if (i >= asup_rowptr[ilsum[nsupers_i]]) 	    printf ("ERR7\n");	  jcol = asup_colind[i];	  if (jcol >= n)	    printf ("Pe[%d] ERR distsn jb %d gb %d j %d jcol %d\n",		    iam, jb, gb, j, jcol);	  gb = BlockNum( jcol );	  lb = LBj( gb, grid );	  if (gb >= nsupers || lb >= nsupers_j) printf ("ERR8\n");	  jcol = ilsum_j[lb] + jcol - FstBlockC( gb );	  if (jcol >= ldaspa_j)	    printf ("Pe[%d] ERR1 jb %d gb %d j %d jcol %d\n",		    iam, jb, gb, j, jcol);	  dense_col[jcol] = asup_val[i];	}	dense_col += ldaspa_j;      }            /*------------------------------------------------       * SET UP U BLOCKS.       *------------------------------------------------*/      /* Count number of blocks and length of each block. */      nrbu = 0;      len = 0; /* Number of column subscripts I own. */      len1 = 0; /* number of fstnz subscripts */      for (i = xusub[ljb_i]; i < xusub[ljb_i+1]; i++) {	if (i >= xusub[nsupers_i]) printf ("ERR10\n");	jcol = usub[i];	gb = BlockNum( jcol ); /* Global block number */		/*if (fsupc <= 146445 && 146445 < fsupc + nsupc && jcol == 397986)	  printf ("Pe[%d] [%d %d] elt [%d] jbcol %d pc %d\n",	  iam, jb, gb, jcol, jbcol, pc); */		lb = LBj( gb, grid );  /* Local block number */	pc = PCOL( gb, grid ); /* Process col owning this block */	if (mycol == jbcol) ToSendR[ljb_j][pc] = YES;	/* if (mycol == jbcol && mycol != pc) ToSendR[ljb_j][pc] = YES; */	pr = PROW( gb, grid );	if ( pr != jbrow  && mycol == pc)	  bsendx_plist[lb][jbrow] = YES; 	if (mycol == pc) {	  len += nsupc;	  LUb_length[lb] += nsupc;	  ToSendD[ljb_i] = YES;	  if (Urb_marker[lb] <= jb) { /* First see this block */	    if (Urb_marker[lb] == FALSE && gb != jb && myrow != pr) nbrecvx ++;	    Urb_marker[lb] = jb + 1;	    LUb_number[nrbu] = gb;	    /* if (gb == 391825 && jb == 145361)	       printf ("Pe[%d] T1 [%d %d] nrbu %d \n",	       iam, jb, gb, nrbu); */	    nrbu ++;	    len1 += SuperSize( gb );	    if ( gb != jb )/* Exclude diagonal block. */	      ++bmod[ljb_i];/* Mod. count for back solve */#if ( PRNTlevel>=1 )	    ++nUblocks;#endif	  }	}      } /* for i ... */            if ( nrbu ) { 	/* Sort the blocks of U in increasing block column index.	   SuperLU_DIST assumes this is true */	/* simple insert sort algorithm */	/* to be transformed in quick sort */	for (j = 1; j < nrbu; j++) {	  k = LUb_number[j];	  for (i=j-1; i>=0 && LUb_number[i] > k; i--) {	    LUb_number[i+1] = LUb_number[i];	  }	  LUb_number[i+1] = k;	} 		/* Set up the initial pointers for each block in	   index[] and nzval[]. */	/* Add room for descriptors */	len1 += BR_HEADER + nrbu * UB_DESCRIPTOR;	if ( !(index = intMalloc_dist(len1+1)) ) {	  fprintf (stderr, "Malloc fails for Uindex[]");	  return (memDist + memNLU);	}	Ufstnz_br_ptr[ljb_i] = index;	if (!(Unzval_br_ptr[ljb_i] =	      doublecomplexMalloc_dist(len))) {	  fprintf (stderr, "Malloc fails for Unzval_br_ptr[*][]");	  return (memDist + memNLU);	}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -