pzsymbfact_distdata.c

来自「SuperLU 2.2版本。对大型、稀疏、非对称的线性系统的直接求解」· C语言代码 · 共 1,943 行 · 第 1/5 页
1,943 行
  memAux = (float) (2 * procs * iword);  memRet = 0.;  nnzToSend = nnzToRecv + procs;  nsupers  = supno[n-1] + 1;    /* ------------------------------------------------------------     COUNT THE NUMBER OF NONZEROS TO BE SENT TO EACH PROCESS,     THEN ALLOCATE SPACE.     THIS ACCOUNTS FOR THE FIRST PASS OF A.     ------------------------------------------------------------*/  for (i = 0; i < m_loc; ++i) {    for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) {      irow = perm_c[perm_r[i+fst_row]];  /* Row number in Pc*Pr*A */      jcol = Astore->colind[j];      gbi = BlockNum( irow );      gbj = BlockNum( jcol );      p = PNUM( PROW(gbi,grid), PCOL(gbj,grid), grid );      ++nnzToSend[p];     }  }    /* All-to-all communication */  MPI_Alltoall( nnzToSend, 1, mpi_int_t, nnzToRecv, 1, mpi_int_t,		grid->comm);    maxnnzToRecv = 0;  nnz_loc = SendCnt = RecvCnt = 0;    for (p = 0; p < procs; ++p) {    if ( p != iam ) {      SendCnt += nnzToSend[p];      RecvCnt += nnzToRecv[p];      maxnnzToRecv = SUPERLU_MAX( nnzToRecv[p], maxnnzToRecv );    } else {      nnz_loc += nnzToRecv[p];      /*assert(nnzToSend[p] == nnzToRecv[p]);*/    }  }  k = nnz_loc + RecvCnt; /* Total nonzeros ended up in my process. */  szbuf = k;  /* Allocate space for storing the triplets after redistribution. */  if ( !(ia = intMalloc_dist(2*k)) ) {    fprintf (stderr, "Malloc fails for ia[].");    return (memAux);  }  memAux += (float) (2*k*iword);  ja = ia + k;  if ( !(aij = doublecomplexMalloc_dist(k)) ) {    fprintf (stderr, "Malloc fails for aij[].");    return (memAux);  }  memAux += (float) (k*dword);    /* Allocate temporary storage for sending/receiving the A triplets. */  if ( procs > 1 ) {    if ( !(send_req = (MPI_Request *)	   SUPERLU_MALLOC(2*procs *sizeof(MPI_Request))) ) {      fprintf (stderr, "Malloc fails for send_req[].");      return (memAux);    }    memAux += (float) (2*procs *sizeof(MPI_Request));    if ( !(ia_send = (int_t **) SUPERLU_MALLOC(procs*sizeof(int_t*))) ) {      fprintf(stderr, "Malloc fails for ia_send[].");      return (memAux);    }    memAux += (float) (procs*sizeof(int_t*));    if ( !(aij_send = (doublecomplex **)SUPERLU_MALLOC(procs*sizeof(doublecomplex*))) ) {      fprintf(stderr, "Malloc fails for aij_send[].");      return (memAux);    }    memAux += (float) (procs*sizeof(doublecomplex*));        if ( !(index = intMalloc_dist(2*SendCnt)) ) {      fprintf(stderr, "Malloc fails for index[].");      return (memAux);    }    memAux += (float) (2*SendCnt*iword);    if ( !(nzval = doublecomplexMalloc_dist(SendCnt)) ) {      fprintf(stderr, "Malloc fails for nzval[].");      return (memAux);    }    memAux += (float) (SendCnt * dword);    if ( !(ptr_to_send = intCalloc_dist(procs)) ) {      fprintf(stderr, "Malloc fails for ptr_to_send[].");      return (memAux);    }    memAux += (float) (procs * iword);    if ( !(itemp = intMalloc_dist(2*maxnnzToRecv)) ) {      fprintf(stderr, "Malloc fails for itemp[].");      return (memAux);    }    memAux += (float) (2*maxnnzToRecv*iword);    if ( !(dtemp = doublecomplexMalloc_dist(maxnnzToRecv)) ) {      fprintf(stderr, "Malloc fails for dtemp[].");      return (memAux);    }    memAux += (float) (maxnnzToRecv * dword);        for (i = 0, j = 0, p = 0; p < procs; ++p) {      if ( p != iam ) {	ia_send[p] = &index[i];	i += 2 * nnzToSend[p]; /* ia/ja indices alternate */	aij_send[p] = &nzval[j];	j += nnzToSend[p];      }    }  } /* if procs > 1 */    nsupers_i = CEILING( nsupers, grid->nprow ); /* Number of local block rows */  nsupers_j = CEILING( nsupers, grid->npcol ); /* Number of local block columns */  if ( !(ainf_colptr = intCalloc_dist(ilsum_j[nsupers_j] + 1)) ) {    fprintf (stderr, "Malloc fails for *ainf_colptr[].");    return (memAux);  }  memRet += (float) (ilsum_j[nsupers_j] + 1) * iword;  if ( !(asup_rowptr = intCalloc_dist(ilsum_i[nsupers_i] + 1)) ) {    fprintf (stderr, "Malloc fails for *asup_rowptr[].");    return (memAux+memRet);  }  memRet += (float) (ilsum_i[nsupers_i] + 1) * iword;    /* ------------------------------------------------------------     LOAD THE ENTRIES OF A INTO THE (IA,JA,AIJ) STRUCTURES TO SEND.     THIS ACCOUNTS FOR THE SECOND PASS OF A.     ------------------------------------------------------------*/  nnz_loc = 0; /* Reset the local nonzero count. */  nnz_loc_ainf = nnz_loc_asup = 0;  nzval_a = Astore->nzval;  for (i = 0; i < m_loc; ++i) {    for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) {      irow = perm_c[perm_r[i+fst_row]];  /* Row number in Pc*Pr*A */      jcol = Astore->colind[j];      gbi = BlockNum( irow );      gbj = BlockNum( jcol );      p = PNUM( PROW(gbi,grid), PCOL(gbj,grid), grid );            if ( p != iam ) { /* remote */	k = ptr_to_send[p];	ia_send[p][k] = irow;	ia_send[p][k + nnzToSend[p]] = jcol;	aij_send[p][k] = nzval_a[j];	++ptr_to_send[p];       } else {          /* local */	ia[nnz_loc] = irow;	ja[nnz_loc] = jcol;	aij[nnz_loc] = nzval_a[j];	++nnz_loc;	/* Count nonzeros in each column of L / row of U */	if (gbi >= gbj) {	  ainf_colptr[ilsum_j[LBj( gbj, grid )] + jcol - FstBlockC( gbj )] ++;	  nnz_loc_ainf ++;	}	else {	  asup_rowptr[ilsum_i[LBi( gbi, grid )] + irow - FstBlockC( gbi )] ++;	  nnz_loc_asup ++;	}      }    }  }  /* ------------------------------------------------------------     PERFORM REDISTRIBUTION. THIS INVOLVES ALL-TO-ALL COMMUNICATION.     NOTE: Can possibly use MPI_Alltoallv.     ------------------------------------------------------------*/  for (p = 0; p < procs; ++p) {    if ( p != iam ) {      it = 2*nnzToSend[p];      MPI_Isend( ia_send[p], it, mpi_int_t,		 p, iam, grid->comm, &send_req[p] );      it = nnzToSend[p];      MPI_Isend( aij_send[p], it, SuperLU_MPI_DOUBLE_COMPLEX,		 p, iam+procs, grid->comm, &send_req[procs+p] );     }  }    for (p = 0; p < procs; ++p) {    if ( p != iam ) {      it = 2*nnzToRecv[p];      MPI_Recv( itemp, it, mpi_int_t, p, p, grid->comm, &status );       it = nnzToRecv[p];      MPI_Recv( dtemp, it, SuperLU_MPI_DOUBLE_COMPLEX, p, p+procs,		grid->comm, &status );      for (i = 0; i < nnzToRecv[p]; ++i) {	ia[nnz_loc] = itemp[i];	irow = itemp[i];	jcol = itemp[i + nnzToRecv[p]];	/* assert(jcol<n); */	ja[nnz_loc] = jcol;	aij[nnz_loc] = dtemp[i];	++nnz_loc;		gbi = BlockNum( irow );	gbj = BlockNum( jcol );	/* Count nonzeros in each column of L / row of U */	if (gbi >= gbj) {	  ainf_colptr[ilsum_j[LBj( gbj, grid )] + jcol - FstBlockC( gbj )] ++;	  nnz_loc_ainf ++;	}	else {	  asup_rowptr[ilsum_i[LBi( gbi, grid )] + irow - FstBlockC( gbi )] ++;	  nnz_loc_asup ++;	}      }    }  }    for (p = 0; p < procs; ++p) {    if ( p != iam ) {      MPI_Wait( &send_req[p], &status);      MPI_Wait( &send_req[procs+p], &status);    }  }    /* ------------------------------------------------------------     DEALLOCATE TEMPORARY STORAGE     ------------------------------------------------------------*/    SUPERLU_FREE(nnzToRecv);  memAux -= 2 * procs * iword;  if ( procs > 1 ) {    SUPERLU_FREE(send_req);    SUPERLU_FREE(ia_send);    SUPERLU_FREE(aij_send);    SUPERLU_FREE(index);    SUPERLU_FREE(nzval);    SUPERLU_FREE(ptr_to_send);    SUPERLU_FREE(itemp);    SUPERLU_FREE(dtemp);    memAux -= 2*procs *sizeof(MPI_Request) + procs*sizeof(int_t*) +      procs*sizeof(doublecomplex*) + 2*SendCnt * iword +      SendCnt* dword + procs*iword +      2*maxnnzToRecv*iword + maxnnzToRecv*dword;  }    /* ------------------------------------------------------------     CONVERT THE TRIPLET FORMAT.     ------------------------------------------------------------*/  if (nnz_loc_ainf != 0) {    if ( !(ainf_rowind = intMalloc_dist(nnz_loc_ainf)) ) {      fprintf (stderr, "Malloc fails for *ainf_rowind[].");      return (memAux+memRet);    }    memRet += (float) (nnz_loc_ainf * iword);    if ( !(ainf_val = doublecomplexMalloc_dist(nnz_loc_ainf)) ) {      fprintf (stderr, "Malloc fails for *ainf_val[].");      return (memAux+memRet);    }    memRet += (float) (nnz_loc_ainf * dword);  }  else {    ainf_rowind = NULL;    ainf_val = NULL;  }  if (nnz_loc_asup != 0) {    if ( !(asup_colind = intMalloc_dist(nnz_loc_asup)) ) {      fprintf (stderr, "Malloc fails for *asup_colind[].");      return (memAux + memRet);    }    memRet += (float) (nnz_loc_asup * iword);    if ( !(asup_val = doublecomplexMalloc_dist(nnz_loc_asup)) ) {      fprintf (stderr, "Malloc fails for *asup_val[].");      return (memAux  + memRet);    }    memRet += (float) (nnz_loc_asup * dword);  }  else {    asup_colind = NULL;    asup_val = NULL;  }  /* Initialize the array of column pointers */  k = 0;   jsize = ainf_colptr[0];  ainf_colptr[0] = 0;   for (j = 1; j < ilsum_j[nsupers_j]; j++) {    k += jsize;                  jsize = ainf_colptr[j];      ainf_colptr[j] = k;  }  ainf_colptr[ilsum_j[nsupers_j]] = k + jsize;  i = 0;  isize = asup_rowptr[0];  asup_rowptr[0] = 0;  for (j = 1; j < ilsum_i[nsupers_i]; j++) {    i += isize;    isize = asup_rowptr[j];      asup_rowptr[j] = i;  }  asup_rowptr[ilsum_i[nsupers_i]] = i + isize;  /* Copy the triplets into the column oriented storage */  for (i = 0; i < nnz_loc; ++i) {    jcol = ja[i];    irow = ia[i];    gbi = BlockNum( irow );    gbj = BlockNum( jcol );    /* Count nonzeros in each column of L / row of U */    if (gbi >= gbj) {      j = ilsum_j[LBj( gbj, grid )] + jcol - FstBlockC( gbj );      k = ainf_colptr[j];      ainf_rowind[k] = irow;      ainf_val[k] = aij[i];      ainf_colptr[j] ++;    }    else {      j = ilsum_i[LBi( gbi, grid )] + irow - FstBlockC( gbi );      k = asup_rowptr[j];      asup_colind[k] = jcol;      asup_val[k] = aij[i];      asup_rowptr[j] ++;    }  }  /* Reset the column pointers to the beginning of each column */  for (j = ilsum_j[nsupers_j]; j > 0; j--)     ainf_colptr[j] = ainf_colptr[j-1];  for (j = ilsum_i[nsupers_i]; j > 0; j--)     asup_rowptr[j] = asup_rowptr[j-1];  ainf_colptr[0] = 0;  asup_rowptr[0] = 0;    SUPERLU_FREE(ia);  SUPERLU_FREE(aij);  memAux -= 2*szbuf*iword + szbuf*dword;    *p_ainf_colptr = ainf_colptr;  *p_ainf_rowind = ainf_rowind;   *p_ainf_val    = ainf_val;  *p_asup_rowptr = asup_rowptr;  *p_asup_colind = asup_colind;  *p_asup_val    = asup_val;#if ( DEBUGlevel>=1 )  CHECK_MALLOC(iam, "Exit zdist_A()");  fprintf (stdout, "Size of allocated memory (MB) %.3f\n", memRet*1e-6);#endif  return (-memRet);} /* dist_A */int_tzdist_psymbtonum(fact_t fact, int_t n, SuperMatrix *A,		ScalePermstruct_t *ScalePermstruct,		Pslu_freeable_t *Pslu_freeable, 		LUstruct_t *LUstruct, gridinfo_t *grid)/* * * * Purpose * ======= *   Distribute the input matrix onto the 2D process mesh. *  * Arguments * ========= *  * fact (input) fact_t *        Specifies whether or not the L and U structures will be re-used. *        = SamePattern_SameRowPerm: L and U structures are input, and *                                   unchanged on exit. *          This routine should not be called for this case, an error *          is generated.  Instead, pddistribute routine should be called. *        = DOFACT or SamePattern: L and U structures are computed and output. * * n      (Input) int *        Dimension of the matrix. * * A      (Input) SuperMatrix* *	  The distributed input matrix A of dimension (A->nrow, A->ncol). *        A may be overwritten by diag(R)*A*diag(C)*Pc^T. *        The type of A can be: Stype = NR; Dtype = SLU_D; Mtype = GE. * * ScalePermstruct (Input) ScalePermstruct_t* *        The data structure to store the scaling and permutation vectors *        describing the transformations performed to the original matrix A. * * Glu_freeable (Input) *Glu_freeable_t *        The global structure describing the graph of L and U. *  * LUstruct (Input) LUstruct_t* *        Data structures for L and U factors. * * grid   (Input) gridinfo_t* *        The 2D process mesh. * * Return value * ============ *   < 0, number of bytes allocated on return from the dist_symbLU *   > 0, number of bytes allocated for performing the distribution *       of the data, when out of memory. *        (an approximation). *
pzsymbfact_distdata.c - 源码说明

本页面展示了「SuperLU 2.2版本。对大型、稀疏、非对称的线性系统的直接求解」中的 pzsymbfact_distdata.c 源码文件，采用 C语言编程语言编写，共 1,943 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与SuperLU相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?