⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 get_perm_c_parmetis.c

📁 SuperLU 2.2版本。对大型、稀疏、非对称的线性系统的直接求解
💻 C
📖 第 1 页 / 共 2 页
字号:
  /* compute the inverse row permutation vector */  for (i = 0; i < n; i++) {    marker[i] = 1;    if (perm_r[i] != i)      redist_pra = TRUE;    iperm_r[perm_r[i]] = i;  }  /* TRANSPOSE LOCAL ROWS ON MY PROCESSOR iam.         */  /* THE RESULT IS STORED IN TCOLIND_SEND.             */  /* THIS COUNTS FOR TWO PASSES OF THE LOCAL MATRIX.   */  /* First pass to get counts of each row of T, and set up column pointers */  for (j = 0; j < m_loc; j++) {    for (i = rowptr[j]; i < rowptr[j+1]; i++){      marker[iperm_r[colind[i]]]++;    }  }  /* determine number of elements to be sent to each processor */  for (p = 0; p < nprocs_i; p++) {    sendCnts[p] = 0;    for (i = vtxdist_i[p]; i < vtxdist_i[p+1]; i++)       sendCnts[p] += marker[i];  }  /* exchange send/receive counts information in between all processors */  MPI_Alltoall (sendCnts, 1, mpi_int_t,		recvCnts, 1, mpi_int_t, grid->comm);  sendCnts[iam] = 0;  sz_tcolind_loc = recvCnts[iam];    for (i = 0, j = 0, p = 0; p < nprocs_i; p++) {    rdispls[p] = j;    j += recvCnts[p];    sdispls[p] = i;      i += sendCnts[p];  }  recvCnts[iam] = 0;  sz_tcolind_recv = j;  sz_tcolind_send = i;    /* allocate memory to receive necessary blocks of transpose matrix T */  if (sz_tcolind_recv) {    if ( !(tcolind_recv = (int_t*) SUPERLU_MALLOC( sz_tcolind_recv 						   * sizeof(int_t) )))      ABORT("SUPERLU_MALLOC fails tcolind_recv[]");    apat_mem += sz_tcolind_recv;  }  /* allocate memory to send blocks of local transpose matrix T to other processors */  if (sz_tcolind_send) {    if (!(tcolind_send = (int_t*) SUPERLU_MALLOC( (sz_tcolind_send) 						  * sizeof(int_t))))      ABORT("SUPERLU_MALLOC fails for tcolind_send[]");    apat_mem += sz_tcolind_send;  }  /* Set up marker[] to point at the beginning of each row in the     send/receive buffer.  For each row, we store first its number of     elements, and then the elements. */    ind_rcv = rdispls[iam];  for (p = 0; p < nprocs_i; p++) {    for (i = vtxdist_i[p]; i < vtxdist_i[p+1]; i++) {      nelts = marker[i] - 1;      if (p == iam) {	tcolind_recv[ind_rcv] = nelts;	marker[i] = ind_rcv + 1;	ind_rcv += nelts + 1;      }      else {	tcolind_send[sdispls[p]] = nelts;	marker[i] = sdispls[p] + 1;	sdispls[p] += nelts + 1;      }    }  }  /* reset sdispls vector */  for (i = 0, p = 0; p < nprocs_i; p++) {    sdispls[p] = i;      i += sendCnts[p];  }  /* Second pass of the local matrix A to copy data to be send */  for (j = 0; j < m_loc; j++)    for (i = rowptr[j]; i < rowptr[j+1]; i++) {      col = colind[i];      ipcol = iperm_r[col];            if (ipcol >= fst_row && ipcol < fst_row + m_loc)  /* local data */	tcolind_recv[marker[ipcol]] = perm_r[j + fst_row];            else /* remote */ 	tcolind_send[marker[ipcol]] = perm_r[j + fst_row];      marker[ipcol] ++;    }  sendCnts[iam] = 0;  recvCnts[iam] = 0;#if defined (_LONGINT)  for (p=0; p<nprocs; p++) {    if (sendCnts[p] > INT_MAX || sdispls[p] > INT_MAX ||	recvCnts[p] > INT_MAX || rdispls[p] > INT_MAX)      ABORT("ERROR in dist_symbLU size to send > INT_MAX\n");    intBuf1[p] = (int) sendCnts[p];    intBuf2[p] = (int) sdispls[p];    intBuf3[p] = (int) recvCnts[p];    intBuf4[p] = (int) rdispls[p];  }#else  /* Default */  intBuf1 = sendCnts;  intBuf2 = sdispls;  intBuf3 = recvCnts;  intBuf4 = rdispls;#endif    /* send/receive transpose matrix T */  MPI_Alltoallv (tcolind_send, intBuf1, intBuf2, mpi_int_t,		 tcolind_recv, intBuf3, intBuf4, mpi_int_t,		 grid->comm);  /* ------------------------------------------------------------     DEALLOCATE SEND COMMUNICATION STORAGE     ------------------------------------------------------------*/  if (sz_tcolind_send) {    SUPERLU_FREE( tcolind_send );    apat_mem_max = apat_mem;    apat_mem -= sz_tcolind_send;  }  /* ----------------------------------------------------------------     FOR LOCAL ROWS:       compute B = A + T, where row j of B is:       Struct (B(j,:)) = Struct (A(j,:)) UNION Struct (T(j,:))       do not include the diagonal entry     THIS COUNTS FOR TWO PASSES OF THE LOCAL ROWS OF A AND T.        ------------------------------------------------------------------ */    /* Reset marker to EMPTY */  for (i = 0; i < n; ++i) marker[i] = EMPTY;  /* save rdispls information */  for (p = 0; p < nprocs_i; p++)    sdispls[p] = rdispls[p];  /* First pass determines number of nonzeros in B */  num_nz = 0;  for (j = 0; j < m_loc; j++) {    /* Flag the diagonal so it's not included in the B matrix */    marker[perm_r[j + fst_row]] = j;        /* Add pattern of row A(j,:) to B(j,:) */    for (i = rowptr[j]; i < rowptr[j+1]; i++) {      k = colind[i];      if ( marker[k] != j ) {	marker[k] = j;	++num_nz;      }    }        /* Add pattern of row T(j,:) to B(j,:) */    for (p = 0; p < nprocs_i; p++) {      t_ind = rdispls[p];      nelts = tcolind_recv[t_ind]; t_ind ++;      for (i = t_ind; i < t_ind + nelts; i++) {	k = tcolind_recv[i];	if ( marker[k] != j ) {	  marker[k] = j;	  ++num_nz;	}      }      t_ind += nelts;      rdispls[p] = t_ind;    }  }  bnz_t = num_nz;  /* Allocate storage for B=Pr*A+A'*Pr' */  if ( !(b_rowptr_t = (int_t*) SUPERLU_MALLOC((m_loc+1) * sizeof(int_t))))    ABORT("SUPERLU_MALLOC fails for b_rowptr_t[]");  if ( bnz_t ) {    if ( !(b_colind_t = (int_t*) SUPERLU_MALLOC( bnz_t * sizeof(int_t))))      ABORT("SUPERLU_MALLOC fails for b_colind_t[]");  }  apat_mem += m_loc + 1 + bnz_t;  if (apat_mem > apat_mem_max)    apat_mem_max = apat_mem;    /* Reset marker to EMPTY */  for (i = 0; i < n; i++) marker[i] = EMPTY;  /* restore rdispls information */  for (p = 0; p < nprocs_i; p++)    rdispls[p] = sdispls[p];    /* Second pass, compute each row of B, one at a time */  num_nz = 0;  t_ind = 0;  for (j = 0; j < m_loc; j++) {    b_rowptr_t[j] = num_nz;        /* Flag the diagonal so it's not included in the B matrix */    marker[perm_r[j + fst_row]] = j;    /* Add pattern of row A(j,:) to B(j,:) */    for (i = rowptr[j]; i < rowptr[j+1]; i++) {      k = colind[i];      if ( marker[k] != j ) {	marker[k] = j;	b_colind_t[num_nz] = k; num_nz ++;      }    }    /* Add pattern of row T(j,:) to B(j,:) */    for (p = 0; p < nprocs_i; p++) {      t_ind = rdispls[p];      nelts = tcolind_recv[t_ind]; t_ind++;      for (i = t_ind; i < t_ind + nelts; i++) {	k = tcolind_recv[i];	if ( marker[k] != j ) {	  marker[k] = j;	  b_colind_t[num_nz] = k; num_nz++;	}      }      t_ind += nelts;      rdispls[p] = t_ind;    }  }  b_rowptr_t[m_loc] = num_nz;  for (p = 0; p <= SUPERLU_MIN(nprocs_i, nprocs_o); p++)     if (vtxdist_i[p] != vtxdist_o[p])      redist_pra = TRUE;    if (sz_tcolind_recv) {    SUPERLU_FREE (tcolind_recv);    apat_mem -= sz_tcolind_recv;  }  SUPERLU_FREE (marker);  SUPERLU_FREE (iperm_r);  apat_mem -= 2 * n + 1;    /* redistribute permuted matrix (by rows) from nproc_i processors     to nproc_o processors */  if (redist_pra) {    m_loc_o = vtxdist_o[iam+1] - vtxdist_o[iam];    fst_row_o = vtxdist_o[iam];    nnz_loc = 0;        if ( !(b_rowptr = intMalloc_dist(m_loc_o + 1)) )      ABORT("Malloc fails for *b_rowptr[].");    apat_mem += m_loc_o + 1;    if (apat_mem > apat_mem_max)      apat_mem_max = apat_mem;    for (p = 0; p < nprocs_i; p++) {      sendCnts[p] = 0;      recvCnts[p] = 0;    }    for (i = 0; i < m_loc; i++) {      k = perm_r[i+fst_row];      /* find the processor to which row k belongs */      j = FALSE; p = 0;      while (!j) {	if (vtxdist_o[p] <= k && k < vtxdist_o[p+1])	  j = TRUE;	else 	  p ++;      }      if (p == iam) {	b_rowptr[k-fst_row_o] = b_rowptr_t[i + 1] - b_rowptr_t[i];	nnz_loc += b_rowptr[k-fst_row_o];      }      else	sendCnts[p] += b_rowptr_t[i + 1] - b_rowptr_t[i] + 2;    }    /* exchange send/receive counts information in between all processors */    MPI_Alltoall (sendCnts, 1, mpi_int_t,		  recvCnts, 1, mpi_int_t, grid->comm);        for (i = 0, j = 0, p = 0; p < nprocs_i; p++) {      rdispls[p] = j;      j += recvCnts[p];      sdispls[p] = i;        i += sendCnts[p];    }    rdispls[p] = j;    sdispls[p] = i;    sz_tcolind_recv = j;    sz_tcolind_send = i;    /* allocate memory for local data */    tcolind_recv = NULL;    tcolind_send = NULL;    if (sz_tcolind_recv) {      if ( !(tcolind_recv = (int_t*) SUPERLU_MALLOC( sz_tcolind_recv 						     * sizeof(int_t) )))	ABORT("SUPERLU_MALLOC fails tcolind_recv[]");      apat_mem += sz_tcolind_recv;    }    /* allocate memory to receive necessary data */    if (sz_tcolind_send) {      if (!(tcolind_send = (int_t*) SUPERLU_MALLOC( (sz_tcolind_send) 						    * sizeof(int_t))))	ABORT("SUPERLU_MALLOC fails for tcolind_send[]");      apat_mem += sz_tcolind_send;    }    if (apat_mem > apat_mem_max)      apat_mem_max = apat_mem;    /* Copy data to be send */    ind_rcv = rdispls[iam];    for (i = 0; i < m_loc; i++) {      k = perm_r[i+fst_row];      /* find the processor to which row k belongs */      j = FALSE; p = 0;      while (!j) {	if (vtxdist_o[p] <= k && k < vtxdist_o[p+1])	  j = TRUE;	else 	  p ++;      }      if (p != iam) { /* remote */ 	tcolind_send[sdispls[p]] = k;	tcolind_send[sdispls[p]+1] = b_rowptr_t[i+1] - b_rowptr_t[i];	sdispls[p] += 2;	for (j = b_rowptr_t[i]; j < b_rowptr_t[i+1]; j++) {	  tcolind_send[sdispls[p]] = b_colind_t[j]; sdispls[p] ++;	}      }    }      /* reset sdispls vector */    for (i = 0, p = 0; p < nprocs_i; p++) {      sdispls[p] = i;        i += sendCnts[p];    }    sendCnts[iam] = 0;    recvCnts[iam] = 0;    #if defined (_LONGINT)    for (p=0; p<nprocs; p++) {      if (sendCnts[p] > INT_MAX || sdispls[p] > INT_MAX ||	  recvCnts[p] > INT_MAX || rdispls[p] > INT_MAX)	ABORT("ERROR in dist_symbLU size to send > INT_MAX\n");      intBuf1[p] = (int) sendCnts[p];      intBuf2[p] = (int) sdispls[p];      intBuf3[p] = (int) recvCnts[p];      intBuf4[p] = (int) rdispls[p];    }#else  /* Default */    intBuf1 = sendCnts;  intBuf2 = sdispls;    intBuf3 = recvCnts;  intBuf4 = rdispls;#endif    /* send/receive permuted matrix T by rows */    MPI_Alltoallv (tcolind_send, intBuf1, intBuf2, mpi_int_t,		   tcolind_recv, intBuf3, intBuf4, mpi_int_t,		   grid->comm);    /* ------------------------------------------------------------       DEALLOCATE COMMUNICATION STORAGE       ------------------------------------------------------------*/    if (sz_tcolind_send) {      SUPERLU_FREE( tcolind_send );      apat_mem -= sz_tcolind_send;    }        /* ------------------------------------------------------------       STORE ROWS IN ASCENDING ORDER OF THEIR NUMBER       ------------------------------------------------------------*/    for (p = 0; p < nprocs; p++) {      if (p != iam) {	i = rdispls[p];	while (i < rdispls[p+1]) {	  j = tcolind_recv[i];	  nelts = tcolind_recv[i+1];	  i += 2 + nelts;	  b_rowptr[j-fst_row_o] = nelts;	  nnz_loc += nelts;	}      }    }    if (nnz_loc)      if ( !(b_colind = intMalloc_dist(nnz_loc)) ) {	ABORT("Malloc fails for bcolind[].");	apat_mem += nnz_loc;	if (apat_mem > apat_mem_max)	  apat_mem_max = apat_mem;      }    /* Initialize the array of row pointers */    k = 0;    for (j = 0; j < m_loc_o; j++) {      i = b_rowptr[j];      b_rowptr[j] = k;      k += i;    }    if (m_loc_o) b_rowptr[j] = k;        /* Copy the data into the row oriented storage */    for (p = 0; p < nprocs; p++) {      if (p != iam) {	i = rdispls[p];	while (i < rdispls[p+1]) {	  j = tcolind_recv[i];	  nelts = tcolind_recv[i+1];	  for (i += 2, k = b_rowptr[j-fst_row_o]; 	       k < b_rowptr[j-fst_row_o+1]; i++, k++) 	    b_colind[k] = tcolind_recv[i];	}      }    }    for (i = 0; i < m_loc; i++) {      k = perm_r[i+fst_row];      if (k >= vtxdist_o[iam] && k < vtxdist_o[iam+1]) {	ind = b_rowptr[k-fst_row_o];	for (j = b_rowptr_t[i]; j < b_rowptr_t[i+1]; j++, ind++)	  b_colind[ind] = b_colind_t[j];      }    }        SUPERLU_FREE(b_rowptr_t);    if ( bnz_t )      SUPERLU_FREE(b_colind_t);    if (sz_tcolind_recv)      SUPERLU_FREE(tcolind_recv);    apat_mem -= bnz_t + m_loc + sz_tcolind_recv;        *p_bnz = nnz_loc;    *p_b_rowptr = b_rowptr;    *p_b_colind = b_colind;  }  else {    *p_bnz = bnz_t;    *p_b_rowptr = b_rowptr_t;    *p_b_colind = b_colind_t;  }    SUPERLU_FREE (rdispls);  SUPERLU_FREE (sdispls);  SUPERLU_FREE (sendCnts);  SUPERLU_FREE (recvCnts);  apat_mem -= 4 * nprocs + 2;#if defined (_LONGINT)  SUPERLU_FREE (intBuf1);  apat_mem -= 4*nprocs*sizeof(int) / sizeof(int_t);#endif  #if ( DEBUGlevel>=1 )  CHECK_MALLOC(iam, "Exit a_plus_at_CompRow_loc()");#endif    return (- apat_mem_max * sizeof(int_t));} /* a_plus_at_CompRow_loc */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -