📄 get_perm_c_parmetis.c
字号:
/* compute the inverse row permutation vector */ for (i = 0; i < n; i++) { marker[i] = 1; if (perm_r[i] != i) redist_pra = TRUE; iperm_r[perm_r[i]] = i; } /* TRANSPOSE LOCAL ROWS ON MY PROCESSOR iam. */ /* THE RESULT IS STORED IN TCOLIND_SEND. */ /* THIS COUNTS FOR TWO PASSES OF THE LOCAL MATRIX. */ /* First pass to get counts of each row of T, and set up column pointers */ for (j = 0; j < m_loc; j++) { for (i = rowptr[j]; i < rowptr[j+1]; i++){ marker[iperm_r[colind[i]]]++; } } /* determine number of elements to be sent to each processor */ for (p = 0; p < nprocs_i; p++) { sendCnts[p] = 0; for (i = vtxdist_i[p]; i < vtxdist_i[p+1]; i++) sendCnts[p] += marker[i]; } /* exchange send/receive counts information in between all processors */ MPI_Alltoall (sendCnts, 1, mpi_int_t, recvCnts, 1, mpi_int_t, grid->comm); sendCnts[iam] = 0; sz_tcolind_loc = recvCnts[iam]; for (i = 0, j = 0, p = 0; p < nprocs_i; p++) { rdispls[p] = j; j += recvCnts[p]; sdispls[p] = i; i += sendCnts[p]; } recvCnts[iam] = 0; sz_tcolind_recv = j; sz_tcolind_send = i; /* allocate memory to receive necessary blocks of transpose matrix T */ if (sz_tcolind_recv) { if ( !(tcolind_recv = (int_t*) SUPERLU_MALLOC( sz_tcolind_recv * sizeof(int_t) ))) ABORT("SUPERLU_MALLOC fails tcolind_recv[]"); apat_mem += sz_tcolind_recv; } /* allocate memory to send blocks of local transpose matrix T to other processors */ if (sz_tcolind_send) { if (!(tcolind_send = (int_t*) SUPERLU_MALLOC( (sz_tcolind_send) * sizeof(int_t)))) ABORT("SUPERLU_MALLOC fails for tcolind_send[]"); apat_mem += sz_tcolind_send; } /* Set up marker[] to point at the beginning of each row in the send/receive buffer. For each row, we store first its number of elements, and then the elements. */ ind_rcv = rdispls[iam]; for (p = 0; p < nprocs_i; p++) { for (i = vtxdist_i[p]; i < vtxdist_i[p+1]; i++) { nelts = marker[i] - 1; if (p == iam) { tcolind_recv[ind_rcv] = nelts; marker[i] = ind_rcv + 1; ind_rcv += nelts + 1; } else { tcolind_send[sdispls[p]] = nelts; marker[i] = sdispls[p] + 1; sdispls[p] += nelts + 1; } } } /* reset sdispls vector */ for (i = 0, p = 0; p < nprocs_i; p++) { sdispls[p] = i; i += sendCnts[p]; } /* Second pass of the local matrix A to copy data to be send */ for (j = 0; j < m_loc; j++) for (i = rowptr[j]; i < rowptr[j+1]; i++) { col = colind[i]; ipcol = iperm_r[col]; if (ipcol >= fst_row && ipcol < fst_row + m_loc) /* local data */ tcolind_recv[marker[ipcol]] = perm_r[j + fst_row]; else /* remote */ tcolind_send[marker[ipcol]] = perm_r[j + fst_row]; marker[ipcol] ++; } sendCnts[iam] = 0; recvCnts[iam] = 0;#if defined (_LONGINT) for (p=0; p<nprocs; p++) { if (sendCnts[p] > INT_MAX || sdispls[p] > INT_MAX || recvCnts[p] > INT_MAX || rdispls[p] > INT_MAX) ABORT("ERROR in dist_symbLU size to send > INT_MAX\n"); intBuf1[p] = (int) sendCnts[p]; intBuf2[p] = (int) sdispls[p]; intBuf3[p] = (int) recvCnts[p]; intBuf4[p] = (int) rdispls[p]; }#else /* Default */ intBuf1 = sendCnts; intBuf2 = sdispls; intBuf3 = recvCnts; intBuf4 = rdispls;#endif /* send/receive transpose matrix T */ MPI_Alltoallv (tcolind_send, intBuf1, intBuf2, mpi_int_t, tcolind_recv, intBuf3, intBuf4, mpi_int_t, grid->comm); /* ------------------------------------------------------------ DEALLOCATE SEND COMMUNICATION STORAGE ------------------------------------------------------------*/ if (sz_tcolind_send) { SUPERLU_FREE( tcolind_send ); apat_mem_max = apat_mem; apat_mem -= sz_tcolind_send; } /* ---------------------------------------------------------------- FOR LOCAL ROWS: compute B = A + T, where row j of B is: Struct (B(j,:)) = Struct (A(j,:)) UNION Struct (T(j,:)) do not include the diagonal entry THIS COUNTS FOR TWO PASSES OF THE LOCAL ROWS OF A AND T. ------------------------------------------------------------------ */ /* Reset marker to EMPTY */ for (i = 0; i < n; ++i) marker[i] = EMPTY; /* save rdispls information */ for (p = 0; p < nprocs_i; p++) sdispls[p] = rdispls[p]; /* First pass determines number of nonzeros in B */ num_nz = 0; for (j = 0; j < m_loc; j++) { /* Flag the diagonal so it's not included in the B matrix */ marker[perm_r[j + fst_row]] = j; /* Add pattern of row A(j,:) to B(j,:) */ for (i = rowptr[j]; i < rowptr[j+1]; i++) { k = colind[i]; if ( marker[k] != j ) { marker[k] = j; ++num_nz; } } /* Add pattern of row T(j,:) to B(j,:) */ for (p = 0; p < nprocs_i; p++) { t_ind = rdispls[p]; nelts = tcolind_recv[t_ind]; t_ind ++; for (i = t_ind; i < t_ind + nelts; i++) { k = tcolind_recv[i]; if ( marker[k] != j ) { marker[k] = j; ++num_nz; } } t_ind += nelts; rdispls[p] = t_ind; } } bnz_t = num_nz; /* Allocate storage for B=Pr*A+A'*Pr' */ if ( !(b_rowptr_t = (int_t*) SUPERLU_MALLOC((m_loc+1) * sizeof(int_t)))) ABORT("SUPERLU_MALLOC fails for b_rowptr_t[]"); if ( bnz_t ) { if ( !(b_colind_t = (int_t*) SUPERLU_MALLOC( bnz_t * sizeof(int_t)))) ABORT("SUPERLU_MALLOC fails for b_colind_t[]"); } apat_mem += m_loc + 1 + bnz_t; if (apat_mem > apat_mem_max) apat_mem_max = apat_mem; /* Reset marker to EMPTY */ for (i = 0; i < n; i++) marker[i] = EMPTY; /* restore rdispls information */ for (p = 0; p < nprocs_i; p++) rdispls[p] = sdispls[p]; /* Second pass, compute each row of B, one at a time */ num_nz = 0; t_ind = 0; for (j = 0; j < m_loc; j++) { b_rowptr_t[j] = num_nz; /* Flag the diagonal so it's not included in the B matrix */ marker[perm_r[j + fst_row]] = j; /* Add pattern of row A(j,:) to B(j,:) */ for (i = rowptr[j]; i < rowptr[j+1]; i++) { k = colind[i]; if ( marker[k] != j ) { marker[k] = j; b_colind_t[num_nz] = k; num_nz ++; } } /* Add pattern of row T(j,:) to B(j,:) */ for (p = 0; p < nprocs_i; p++) { t_ind = rdispls[p]; nelts = tcolind_recv[t_ind]; t_ind++; for (i = t_ind; i < t_ind + nelts; i++) { k = tcolind_recv[i]; if ( marker[k] != j ) { marker[k] = j; b_colind_t[num_nz] = k; num_nz++; } } t_ind += nelts; rdispls[p] = t_ind; } } b_rowptr_t[m_loc] = num_nz; for (p = 0; p <= SUPERLU_MIN(nprocs_i, nprocs_o); p++) if (vtxdist_i[p] != vtxdist_o[p]) redist_pra = TRUE; if (sz_tcolind_recv) { SUPERLU_FREE (tcolind_recv); apat_mem -= sz_tcolind_recv; } SUPERLU_FREE (marker); SUPERLU_FREE (iperm_r); apat_mem -= 2 * n + 1; /* redistribute permuted matrix (by rows) from nproc_i processors to nproc_o processors */ if (redist_pra) { m_loc_o = vtxdist_o[iam+1] - vtxdist_o[iam]; fst_row_o = vtxdist_o[iam]; nnz_loc = 0; if ( !(b_rowptr = intMalloc_dist(m_loc_o + 1)) ) ABORT("Malloc fails for *b_rowptr[]."); apat_mem += m_loc_o + 1; if (apat_mem > apat_mem_max) apat_mem_max = apat_mem; for (p = 0; p < nprocs_i; p++) { sendCnts[p] = 0; recvCnts[p] = 0; } for (i = 0; i < m_loc; i++) { k = perm_r[i+fst_row]; /* find the processor to which row k belongs */ j = FALSE; p = 0; while (!j) { if (vtxdist_o[p] <= k && k < vtxdist_o[p+1]) j = TRUE; else p ++; } if (p == iam) { b_rowptr[k-fst_row_o] = b_rowptr_t[i + 1] - b_rowptr_t[i]; nnz_loc += b_rowptr[k-fst_row_o]; } else sendCnts[p] += b_rowptr_t[i + 1] - b_rowptr_t[i] + 2; } /* exchange send/receive counts information in between all processors */ MPI_Alltoall (sendCnts, 1, mpi_int_t, recvCnts, 1, mpi_int_t, grid->comm); for (i = 0, j = 0, p = 0; p < nprocs_i; p++) { rdispls[p] = j; j += recvCnts[p]; sdispls[p] = i; i += sendCnts[p]; } rdispls[p] = j; sdispls[p] = i; sz_tcolind_recv = j; sz_tcolind_send = i; /* allocate memory for local data */ tcolind_recv = NULL; tcolind_send = NULL; if (sz_tcolind_recv) { if ( !(tcolind_recv = (int_t*) SUPERLU_MALLOC( sz_tcolind_recv * sizeof(int_t) ))) ABORT("SUPERLU_MALLOC fails tcolind_recv[]"); apat_mem += sz_tcolind_recv; } /* allocate memory to receive necessary data */ if (sz_tcolind_send) { if (!(tcolind_send = (int_t*) SUPERLU_MALLOC( (sz_tcolind_send) * sizeof(int_t)))) ABORT("SUPERLU_MALLOC fails for tcolind_send[]"); apat_mem += sz_tcolind_send; } if (apat_mem > apat_mem_max) apat_mem_max = apat_mem; /* Copy data to be send */ ind_rcv = rdispls[iam]; for (i = 0; i < m_loc; i++) { k = perm_r[i+fst_row]; /* find the processor to which row k belongs */ j = FALSE; p = 0; while (!j) { if (vtxdist_o[p] <= k && k < vtxdist_o[p+1]) j = TRUE; else p ++; } if (p != iam) { /* remote */ tcolind_send[sdispls[p]] = k; tcolind_send[sdispls[p]+1] = b_rowptr_t[i+1] - b_rowptr_t[i]; sdispls[p] += 2; for (j = b_rowptr_t[i]; j < b_rowptr_t[i+1]; j++) { tcolind_send[sdispls[p]] = b_colind_t[j]; sdispls[p] ++; } } } /* reset sdispls vector */ for (i = 0, p = 0; p < nprocs_i; p++) { sdispls[p] = i; i += sendCnts[p]; } sendCnts[iam] = 0; recvCnts[iam] = 0; #if defined (_LONGINT) for (p=0; p<nprocs; p++) { if (sendCnts[p] > INT_MAX || sdispls[p] > INT_MAX || recvCnts[p] > INT_MAX || rdispls[p] > INT_MAX) ABORT("ERROR in dist_symbLU size to send > INT_MAX\n"); intBuf1[p] = (int) sendCnts[p]; intBuf2[p] = (int) sdispls[p]; intBuf3[p] = (int) recvCnts[p]; intBuf4[p] = (int) rdispls[p]; }#else /* Default */ intBuf1 = sendCnts; intBuf2 = sdispls; intBuf3 = recvCnts; intBuf4 = rdispls;#endif /* send/receive permuted matrix T by rows */ MPI_Alltoallv (tcolind_send, intBuf1, intBuf2, mpi_int_t, tcolind_recv, intBuf3, intBuf4, mpi_int_t, grid->comm); /* ------------------------------------------------------------ DEALLOCATE COMMUNICATION STORAGE ------------------------------------------------------------*/ if (sz_tcolind_send) { SUPERLU_FREE( tcolind_send ); apat_mem -= sz_tcolind_send; } /* ------------------------------------------------------------ STORE ROWS IN ASCENDING ORDER OF THEIR NUMBER ------------------------------------------------------------*/ for (p = 0; p < nprocs; p++) { if (p != iam) { i = rdispls[p]; while (i < rdispls[p+1]) { j = tcolind_recv[i]; nelts = tcolind_recv[i+1]; i += 2 + nelts; b_rowptr[j-fst_row_o] = nelts; nnz_loc += nelts; } } } if (nnz_loc) if ( !(b_colind = intMalloc_dist(nnz_loc)) ) { ABORT("Malloc fails for bcolind[]."); apat_mem += nnz_loc; if (apat_mem > apat_mem_max) apat_mem_max = apat_mem; } /* Initialize the array of row pointers */ k = 0; for (j = 0; j < m_loc_o; j++) { i = b_rowptr[j]; b_rowptr[j] = k; k += i; } if (m_loc_o) b_rowptr[j] = k; /* Copy the data into the row oriented storage */ for (p = 0; p < nprocs; p++) { if (p != iam) { i = rdispls[p]; while (i < rdispls[p+1]) { j = tcolind_recv[i]; nelts = tcolind_recv[i+1]; for (i += 2, k = b_rowptr[j-fst_row_o]; k < b_rowptr[j-fst_row_o+1]; i++, k++) b_colind[k] = tcolind_recv[i]; } } } for (i = 0; i < m_loc; i++) { k = perm_r[i+fst_row]; if (k >= vtxdist_o[iam] && k < vtxdist_o[iam+1]) { ind = b_rowptr[k-fst_row_o]; for (j = b_rowptr_t[i]; j < b_rowptr_t[i+1]; j++, ind++) b_colind[ind] = b_colind_t[j]; } } SUPERLU_FREE(b_rowptr_t); if ( bnz_t ) SUPERLU_FREE(b_colind_t); if (sz_tcolind_recv) SUPERLU_FREE(tcolind_recv); apat_mem -= bnz_t + m_loc + sz_tcolind_recv; *p_bnz = nnz_loc; *p_b_rowptr = b_rowptr; *p_b_colind = b_colind; } else { *p_bnz = bnz_t; *p_b_rowptr = b_rowptr_t; *p_b_colind = b_colind_t; } SUPERLU_FREE (rdispls); SUPERLU_FREE (sdispls); SUPERLU_FREE (sendCnts); SUPERLU_FREE (recvCnts); apat_mem -= 4 * nprocs + 2;#if defined (_LONGINT) SUPERLU_FREE (intBuf1); apat_mem -= 4*nprocs*sizeof(int) / sizeof(int_t);#endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit a_plus_at_CompRow_loc()");#endif return (- apat_mem_max * sizeof(int_t));} /* a_plus_at_CompRow_loc */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -