📄 gather.c
字号:
/* -*- Mode: C; c-basic-offset:4 ; -*- *//* * * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. */#include "mpiimpl.h"/* -- Begin Profiling Symbol Block for routine MPI_Gather */#if defined(HAVE_PRAGMA_WEAK)#pragma weak MPI_Gather = PMPI_Gather#elif defined(HAVE_PRAGMA_HP_SEC_DEF)#pragma _HP_SECONDARY_DEF PMPI_Gather MPI_Gather#elif defined(HAVE_PRAGMA_CRI_DUP)#pragma _CRI duplicate MPI_Gather as PMPI_Gather#endif/* -- End Profiling Symbol Block *//* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build the MPI routines */#ifndef MPICH_MPI_FROM_PMPI#define MPI_Gather PMPI_Gather/* This is the default implementation of gather. The algorithm is: Algorithm: MPI_Gather We use a binomial tree algorithm for both short and long messages. At nodes other than leaf nodes we need to allocate a temporary buffer to store the incoming message. If the root is not rank 0, we receive data in a temporary buffer on the root and then reorder it into the right order. In the heterogeneous case we first pack the buffers by using MPI_Pack and then do the gather. Cost = lgp.alpha + n.((p-1)/p).beta where n is the total size of the data gathered at the root. Possible improvements: End Algorithm: MPI_Gather*//* not declared static because it is called in intercomm. allgather *//* begin:nested */int MPIR_Gather ( void *sendbuf, int sendcnt, MPI_Datatype sendtype, void *recvbuf, int recvcnt, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr ){ static const char FCNAME[] = "MPIR_Gather"; int comm_size, rank; int mpi_errno = MPI_SUCCESS; int curr_cnt=0, relative_rank, nbytes, recv_size, is_homogeneous; int mask, sendtype_size, recvtype_size, src, dst, position;#ifdef MPID_HAS_HETERO int tmp_buf_size;#endif void *tmp_buf=NULL; MPI_Status status; MPI_Aint extent=0; /* Datatype extent */ MPI_Comm comm; comm = comm_ptr->handle; comm_size = comm_ptr->local_size; rank = comm_ptr->rank; if ( ((rank == root) && (recvcnt == 0)) || ((rank != root) && (sendcnt == 0)) ) return MPI_SUCCESS; is_homogeneous = 1;#ifdef MPID_HAS_HETERO if (comm_ptr->is_hetero) is_homogeneous = 0;#endif /* check if multiple threads are calling this collective function */ MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr ); /* Use binomial tree algorithm. */ relative_rank = (rank >= root) ? rank - root : rank - root + comm_size; if (rank == root) MPID_Datatype_get_extent_macro(recvtype, extent); if (is_homogeneous) { /* communicator is homogeneous. no need to pack buffer. */ if (rank == root) { MPID_Datatype_get_size_macro(recvtype, recvtype_size); nbytes = recvtype_size * recvcnt; } else { MPID_Datatype_get_size_macro(sendtype, sendtype_size); nbytes = sendtype_size * sendcnt; } if (rank == root) { if (root != 0) { /* allocate temporary buffer to receive data because it will not be in the right order. We will need to reorder it into the recv_buf. */ tmp_buf = MPIU_Malloc(nbytes*comm_size); /* --BEGIN ERROR HANDLING-- */ if (!tmp_buf) { mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 ); return mpi_errno; } /* --END ERROR HANDLING-- */ if (sendbuf != MPI_IN_PLACE) { /* copy root's sendbuf into tmpbuf just so that it is easier to unpack everything later into the recv_buf */ mpi_errno = MPIR_Localcopy(sendbuf, sendcnt, sendtype, tmp_buf, nbytes, MPI_BYTE); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno) { mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0); return mpi_errno; } /* --END ERROR HANDLING-- */ } curr_cnt = nbytes; } else { /* root is 0. no tmp_buf needed at root. */ /* copy root's sendbuf into recvbuf */ if (sendbuf != MPI_IN_PLACE) { mpi_errno = MPIR_Localcopy(sendbuf, sendcnt, sendtype, recvbuf, recvcnt, recvtype); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno) { mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0); return mpi_errno; } /* --END ERROR HANDLING-- */ } curr_cnt = recvcnt; } } else if (!(relative_rank % 2)) { /* allocate temporary buffer for nodes other than leaf nodes. max size needed is (nbytes*comm_size)/2. */ tmp_buf = MPIU_Malloc((nbytes*comm_size)/2); /* --BEGIN ERROR HANDLING-- */ if (!tmp_buf) { mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 ); return mpi_errno; } /* --END ERROR HANDLING-- */ /* copy from sendbuf into tmp_buf */ mpi_errno = MPIR_Localcopy(sendbuf, sendcnt, sendtype, tmp_buf, nbytes, MPI_BYTE); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno) { mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0); return mpi_errno; } /* --END ERROR HANDLING-- */ curr_cnt = nbytes; } mask = 0x1; while (mask < comm_size) { if ((mask & relative_rank) == 0) { src = relative_rank | mask; if (src < comm_size) { src = (src + root) % comm_size; if ((rank == root) && (root == 0)) { /* root is 0. Receive directly into recvbuf */ mpi_errno = MPIC_Recv(((char *)recvbuf + src*recvcnt*extent), recvcnt*mask, recvtype, src, MPIR_GATHER_TAG, comm, &status); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno) { mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0); return mpi_errno; } /* --END ERROR HANDLING-- */ } else { /* intermediate nodes or nonzero root. store in tmp_buf */ mpi_errno = MPIC_Recv(((char *)tmp_buf + curr_cnt), mask*nbytes, MPI_BYTE, src, MPIR_GATHER_TAG, comm, &status); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno) { mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0); return mpi_errno; } /* --END ERROR HANDLING-- */ /* the recv size is larger than what may be sent in some cases. query amount of data actually received */ NMPI_Get_count(&status, MPI_BYTE, &recv_size); curr_cnt += recv_size; } } } else { dst = relative_rank ^ mask; dst = (dst + root) % comm_size; if (relative_rank % 2) { /* leaf nodes send directly from sendbuf */ mpi_errno = MPIC_Send(sendbuf, sendcnt, sendtype, dst, MPIR_GATHER_TAG, comm); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno) { mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0); return mpi_errno; } /* --END ERROR HANDLING-- */ } else { mpi_errno = MPIC_Send(tmp_buf, curr_cnt, MPI_BYTE, dst, MPIR_GATHER_TAG, comm); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno) { mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0); return mpi_errno; } /* --END ERROR HANDLING-- */ } break; } mask <<= 1; } if ((rank == root) && (root != 0)) { /* reorder and copy from tmp_buf into recvbuf */ position = 0; if (sendbuf != MPI_IN_PLACE) { MPIR_Localcopy(tmp_buf, nbytes*(comm_size-rank), MPI_BYTE, ((char *) recvbuf + extent*recvcnt*rank), recvcnt*(comm_size-rank), recvtype); } else { MPIR_Localcopy((char *) tmp_buf + nbytes, nbytes*(comm_size-rank-1), MPI_BYTE, ((char *) recvbuf + extent*recvcnt*(rank+1)), recvcnt*(comm_size-rank-1), recvtype); } MPIR_Localcopy((char *) tmp_buf + nbytes*(comm_size-rank), nbytes*rank, MPI_BYTE, recvbuf, recvcnt*rank, recvtype); MPIU_Free(tmp_buf); } else if (relative_rank && !(relative_rank % 2)) MPIU_Free(tmp_buf); } #ifdef MPID_HAS_HETERO else { /* communicator is heterogeneous. pack data into tmp_buf. */ if (rank == root) NMPI_Pack_size(recvcnt*comm_size, recvtype, comm, &tmp_buf_size); else NMPI_Pack_size(sendcnt*(comm_size/2), sendtype, comm, &tmp_buf_size); tmp_buf = MPIU_Malloc(tmp_buf_size); /* --BEGIN ERROR HANDLING-- */ if (!tmp_buf) { mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 ); return mpi_errno; } /* --END ERROR HANDLING-- */ position = 0; if (sendbuf != MPI_IN_PLACE) { NMPI_Pack(sendbuf, sendcnt, sendtype, tmp_buf, tmp_buf_size, &position, comm); nbytes = position; } else { /* do a dummy pack just to calculate nbytes */ NMPI_Pack(recvbuf, 1, recvtype, tmp_buf, tmp_buf_size, &position, comm); nbytes = position*recvcnt; } curr_cnt = nbytes; mask = 0x1; while (mask < comm_size) { if ((mask & relative_rank) == 0) { src = relative_rank | mask; if (src < comm_size) { src = (src + root) % comm_size; mpi_errno = MPIC_Recv(((char *)tmp_buf + curr_cnt), mask*nbytes, MPI_BYTE, src, MPIR_GATHER_TAG, comm, &status); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno) { mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0); return mpi_errno; } /* --END ERROR HANDLING-- */ /* the recv size is larger than what may be sent in some cases. query amount of data actually received */ NMPI_Get_count(&status, MPI_BYTE, &recv_size); curr_cnt += recv_size; } } else { dst = relative_rank ^ mask; dst = (dst + root) % comm_size; mpi_errno = MPIC_Send(tmp_buf, curr_cnt, MPI_BYTE, dst, MPIR_GATHER_TAG, comm); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno) { mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0); return mpi_errno; } /* --END ERROR HANDLING-- */ break; } mask <<= 1; } if (rank == root) { /* reorder and copy from tmp_buf into recvbuf */ if (sendbuf != MPI_IN_PLACE) { position = 0; NMPI_Unpack(tmp_buf, tmp_buf_size, &position,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -