📄 parallel.h
    libmesh_assert (ierr == MPI_SUCCESS);

    STOP_LOG("irecv()", "Parallel");
  }


  template <typename T>
  inline void irecv (const int src_processor_id,
                     std::vector<std::complex<T> > &buf,
                     request &r,
                     const int tag)
  {
    START_LOG("irecv()", "Parallel");

    const int ierr =
      MPI_Irecv (buf.empty() ? NULL : &buf[0],
                 buf.size() * 2,
                 datatype<T>(),
                 src_processor_id,
                 tag,
                 libMesh::COMM_WORLD,
                 &r);
    libmesh_assert (ierr == MPI_SUCCESS);

    STOP_LOG("irecv()", "Parallel");
  }


  inline void wait (request &r)
  {
    START_LOG("wait()", "Parallel");

    MPI_Wait (&r, MPI_STATUS_IGNORE);

    STOP_LOG("wait()", "Parallel");
  }


  inline void wait (std::vector<request> &r)
  {
    START_LOG("wait()", "Parallel");

    MPI_Waitall (r.size(), r.empty() ? NULL : &r[0], MPI_STATUSES_IGNORE);

    STOP_LOG("wait()", "Parallel");
  }


  template <typename T>
  inline void send_receive(const unsigned int dest_processor_id,
                           T &send,
                           const unsigned int source_processor_id,
                           T &recv)
  {
    START_LOG("send_receive()", "Parallel");

    if (dest_processor_id   == libMesh::processor_id() &&
        source_processor_id == libMesh::processor_id())
      {
        recv = send;
        STOP_LOG("send_receive()", "Parallel");
        return;
      }

    MPI_Status status;
    MPI_Sendrecv(&send, 1, datatype<T>(), dest_processor_id, 0,
                 &recv, 1, datatype<T>(), source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    STOP_LOG("send_receive()", "Parallel");
  }


  template <typename T>
  inline void send_receive(const unsigned int dest_processor_id,
                           std::complex<T> &send,
                           const unsigned int source_processor_id,
                           std::complex<T> &recv)
  {
    START_LOG("send_receive()", "Parallel");

    if (dest_processor_id   == libMesh::processor_id() &&
        source_processor_id == libMesh::processor_id())
      {
        recv = send;
        STOP_LOG("send_receive()", "Parallel");
        return;
      }

    MPI_Status status;
    MPI_Sendrecv(&send, 2, datatype<T>(), dest_processor_id, 0,
                 &recv, 2, datatype<T>(), source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    STOP_LOG("send_receive()", "Parallel");
  }


  template <typename T>
  inline void send_receive(const unsigned int dest_processor_id,
                           std::vector<T> &send,
                           const unsigned int source_processor_id,
                           std::vector<T> &recv)
  {
    START_LOG("send_receive()", "Parallel");

    if (dest_processor_id   == libMesh::processor_id() &&
        source_processor_id == libMesh::processor_id())
      {
        recv = send;
        STOP_LOG("send_receive()", "Parallel");
        return;
      }

    // Trade buffer sizes first
    unsigned int sendsize = send.size(), recvsize;
    MPI_Status status;
    MPI_Sendrecv(&sendsize, 1, datatype<unsigned int>(), dest_processor_id, 0,
                 &recvsize, 1, datatype<unsigned int>(), source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    recv.resize(recvsize);

    MPI_Sendrecv(sendsize ? &send[0] : NULL, sendsize, datatype<T>(),
                 dest_processor_id, 0,
                 recvsize ? &recv[0] : NULL, recvsize, datatype<T>(),
                 source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    STOP_LOG("send_receive()", "Parallel");
  }


  template <typename T>
  inline void send_receive(const unsigned int dest_processor_id,
                           std::vector<std::complex<T> > &send,
                           const unsigned int source_processor_id,
                           std::vector<std::complex<T> > &recv)
  {
    START_LOG("send_receive()", "Parallel");

    if (dest_processor_id   == libMesh::processor_id() &&
        source_processor_id == libMesh::processor_id())
      {
        recv = send;
        STOP_LOG("send_receive()", "Parallel");
        return;
      }

    // Trade buffer sizes first
    unsigned int sendsize = send.size(), recvsize;
    MPI_Status status;
    MPI_Sendrecv(&sendsize, 1, datatype<unsigned int>(), dest_processor_id, 0,
                 &recvsize, 1, datatype<unsigned int>(), source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    recv.resize(recvsize);

    MPI_Sendrecv(sendsize ? &send[0] : NULL, sendsize * 2, datatype<T>(),
                 dest_processor_id, 0,
                 recvsize ? &recv[0] : NULL, recvsize * 2, datatype<T>(),
                 source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    STOP_LOG("send_receive()", "Parallel");
  }


  template <typename T>
  inline void send_receive(const unsigned int dest_processor_id,
                           std::vector<T> &send,
                           const unsigned int source_processor_id,
                           std::vector<T> &recv,
                           MPI_Datatype &type)
  {
    START_LOG("send_receive()", "Parallel");

    if (dest_processor_id   == libMesh::processor_id() &&
        source_processor_id == libMesh::processor_id())
      {
        recv = send;
        STOP_LOG("send_receive()", "Parallel");
        return;
      }

    // Trade buffer sizes first
    unsigned int sendsize = send.size(), recvsize;
    MPI_Status status;
    MPI_Sendrecv(&sendsize, 1, datatype<unsigned int>(), dest_processor_id, 0,
                 &recvsize, 1, datatype<unsigned int>(), source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    recv.resize(recvsize);

    MPI_Sendrecv(sendsize ? &send[0] : NULL, sendsize, type,
                 dest_processor_id, 0,
                 recvsize ? &recv[0] : NULL, recvsize, type,
                 source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    STOP_LOG("send_receive()", "Parallel");
  }


  template <typename T>
  inline void send_receive(const unsigned int dest_processor_id,
                           std::vector<std::vector<T> > &send,
                           const unsigned int source_processor_id,
                           std::vector<std::vector<T> > &recv)
  {
    START_LOG("send_receive()", "Parallel");

    if (dest_processor_id   == libMesh::processor_id() &&
        source_processor_id == libMesh::processor_id())
      {
        recv = send;
        STOP_LOG("send_receive()", "Parallel");
        return;
      }

    // Trade outer buffer sizes first
    unsigned int sendsize = send.size(), recvsize;
    MPI_Status status;
    MPI_Sendrecv(&sendsize, 1, datatype<unsigned int>(), dest_processor_id, 0,
                 &recvsize, 1, datatype<unsigned int>(), source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    recv.resize(recvsize);

    // Trade inner buffer sizes next
    std::vector<unsigned int> sendsizes(sendsize), recvsizes(recvsize);
    unsigned int sendsizesum = 0, recvsizesum = 0;
    for (unsigned int i = 0; i != sendsize; ++i)
      {
        sendsizes[i] = send[i].size();
        sendsizesum += sendsizes[i];
      }

    MPI_Sendrecv(sendsize ? &sendsizes[0] : NULL, sendsize,
                 datatype<unsigned int>(), dest_processor_id, 0,
                 recvsize ? &recvsizes[0] : NULL, recvsize,
                 datatype<unsigned int>(), source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    for (unsigned int i = 0; i != recvsize; ++i)
      {
        recvsizesum += recvsizes[i];
        recv[i].resize(recvsizes[i]);
      }

    // Build temporary buffers third
    // We can't do multiple Sendrecv calls instead because send.size() may
    // differ on different processors
    std::vector<T> senddata(sendsizesum), recvdata(recvsizesum);

    // Fill the temporary send buffer
    typename std::vector<T>::iterator out = senddata.begin();
    for (unsigned int i = 0; i != sendsize; ++i)
      {
        out = std::copy(send[i].begin(), send[i].end(), out);
      }
    libmesh_assert(out == senddata.end());

    MPI_Sendrecv(sendsizesum ? &senddata[0] : NULL, sendsizesum, datatype<T>(),
                 dest_processor_id, 0,
                 recvsizesum ? &recvdata[0] : NULL, recvsizesum, datatype<T>(),
                 source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    // Empty the temporary recv buffer
    typename std::vector<T>::iterator in = recvdata.begin();
    for (unsigned int i = 0; i != recvsize; ++i)
      {
        std::copy(in, in + recvsizes[i], recv[i].begin());
        in += recvsizes[i];
      }
    libmesh_assert(in == recvdata.end());

    STOP_LOG("send_receive()", "Parallel");
  }


  template <typename T>
  inline void gather(const unsigned int root_id,
                     T send,
                     std::vector<T> &recv)
  {
    libmesh_assert(root_id < libMesh::n_processors());

    if (libMesh::processor_id() == root_id)
      recv.resize(libMesh::n_processors());

    if (libMesh::n_processors() > 1)
      {
        START_LOG("gather()", "Parallel");

        MPI_Gather(&send, 1, datatype<T>(),
                   recv.empty() ? NULL : &recv[0], 1, datatype<T>(),
                   root_id, libMesh::COMM_WORLD);

        STOP_LOG("gather()", "Parallel");
      }
    else
      recv[0] = send;
  }


  template <typename T>
  inline void gather(const unsigned int root_id,
                     std::complex<T> send,
                     std::vector<std::complex<T> > &recv)
  {
    libmesh_assert(root_id < libMesh::n_processors());

    if (libMesh::processor_id() == root_id)
      recv.resize(libMesh::n_processors());

    if (libMesh::n_processors() > 1)
      {
        START_LOG("gather()", "Parallel");

        MPI_Gather(&send, 2, datatype<T>(),
                   recv.empty() ? NULL : &recv[0], 2, datatype<T>(),
                   root_id, libMesh::COMM_WORLD);

        STOP_LOG("gather()", "Parallel");
      }
    else
      recv[0] = send;
  }


  /**
   * This function provides a convenient method
   * for combining vectors from each processor into one
   * contiguous chunk on one processor.  This handles the
   * case where the lengths of the vectors may vary.
   * Specifically, this function transforms this:
   \verbatim
    Processor 0: [ ... N_0 ]
    Processor 1: [ ....... N_1 ]
      ...
    Processor M: [ .. N_M]
   \endverbatim
   *
   * into this:
   *
   \verbatim
   [ [ ... N_0 ] [ ....... N_1 ] ... [ .. N_M] ]
   \endverbatim
   *
   * on processor root_id.  This function is collective and therefore
   * must be called by all processors.
   */
  template <typename T>
  inline void gather(const unsigned int root_id,
                     std::vector<T> &r)
  {
    if (libMesh::n_processors() == 1)
      {
        libmesh_assert (libMesh::processor_id() == root_id);
        return;
      }

    std::vector<int>
      sendlengths  (libMesh::n_processors(), 0),
      displacements(libMesh::n_processors(), 0);

    const int mysize = r.size();
    Parallel::allgather(mysize, sendlengths);

    START_LOG("gather()", "Parallel");

    // Find the total size of the final array and
    // set up the displacement offsets for each processor.
    unsigned int globalsize = 0;
    for (unsigned int i=0; i != libMesh::n_processors(); ++i)
      {
        displacements[i] = globalsize;
        globalsize += sendlengths[i];
      }

    // Check for quick return
    if (globalsize == 0)
      {
        STOP_LOG("gather()", "Parallel");
        return;
      }

    // copy the input buffer
    std::vector<T> r_src(r);

    // now resize it to hold the global data
    // on the receiving processor
    if (root_id == libMesh::processor_id())
      r.resize(globalsize);

    // and get the data from the remote processors
#ifndef NDEBUG
    // Only catch the return value when asserts are active.
    const int ierr =
#endif
      MPI_Gatherv (r_src.empty() ? NULL : &r_src[0], mysize, datatype<T>(),
                   r.empty() ? NULL : &r[0], &sendlengths[0],
                   &displacements[0], datatype<T>(),
                   root_id, libMesh::COMM_WORLD);

    libmesh_assert (ierr == MPI_SUCCESS);

    STOP_LOG("gather()", "Parallel");
  }


  template <typename T>
  inline void gather(const unsigned int root_id,
                     std::vector<std::complex<T> > &r)
  {
    if (libMesh::n_processors() == 1)
      {
        libmesh_assert (libMesh::processor_id() == root_id);
        return;
      }

    std::vector<int>
      sendlengths  (libMesh::n_processors(), 0),
      displacements(libMesh::n_processors(), 0);

    const int mysize = r.size() * 2;
    Parallel::allgather(mysize, sendlengths);

    START_LOG("gather()", "Parallel");

    // Find the total size of the final array and
    // set up the displacement offsets for each processor.
    unsigned int globalsize = 0;
    for (unsigned int i=0; i != libMesh::n_processors(); ++i)
      {
        displacements[i] = globalsize;
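
A minimal usage sketch, not part of parallel.h, showing how the send_receive() and gather() wrappers above might be called from application code. It assumes MPI and libMesh have already been initialized elsewhere, that this header is included, and that Parallel::datatype<T>() specializations exist for the element types used; the function name example_exchange is purely illustrative.

  // Hypothetical example (not part of the header): assumes libMesh/MPI are
  // already initialized and that parallel.h has been included.
  #include <vector>

  void example_exchange ()
  {
    const unsigned int rank  = libMesh::processor_id();
    const unsigned int nproc = libMesh::n_processors();

    // Exchange a variable-length vector with the neighboring ranks in a ring:
    // send to rank+1, receive from rank-1.  send_receive() trades the buffer
    // sizes first, so the two sides need not agree on the length in advance.
    const unsigned int dest   = (rank + 1) % nproc;
    const unsigned int source = (rank + nproc - 1) % nproc;

    std::vector<double> send(rank + 1, static_cast<double>(rank)), recv;
    Parallel::send_receive (dest, send, source, recv);
    // recv now holds source+1 copies of the value 'source'.

    // Collect every rank's id into one vector on processor 0.
    std::vector<unsigned int> all_ids;
    Parallel::gather (0, rank, all_ids);
    // On processor 0: all_ids == {0, 1, ..., nproc-1}; elsewhere it stays empty.
  }

Both calls are collective over libMesh::COMM_WORLD, so every processor must reach them, mirroring the note in the gather() documentation above.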