📄 parallel.h
    libmesh_assert (ierr == MPI_SUCCESS);

    STOP_LOG("irecv()", "Parallel");
  }


  template <typename T>
  inline void irecv (const int src_processor_id,
                     std::vector<std::complex<T> > &buf,
                     request &r,
                     const int tag)
  {
    START_LOG("irecv()", "Parallel");

    const int ierr =
      MPI_Irecv (buf.empty() ? NULL : &buf[0],
                 buf.size() * 2,
                 datatype<T>(),
                 src_processor_id,
                 tag,
                 libMesh::COMM_WORLD,
                 &r);
    libmesh_assert (ierr == MPI_SUCCESS);

    STOP_LOG("irecv()", "Parallel");
  }


  inline void wait (request &r)
  {
    START_LOG("wait()", "Parallel");

    MPI_Wait (&r, MPI_STATUS_IGNORE);

    STOP_LOG("wait()", "Parallel");
  }


  inline void wait (std::vector<request> &r)
  {
    START_LOG("wait()", "Parallel");

    MPI_Waitall (r.size(), r.empty() ? NULL : &r[0], MPI_STATUSES_IGNORE);

    STOP_LOG("wait()", "Parallel");
  }


  template <typename T>
  inline void send_receive(const unsigned int dest_processor_id,
                           T &send,
                           const unsigned int source_processor_id,
                           T &recv)
  {
    START_LOG("send_receive()", "Parallel");

    if (dest_processor_id   == libMesh::processor_id() &&
        source_processor_id == libMesh::processor_id())
      {
        recv = send;
        STOP_LOG("send_receive()", "Parallel");
        return;
      }

    MPI_Status status;
    MPI_Sendrecv(&send, 1, datatype<T>(), dest_processor_id, 0,
                 &recv, 1, datatype<T>(), source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    STOP_LOG("send_receive()", "Parallel");
  }


  template <typename T>
  inline void send_receive(const unsigned int dest_processor_id,
                           std::complex<T> &send,
                           const unsigned int source_processor_id,
                           std::complex<T> &recv)
  {
    START_LOG("send_receive()", "Parallel");

    if (dest_processor_id   == libMesh::processor_id() &&
        source_processor_id == libMesh::processor_id())
      {
        recv = send;
        STOP_LOG("send_receive()", "Parallel");
        return;
      }

    MPI_Status status;
    MPI_Sendrecv(&send, 2, datatype<T>(), dest_processor_id, 0,
                 &recv, 2, datatype<T>(), source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    STOP_LOG("send_receive()", "Parallel");
  }


  template <typename T>
  inline void send_receive(const unsigned int dest_processor_id,
                           std::vector<T> &send,
                           const unsigned int source_processor_id,
                           std::vector<T> &recv)
  {
    START_LOG("send_receive()", "Parallel");

    if (dest_processor_id   == libMesh::processor_id() &&
        source_processor_id == libMesh::processor_id())
      {
        recv = send;
        STOP_LOG("send_receive()", "Parallel");
        return;
      }

    // Trade buffer sizes first
    unsigned int sendsize = send.size(), recvsize;
    MPI_Status status;
    MPI_Sendrecv(&sendsize, 1, datatype<unsigned int>(), dest_processor_id, 0,
                 &recvsize, 1, datatype<unsigned int>(), source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    recv.resize(recvsize);

    MPI_Sendrecv(sendsize ? &send[0] : NULL, sendsize, datatype<T>(),
                 dest_processor_id, 0,
                 recvsize ? &recv[0] : NULL, recvsize, datatype<T>(),
                 source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    STOP_LOG("send_receive()", "Parallel");
  }


  template <typename T>
  inline void send_receive(const unsigned int dest_processor_id,
                           std::vector<std::complex<T> > &send,
                           const unsigned int source_processor_id,
                           std::vector<std::complex<T> > &recv)
  {
    START_LOG("send_receive()", "Parallel");

    if (dest_processor_id   == libMesh::processor_id() &&
        source_processor_id == libMesh::processor_id())
      {
        recv = send;
        STOP_LOG("send_receive()", "Parallel");
        return;
      }

    // Trade buffer sizes first
    unsigned int sendsize = send.size(), recvsize;
    MPI_Status status;
    MPI_Sendrecv(&sendsize, 1, datatype<unsigned int>(), dest_processor_id, 0,
                 &recvsize, 1, datatype<unsigned int>(), source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    recv.resize(recvsize);

    MPI_Sendrecv(sendsize ? &send[0] : NULL, sendsize * 2, datatype<T>(),
                 dest_processor_id, 0,
                 recvsize ? &recv[0] : NULL, recvsize * 2, datatype<T>(),
                 source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    STOP_LOG("send_receive()", "Parallel");
  }


  template <typename T>
  inline void send_receive(const unsigned int dest_processor_id,
                           std::vector<T> &send,
                           const unsigned int source_processor_id,
                           std::vector<T> &recv,
                           MPI_Datatype &type)
  {
    START_LOG("send_receive()", "Parallel");

    if (dest_processor_id   == libMesh::processor_id() &&
        source_processor_id == libMesh::processor_id())
      {
        recv = send;
        STOP_LOG("send_receive()", "Parallel");
        return;
      }

    // Trade buffer sizes first
    unsigned int sendsize = send.size(), recvsize;
    MPI_Status status;
    MPI_Sendrecv(&sendsize, 1, datatype<unsigned int>(), dest_processor_id, 0,
                 &recvsize, 1, datatype<unsigned int>(), source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    recv.resize(recvsize);

    MPI_Sendrecv(sendsize ? &send[0] : NULL, sendsize, type,
                 dest_processor_id, 0,
                 recvsize ? &recv[0] : NULL, recvsize, type,
                 source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    STOP_LOG("send_receive()", "Parallel");
  }


  template <typename T>
  inline void send_receive(const unsigned int dest_processor_id,
                           std::vector<std::vector<T> > &send,
                           const unsigned int source_processor_id,
                           std::vector<std::vector<T> > &recv)
  {
    START_LOG("send_receive()", "Parallel");

    if (dest_processor_id   == libMesh::processor_id() &&
        source_processor_id == libMesh::processor_id())
      {
        recv = send;
        STOP_LOG("send_receive()", "Parallel");
        return;
      }

    // Trade outer buffer sizes first
    unsigned int sendsize = send.size(), recvsize;
    MPI_Status status;
    MPI_Sendrecv(&sendsize, 1, datatype<unsigned int>(), dest_processor_id, 0,
                 &recvsize, 1, datatype<unsigned int>(), source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    recv.resize(recvsize);

    // Trade inner buffer sizes next
    std::vector<unsigned int> sendsizes(sendsize), recvsizes(recvsize);
    unsigned int sendsizesum = 0, recvsizesum = 0;
    for (unsigned int i = 0; i != sendsize; ++i)
      {
        sendsizes[i] = send[i].size();
        sendsizesum += sendsizes[i];
      }

    MPI_Sendrecv(sendsize ? &sendsizes[0] : NULL, sendsize,
                 datatype<unsigned int>(), dest_processor_id, 0,
                 recvsize ? &recvsizes[0] : NULL, recvsize,
                 datatype<unsigned int>(), source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    for (unsigned int i = 0; i != recvsize; ++i)
      {
        recvsizesum += recvsizes[i];
        recv[i].resize(recvsizes[i]);
      }

    // Build temporary buffers third
    // We can't do multiple Sendrecv calls instead because send.size() may
    // differ on different processors
    std::vector<T> senddata(sendsizesum), recvdata(recvsizesum);

    // Fill the temporary send buffer
    typename std::vector<T>::iterator out = senddata.begin();
    for (unsigned int i = 0; i != sendsize; ++i)
      {
        out = std::copy(send[i].begin(), send[i].end(), out);
      }
    libmesh_assert(out == senddata.end());

    MPI_Sendrecv(sendsizesum ? &senddata[0] : NULL, sendsizesum, datatype<T>(),
                 dest_processor_id, 0,
                 recvsizesum ? &recvdata[0] : NULL, recvsizesum, datatype<T>(),
                 source_processor_id, 0,
                 libMesh::COMM_WORLD, &status);

    // Empty the temporary recv buffer
    typename std::vector<T>::iterator in = recvdata.begin();
    for (unsigned int i = 0; i != recvsize; ++i)
      {
        std::copy(in, in + recvsizes[i], recv[i].begin());
        in += recvsizes[i];
      }
    libmesh_assert(in == recvdata.end());

    STOP_LOG("send_receive()", "Parallel");
  }


  template <typename T>
  inline void gather(const unsigned int root_id,
                     T send,
                     std::vector<T> &recv)
  {
    libmesh_assert(root_id < libMesh::n_processors());

    if (libMesh::processor_id() == root_id)
      recv.resize(libMesh::n_processors());

    if (libMesh::n_processors() > 1)
      {
        START_LOG("gather()", "Parallel");

        MPI_Gather(&send, 1, datatype<T>(),
                   recv.empty() ? NULL : &recv[0], 1, datatype<T>(),
                   root_id, libMesh::COMM_WORLD);

        STOP_LOG("gather()", "Parallel");
      }
    else
      recv[0] = send;
  }


  template <typename T>
  inline void gather(const unsigned int root_id,
                     std::complex<T> send,
                     std::vector<std::complex<T> > &recv)
  {
    libmesh_assert(root_id < libMesh::n_processors());

    if (libMesh::processor_id() == root_id)
      recv.resize(libMesh::n_processors());

    if (libMesh::n_processors() > 1)
      {
        START_LOG("gather()", "Parallel");

        MPI_Gather(&send, 2, datatype<T>(),
                   recv.empty() ? NULL : &recv[0], 2, datatype<T>(),
                   root_id, libMesh::COMM_WORLD);

        STOP_LOG("gather()", "Parallel");
      }
    else
      recv[0] = send;
  }


  /**
   * This function provides a convenient method
   * for combining vectors from each processor into one
   * contiguous chunk on one processor.  This handles the
   * case where the lengths of the vectors may vary.
   * Specifically, this function transforms this:
   \verbatim
    Processor 0: [ ... N_0 ]
    Processor 1: [ ....... N_1 ]
      ...
    Processor M: [ .. N_M]
   \endverbatim
   *
   * into this:
   *
   \verbatim
   [ [ ... N_0 ] [ ....... N_1 ] ... [ .. N_M] ]
   \endverbatim
   *
   * on processor root_id.  This function is collective and therefore
   * must be called by all processors.
   */
  template <typename T>
  inline void gather(const unsigned int root_id,
                     std::vector<T> &r)
  {
    if (libMesh::n_processors() == 1)
      {
        libmesh_assert (libMesh::processor_id() == root_id);
        return;
      }

    std::vector<int>
      sendlengths  (libMesh::n_processors(), 0),
      displacements(libMesh::n_processors(), 0);

    const int mysize = r.size();
    Parallel::allgather(mysize, sendlengths);

    START_LOG("gather()", "Parallel");

    // Find the total size of the final array and
    // set up the displacement offsets for each processor.
    unsigned int globalsize = 0;
    for (unsigned int i=0; i != libMesh::n_processors(); ++i)
      {
        displacements[i] = globalsize;
        globalsize += sendlengths[i];
      }

    // Check for quick return
    if (globalsize == 0)
      {
        STOP_LOG("gather()", "Parallel");
        return;
      }

    // copy the input buffer
    std::vector<T> r_src(r);

    // now resize it to hold the global data
    // on the receiving processor
    if (root_id == libMesh::processor_id())
      r.resize(globalsize);

    // and get the data from the remote processors
#ifndef NDEBUG
    // Only catch the return value when asserts are active.
    const int ierr =
#endif
      MPI_Gatherv (r_src.empty() ? NULL : &r_src[0], mysize, datatype<T>(),
                   r.empty() ? NULL : &r[0], &sendlengths[0],
                   &displacements[0], datatype<T>(),
                   root_id, libMesh::COMM_WORLD);

    libmesh_assert (ierr == MPI_SUCCESS);

    STOP_LOG("gather()", "Parallel");
  }


  template <typename T>
  inline void gather(const unsigned int root_id,
                     std::vector<std::complex<T> > &r)
  {
    if (libMesh::n_processors() == 1)
      {
        libmesh_assert (libMesh::processor_id() == root_id);
        return;
      }

    std::vector<int>
      sendlengths  (libMesh::n_processors(), 0),
      displacements(libMesh::n_processors(), 0);

    const int mysize = r.size() * 2;
    Parallel::allgather(mysize, sendlengths);

    START_LOG("gather()", "Parallel");

    // Find the total size of the final array and
    // set up the displacement offsets for each processor.
    unsigned int globalsize = 0;
    for (unsigned int i=0; i != libMesh::n_processors(); ++i)
      {
        displacements[i] = globalsize;
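
A minimal usage sketch, not part of parallel.h, showing how the send_receive() and gather() wrappers above might be called from application code. It assumes MPI and libMesh have already been initialized elsewhere, that this header is included, and that Parallel::datatype<T>() specializations exist for the element types used; the function name example_exchange is purely illustrative.

  // Hypothetical example (not part of the header): assumes libMesh/MPI are
  // already initialized and that parallel.h has been included.
  #include <vector>

  void example_exchange ()
  {
    const unsigned int rank  = libMesh::processor_id();
    const unsigned int nproc = libMesh::n_processors();

    // Exchange a variable-length vector with the neighboring ranks in a ring:
    // send to rank+1, receive from rank-1.  send_receive() trades the buffer
    // sizes first, so the two sides need not agree on the length in advance.
    const unsigned int dest   = (rank + 1) % nproc;
    const unsigned int source = (rank + nproc - 1) % nproc;

    std::vector<double> send(rank + 1, static_cast<double>(rank)), recv;
    Parallel::send_receive (dest, send, source, recv);
    // recv now holds source+1 copies of the value 'source'.

    // Collect every rank's id into one vector on processor 0.
    std::vector<unsigned int> all_ids;
    Parallel::gather (0, rank, all_ids);
    // On processor 0: all_ids == {0, 1, ..., nproc-1}; elsewhere it stays empty.
  }

Both calls are collective over libMesh::COMM_WORLD, so every processor must reach them, mirroring the note in the gather() documentation above.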