parallel.h
// $Id: parallel.h 2979 2008-08-14 20:30:38Z roystgnr $

// The libMesh Finite Element Library.
// Copyright (C) 2002-2007  Benjamin S. Kirk, John W. Peterson

// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.

// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

#ifndef __parallel_h__
#define __parallel_h__

// System includes
#include <string>
#include <vector>

// Local includes
#include "libmesh_common.h" // for Real
#include "libmesh_logging.h"

// Macro to identify and debug functions which should only be called in
// parallel on every processor at once
#undef parallel_only
#ifndef NDEBUG
  #define parallel_only() do { libmesh_assert(Parallel::verify(std::string(__FILE__))); libmesh_assert(Parallel::verify(__LINE__)); } while (0)
#else
  #define parallel_only()
#endif

/**
 * The Parallel namespace is for wrapper functions
 * for common general parallel synchronization tasks.
 *
 * For MPI 1.1 compatibility, temporary buffers are used
 * instead of MPI 2's MPI_IN_PLACE
 */
namespace Parallel
{
#ifdef HAVE_MPI
  //-------------------------------------------------------------------
  /**
   * Data types for communication
   */
  typedef MPI_Datatype data_type;

  /**
   * Templated function to return the appropriate MPI datatype
   * for use with built-in C types
   */
  template <typename T>
  inline data_type datatype();

  /**
   * Request object for non-blocking I/O
   */
  typedef MPI_Request request;

  /**
   * Default message tag id
   */
  const int any_tag=MPI_ANY_TAG;

  /**
   * Accept from any source
   */
  const int any_source=MPI_ANY_SOURCE;

#else

  // These shouldn't be needed
  typedef unsigned int data_type;
  typedef unsigned int request;

  const int any_tag=-1;
  const int any_source=0;

#endif // HAVE_MPI


  //-------------------------------------------------------------------
  /**
   * Encapsulates the MPI_Status struct.  Allows the source and size
   * of the message to be determined.
   */
  class Status
  {
  public:
    Status () {}

#ifndef HAVE_MPI

    int source () const { return 0; }

#else

    Status (const MPI_Status &mpi_status,
            const MPI_Datatype &data_type) :
      _status(mpi_status),
      _datatype(data_type)
    {}

    int source () const { return _status.MPI_SOURCE; }

    unsigned int size () const
    {
      int msg_size;
      MPI_Get_count (const_cast<MPI_Status*>(&_status), _datatype,
                     &msg_size);
      libmesh_assert (msg_size >= 0);
      return msg_size;
    }

  private:

    MPI_Status   _status;
    MPI_Datatype _datatype;

#endif
  };


  //-------------------------------------------------------------------
  /**
   * Pause execution until all processors reach a certain point.
   */
  inline void barrier ()
  {
#ifdef HAVE_MPI
    MPI_Barrier (libMesh::COMM_WORLD);
#endif
    return;
  }

  //-------------------------------------------------------------------
  /**
   * Verify that a local variable has the same value on all processors
   */
  template <typename T>
  inline bool verify(const T &r);

  //-------------------------------------------------------------------
  /**
   * Take a local variable and replace it with the minimum of its values
   * on all processors
   */
  template <typename T>
  inline void min(T &r);

  //-------------------------------------------------------------------
  /**
   * Take a vector of local variables and replace each entry with the minimum
   * of its values on all processors
   */
  template <typename T>
  inline void min(std::vector<T> &r);

  //-------------------------------------------------------------------
  /**
   * Take a local variable and replace it with the maximum of its values
   * on all processors
   */
  template <typename T>
  inline void max(T &r);

  //-------------------------------------------------------------------
  /**
   * Take a vector of local variables and replace each entry with the maximum
   * of its values on all processors
   */
  template <typename T>
  inline void max(std::vector<T> &r);

  //-------------------------------------------------------------------
  /**
   * Take a local variable and replace it with the sum of its values
   * on all processors
   */
  template <typename T>
  inline void sum(T &r);

  //-------------------------------------------------------------------
  /**
   * Take a vector of local variables and replace each entry with the sum of
   * its values on all processors
   */
  template <typename T>
  inline void sum(std::vector<T> &r);

  //-------------------------------------------------------------------
  /**
   * Blocking-send vector to one processor.
   */
  template <typename T>
  inline void send (const unsigned int dest_processor_id,
                    std::vector<T> &buf,
                    const int tag=0);

  //-------------------------------------------------------------------
  /**
   * Nonblocking-send vector to one processor.
   */
  template <typename T>
  inline void isend (const unsigned int dest_processor_id,
                     std::vector<T> &buf,
                     request &r,
                     const int tag=0);

  //-------------------------------------------------------------------
  /**
   * Nonblocking-send vector to one processor with user-defined type.
   */
  template <typename T>
  inline void isend (const unsigned int dest_processor_id,
                     std::vector<T> &buf,
                     data_type &type,
                     request &r,
                     const int tag=0);

  //-------------------------------------------------------------------
  /**
   * Blocking-receive vector from one processor.
   */
  template <typename T>
  inline Status recv (const int src_processor_id,
                      std::vector<T> &buf,
                      const int tag=any_tag);

  //-------------------------------------------------------------------
  /**
   * Blocking-receive vector from one processor with user-defined type.
   */
  template <typename T>
  inline Status recv (const int src_processor_id,
                      std::vector<T> &buf,
                      data_type &type,
                      const int tag=any_tag);

  //-------------------------------------------------------------------
  /**
   * Nonblocking-receive vector from one processor.
   */
  template <typename T>
  inline void irecv (const int src_processor_id,
                     std::vector<T> &buf,
                     request &r,
                     const int tag=any_tag);

  //-------------------------------------------------------------------
  /**
   * Wait for a non-blocking send or receive to finish
   */
  inline void wait (request &r);

  //-------------------------------------------------------------------
  /**
   * Wait for a non-blocking send or receive to finish
   */
  inline void wait (std::vector<request> &r);

  //-------------------------------------------------------------------
  /**
   * Send vector \p send to one processor while simultaneously receiving
   * another vector \p recv from a (potentially different) processor.
   */
  template <typename T>
  inline void send_receive(const unsigned int dest_processor_id,
                           T &send,
                           const unsigned int source_processor_id,
                           T &recv);

  //-------------------------------------------------------------------
  /**
   * Send vector \p send to one processor while simultaneously receiving
   * another vector \p recv from a (potentially different) processor using
   * a user-specified MPI Datatype.
   */
  template <typename T>
  inline void send_receive(const unsigned int dest_processor_id,
                           T &send,
                           const unsigned int source_processor_id,
                           T &recv,
                           data_type &type);

  //-------------------------------------------------------------------
  /**
   * Take a vector of length n_processors, and on processor root_id fill in
   * recv[processor_id] = the value of send on processor processor_id
   */
  template <typename T>
  inline void gather(const unsigned int root_id,
                     T send,
                     std::vector<T> &recv);

  //-------------------------------------------------------------------
  /**
   * Take a vector of local variables and expand it on processor root_id
   * to include values from all processors
   */
  template <typename T>
  inline void gather(const unsigned int root_id,
                     std::vector<T> &r);

  //-------------------------------------------------------------------
  /**
   * Take a vector of length n_processors, and fill in recv[processor_id] = the
   * value of send on that processor
   */
  template <typename T>
  inline void allgather(T send,
                        std::vector<T> &recv);

  //-------------------------------------------------------------------
  /**
   * Take a vector of local variables and expand it to include
   * values from all processors
   */
  template <typename T>
  inline void allgather(std::vector<T> &r);

  //-------------------------------------------------------------------
  /**
   * Effectively transposes the input vector across all processors.
   * The jth entry on processor i is replaced with the ith entry
   * from processor j.
   */
  template <typename T>
  inline void alltoall(std::vector<T> &r);

  //-------------------------------------------------------------------
  /**
   * Take a local value and broadcast it to all processors.
   * Optionally takes the \p root_id processor, which specifies
   * the processor initiating the broadcast.
   */
  template <typename T>
  inline void broadcast(T &data, const unsigned int root_id=0);

  //-------------------------------------------------------------------
  /**
   * Take a local vector and broadcast it to all processors.
   * Optionally takes the \p root_id processor, which specifies
   * the processor initiating the broadcast.  The user is responsible
   * for appropriately sizing the input buffer on all processors.
   */
  template <typename T>
  inline void broadcast(std::vector<T> &data, const unsigned int root_id=0);

  // gcc appears to need an additional declaration to make sure it
  // uses the right definition below
  template <typename T>
  inline void broadcast(std::vector<std::complex<T> > &data,
                        const unsigned int root_id=0);


  //-----------------------------------------------------------------------
  // Parallel members

  // Internal helper function to create vector<something_usable> from
  // vector<bool> for compatibility with MPI bitwise operations
  template <typename T>
  inline void pack_vector_bool(const std::vector<bool> &in,
                               std::vector<T> &out)
  {
    unsigned int data_bits = 8*sizeof(T);
    unsigned int in_size = in.size();
    unsigned int out_size = in_size/data_bits + (in_size%data_bits?1:0);
    out.clear();
    out.resize(out_size);
    for (unsigned int i=0; i != in_size; ++i)
      {
        unsigned int index = i/data_bits;
        unsigned int offset = i%data_bits;
        out[index] += (in[i]?1:0) << offset;
      }
  }

  // Internal helper function to create vector<something_usable> from
  // vector<bool> for compatibility with MPI byte operations
  template <typename T>
  inline void unpack_vector_bool(const std::vector<T> &in,
                                 std::vector<bool> &out)
  {
    unsigned int data_bits = 8*sizeof(T);
    // We need the output vector to already be properly sized
    unsigned int out_size = out.size();
    libmesh_assert(out_size/data_bits + (out_size%data_bits?1:0) == in.size());

    for (unsigned int i=0; i != out_size; ++i)
      {
        unsigned int index = i/data_bits;
        unsigned int offset = i%data_bits;
        out[i] = in[index] << (data_bits-1-offset) >> (data_bits-1);
      }
  }

#ifdef HAVE_MPI

  template<>
  inline MPI_Datatype datatype<char>() { return MPI_CHAR; }

  template<>
  inline MPI_Datatype datatype<unsigned char>() { return MPI_UNSIGNED_CHAR; }

  template<>
  inline MPI_Datatype datatype<short int>() { return MPI_SHORT; }

  template<>
  inline MPI_Datatype datatype<unsigned short int>() { return MPI_UNSIGNED_SHORT; }

  template<>
  inline MPI_Datatype datatype<int>() { return MPI_INT; }

  template<>
  inline MPI_Datatype datatype<unsigned int>() { return MPI_UNSIGNED; }

  template<>
  inline MPI_Datatype datatype<long>() { return MPI_LONG; }

  template<>
  inline MPI_Datatype datatype<unsigned long>() { return MPI_UNSIGNED_LONG; }

  template<>
  inline MPI_Datatype datatype<float>() { return MPI_FLOAT; }

  template<>
  inline MPI_Datatype datatype<double>() { return MPI_DOUBLE; }

  template<>
  inline MPI_Datatype datatype<long double>() { return MPI_LONG_DOUBLE; }

  template <typename T>
  inline bool verify(const T &r)
  {
    if (libMesh::n_processors() > 1)
      {
        T tempmin = r, tempmax = r;
        Parallel::min(tempmin);
        Parallel::max(tempmax);
        bool verified = (r == tempmin) &&
                        (r == tempmax);
        Parallel::min(verified);
        return verified;
      }
    return true;
  }

  template <>
  inline bool verify(const std::string & r)
  {
    if (libMesh::n_processors() > 1)
      {
        // Cannot use <char> since MPI_MIN is not
        // strictly defined for chars!
        std::vector<short int> temp; temp.reserve(r.size());
        for (unsigned int i=0; i != r.size(); ++i)
          temp.push_back(r[i]);
        return Parallel::verify(temp);
      }
    return true;
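
The declarations above are thin wrappers over the corresponding MPI calls. Below is a minimal usage sketch, not part of parallel.h itself: it assumes the library has already been initialized in the usual way, and the function name, include paths, and local values are illustrative assumptions rather than documented libMesh API.

// Hedged usage sketch of the Parallel wrappers; hypothetical function name
// and values.  Assumes libMesh has been initialized so that
// libMesh::COMM_WORLD, libMesh::processor_id() and libMesh::n_processors()
// are valid.
#include <vector>

#include "libmesh.h"   // processor_id(), n_processors() (assumed layout)
#include "parallel.h"  // the wrappers declared above

void example_parallel_usage ()
{
  // Every processor contributes a (hypothetical) local count; after
  // Parallel::sum() each processor holds the global total.
  unsigned int n_global = 42;   // illustrative local value
  Parallel::sum (n_global);

  // verify() checks that a value is identical on every processor -- the
  // same check the parallel_only() macro above performs in debug mode.
  libmesh_assert (Parallel::verify (n_global));

  // Processor 0 picks a parameter and broadcasts it to everyone else.
  double dt = 0.;
  if (libMesh::processor_id() == 0)
    dt = 0.01;
  Parallel::broadcast (dt, 0);

  // Exchange fixed-size buffers with neighbors in a ring; the receive
  // buffer is pre-sized here rather than relying on any resizing the
  // implementation may or may not do (not shown in this excerpt).
  const unsigned int n_proc = libMesh::n_processors();
  const unsigned int me     = libMesh::processor_id();
  std::vector<double> send_buf (10, double(me));
  std::vector<double> recv_buf (10);
  Parallel::send_receive ((me+1)%n_proc,        send_buf,
                          (me+n_proc-1)%n_proc, recv_buf);
}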
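
The pack_vector_bool()/unpack_vector_bool() helpers exist because std::vector<bool> is bit-packed and has no contiguous buffer to hand to MPI. The following is a small serial round-trip sketch of those two helpers only (no communication); the choice of unsigned int as the packed word type, the test pattern, and the function name are assumptions made for illustration.

// Hedged sketch of the vector<bool> packing helpers in isolation.
#include <vector>

#include "parallel.h"

void example_pack_vector_bool ()
{
  std::vector<bool> flags (70, false);
  flags[3]  = true;                 // arbitrary test pattern
  flags[64] = true;

  // With a typical 32-bit unsigned int, 70 flags pack into
  // ceil(70/32) = 3 words; bit i%32 of word i/32 holds flags[i].
  std::vector<unsigned int> packed;
  Parallel::pack_vector_bool (flags, packed);

  // ... packed could now be shipped with send()/recv() or alltoall() ...

  // unpack_vector_bool() requires the caller to size the output first
  // (see the libmesh_assert in its body above).
  std::vector<bool> unpacked (70, false);
  Parallel::unpack_vector_bool (packed, unpacked);
  // unpacked now matches flags bit-for-bit.
}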