📄 mesh_communication.C
// $Id: mesh_communication.C 2914 2008-07-08 19:04:53Z jwpeterson $

// The libMesh Finite Element Library.
// Copyright (C) 2002-2007  Benjamin S. Kirk, John W. Peterson

// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.

// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA


// C++ Includes   -----------------------------------

// Local Includes -----------------------------------
#include "boundary_info.h"
#include "elem.h"
#include "libmesh_config.h"
#include "libmesh_common.h"
#include "libmesh_logging.h"
#include "location_maps.h"
#include "mesh_base.h"
#include "mesh_communication.h"
#include "mesh_tools.h"
#include "parallel.h"
#include "parallel_mesh.h"
#include "parallel_ghost_sync.h"

//-----------------------------------------------
// anonymous namespace for implementation details
namespace {

  /**
   * Specific weak ordering for Elem*'s to be used in a set.
   * We use the id, but first sort by level.  This guarantees
   * that when traversing the set from beginning to end the lower
   * level (parent) elements are encountered first.  Additionally,
   * order siblings so that child(0) is added before child(1).
   */
  struct CompareElemIdsByLevel
  {
    bool operator()(const Elem *a,
                    const Elem *b) const
    {
      libmesh_assert (a);
      libmesh_assert (b);

      if (a->level() == b->level())
        {
//        // order siblings sequentially
//        if (a->parent() && b->parent())    // a & b have parents
//          if (a->parent() == b->parent())  // which are the same
//            return (a->parent()->which_child_am_i(a) <
//                    b->parent()->which_child_am_i(b));

          // otherwise order by id
          return a->id() < b->id();
        }

      // otherwise order by level
      return a->level() < b->level();
    }
  };
}
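
// A minimal usage sketch of the ordering above (hypothetical;
// process_element is an assumed callback, not a libMesh API): iterating
// a set built with this comparator visits coarser (parent) elements
// before finer (child) ones, so a receiving processor can rebuild an
// element hierarchy in a valid order.
//
//   std::set<const Elem*, CompareElemIdsByLevel> ordered;
//   // ... insert elements in any order ...
//   for (std::set<const Elem*, CompareElemIdsByLevel>::const_iterator
//          it = ordered.begin(); it != ordered.end(); ++it)
//     // (*it)->parent(), if any, was already visited, since
//     // parent->level() < (*it)->level().
//     process_element (*it);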

// ------------------------------------------------------------
// MeshCommunication class members
void MeshCommunication::clear ()
{
  //  _neighboring_processors.clear();
}

// #ifdef HAVE_MPI
// void MeshCommunication::find_neighboring_processors (const MeshBase& mesh)
// {
//   // Don't need to do anything if there is
//   // only one processor.
//   if (libMesh::n_processors() == 1)
//     return;

//   _neighboring_processors.clear();

//   // Get the bounding sphere for the local processor
//   Sphere bounding_sphere =
//     MeshTools::processor_bounding_sphere (mesh, libMesh::processor_id());

//   // Just to be sure, increase its radius by 10%.  Sure would suck to
//   // miss a neighboring processor!
//   bounding_sphere.radius() *= 1.1;

//   // Collect the bounding spheres from all processors, test for intersection
//   {
//     std::vector<float>
//       send (4, 0),
//       recv (4*libMesh::n_processors(), 0);

//     send[0] = bounding_sphere.center()(0);
//     send[1] = bounding_sphere.center()(1);
//     send[2] = bounding_sphere.center()(2);
//     send[3] = bounding_sphere.radius();

//     MPI_Allgather (&send[0], send.size(), MPI_FLOAT,
//                    &recv[0], send.size(), MPI_FLOAT,
//                    libMesh::COMM_WORLD);

//     for (unsigned int proc=0; proc<libMesh::n_processors(); proc++)
//       {
//         const Point center (recv[4*proc+0],
//                             recv[4*proc+1],
//                             recv[4*proc+2]);

//         const Real radius = recv[4*proc+3];

//         const Sphere proc_sphere (center, radius);

//         if (bounding_sphere.intersects(proc_sphere))
//           _neighboring_processors.push_back(proc);
//       }

//     // Print out the _neighboring_processors list
//     std::cout << "Processor " << libMesh::processor_id()
//               << " intersects:" << std::endl;
//     for (unsigned int p=0; p<_neighboring_processors.size(); p++)
//       std::cout << "  " << _neighboring_processors[p] << std::endl;
//   }
// }
// #else
// void MeshCommunication::find_neighboring_processors (const MeshBase&)
// {
// }
// #endif



#ifndef HAVE_MPI // avoid spurious gcc warnings
void MeshCommunication::redistribute (ParallelMesh &) const
{
  // no MPI, no redistribution
  return;
}
#else
void MeshCommunication::redistribute (ParallelMesh &mesh) const
{
  // This method will be called after a new partitioning has been
  // assigned to the elements.  This partitioning was defined in
  // terms of the active elements, and "trickled down" to the
  // parents and nodes so as to be consistent.
  //
  // The point is that the entire concept of local elements is
  // somewhat shaky in this method.  Elements which were previously
  // local may now be assigned to other processors, so we need to
  // send those off.  Similarly, we need to accept elements from
  // other processors.
  //
  // The approach is as follows:
  // (1) send all elements we have stored to their proper homes
  // (2) receive elements from all processors, watching for duplicates
  // (3) delete all nonlocal elements
  // (4) obtain required ghost elements from neighboring processors
  parallel_only();
  libmesh_assert (!mesh.is_serial());
  libmesh_assert (MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
                                    mesh.unpartitioned_elements_end()) == 0);

  START_LOG("redistribute()","MeshCommunication");

  // register a derived datatype to use in shipping nodes
  MPI_Datatype packed_node_datatype = Node::PackedNode::create_mpi_datatype();
  MPI_Type_commit (&packed_node_datatype);

  // Figure out how many nodes and elements we have that are assigned to
  // each processor.  send_n_nodes_and_elem_per_proc contains the number of
  // nodes/elements we will be sending to each processor;
  // recv_n_nodes_and_elem_per_proc contains the number of nodes/elements
  // we will be receiving from each processor.
  //
  // Format:
  //  send_n_nodes_and_elem_per_proc[5*pid+0] = number of nodes to send to pid
  //  send_n_nodes_and_elem_per_proc[5*pid+1] = number of elements to send to pid
  //  send_n_nodes_and_elem_per_proc[5*pid+2] = connectivity buffer size
  //  send_n_nodes_and_elem_per_proc[5*pid+3] = node bc buffer size
  //  send_n_nodes_and_elem_per_proc[5*pid+4] = element bc buffer size
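
  // A minimal sketch of how the five per-pid slots could be named for
  // readability (hypothetical; CountSlot is not used in this file):
  //
  //   enum CountSlot { N_NODES   = 0,   // nodes to ship
  //                    N_ELEMS   = 1,   // elements to ship
  //                    CONN_SIZE = 2,   // packed connectivity buffer size
  //                    NODE_BCS  = 3,   // node bc buffer size
  //                    ELEM_BCS  = 4 }; // element bc buffer size
  //
  //   // e.g., send_n_nodes_and_elem_per_proc[5*pid + CONN_SIZE]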

  std::vector<unsigned int> send_n_nodes_and_elem_per_proc(5*libMesh::n_processors(), 0);

  std::vector<std::vector<Node::PackedNode> > nodes_sent(libMesh::n_processors());
  std::vector<std::vector<int> > elements_sent(libMesh::n_processors()),
                                 node_bcs_sent(libMesh::n_processors()),
                                 element_bcs_sent(libMesh::n_processors());

  std::vector<Parallel::request> node_send_requests, element_send_requests,
                                 node_bc_requests,   element_bc_requests;

  for (unsigned int pid=0; pid<libMesh::n_processors(); pid++)
    if (pid != libMesh::processor_id()) // don't send to ourselves!!
      {
        // Build up a list of nodes and elements to send to processor pid.
        // We will certainly send all the active elements assigned to this
        // processor, but we will also ship off the other elements in the
        // same family tree as the active ones for data structure
        // consistency.  We also ship any nodes connected to these elements.
        // Note that some of these nodes and elements may be replicated from
        // other processors, but that is OK.
        std::set<const Elem*, CompareElemIdsByLevel> elements_to_send;
        std::set<const Node*> connected_nodes;

        {
          std::vector<const Elem*> family_tree;

          MeshBase::const_element_iterator       elem_it  = mesh.active_pid_elements_begin(pid);
          const MeshBase::const_element_iterator elem_end = mesh.active_pid_elements_end(pid);

          for (; elem_it!=elem_end; ++elem_it)
            {
              const Elem *top_parent = (*elem_it)->top_parent();

              // avoid a lot of duplication -- if we already have top_parent
              // in the set, its entire family tree is already in the set.
              if (!elements_to_send.count(top_parent))
                {
#ifdef ENABLE_AMR
                  top_parent->family_tree(family_tree);
#else
                  family_tree.clear();
                  family_tree.push_back(top_parent);
#endif
                  for (unsigned int e=0; e<family_tree.size(); e++)
                    {
                      const Elem *elem = family_tree[e];
                      elements_to_send.insert (elem);

                      for (unsigned int n=0; n<elem->n_nodes(); n++)
                        connected_nodes.insert (elem->get_node(n));
                    }
                }

              // then do the same for the face neighbors
              for (unsigned int s=0; s<(*elem_it)->n_sides(); s++)
                if ((*elem_it)->neighbor(s) != NULL)
                  if (!(*elem_it)->neighbor(s)->is_remote())
                    {
                      top_parent = (*elem_it)->neighbor(s)->top_parent();

                      if (!elements_to_send.count(top_parent))
                        {
#ifdef ENABLE_AMR
                          top_parent->family_tree(family_tree);
#else
                          family_tree.clear();
                          family_tree.push_back(top_parent);
#endif
                          for (unsigned int e=0; e<family_tree.size(); e++)
                            {
                              const Elem *elem = family_tree[e];
                              elements_to_send.insert (elem);

                              for (unsigned int n=0; n<elem->n_nodes(); n++)
                                connected_nodes.insert (elem->get_node(n));
                            }
                        }
                    }
            }
        }

        // The elements_to_send set now contains all the elements stored on
        // the local processor but owned by processor pid, along with their
        // face neighbors and the entire refinement tree containing them.
        // This is a very simplistic way of ensuring data structure
        // consistency at the cost of larger communication buffers.  It is
        // worth profiling this on several parallel architectures to assess
        // its impact.
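
        // The "insert the whole family tree plus its nodes" block above
        // appears twice; a minimal sketch of a helper that could factor it
        // out (hypothetical, not part of the original file):
        //
        //   void insert_family (const Elem *top_parent,
        //                       std::set<const Elem*, CompareElemIdsByLevel> &elems,
        //                       std::set<const Node*> &nodes,
        //                       std::vector<const Elem*> &family_tree)
        //   {
        //     if (elems.count(top_parent)) return; // family already present
        //   #ifdef ENABLE_AMR
        //     top_parent->family_tree(family_tree);
        //   #else
        //     family_tree.clear();
        //     family_tree.push_back(top_parent);
        //   #endif
        //     for (unsigned int e=0; e<family_tree.size(); e++)
        //       {
        //         elems.insert (family_tree[e]);
        //         for (unsigned int n=0; n<family_tree[e]->n_nodes(); n++)
        //           nodes.insert (family_tree[e]->get_node(n));
        //       }
        //   }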

        if (!connected_nodes.empty())
          {
            // the number of nodes we will ship to pid
            send_n_nodes_and_elem_per_proc[5*pid+0] = connected_nodes.size();

            nodes_sent[pid].reserve(connected_nodes.size());

            for (std::set<const Node*>::const_iterator node_it = connected_nodes.begin();
                 node_it != connected_nodes.end(); ++node_it)
              {
                nodes_sent[pid].push_back(Node::PackedNode(**node_it));

                // add the node's boundary information if it has BCs
                if (mesh.boundary_info->boundary_id(*node_it) !=
                    mesh.boundary_info->invalid_id)
                  {
                    node_bcs_sent[pid].push_back((*node_it)->id());
                    node_bcs_sent[pid].push_back(mesh.boundary_info->boundary_id(*node_it));
                  }
              }

            // the size of the node bc buffer we will ship to pid
            send_n_nodes_and_elem_per_proc[5*pid+3] = node_bcs_sent[pid].size();

            // send the nodes off to the destination processor
            node_send_requests.push_back(Parallel::request());

            Parallel::isend (pid,
                             nodes_sent[pid],
                             packed_node_datatype,
                             node_send_requests.back(),
                             /* tag = */ 0);

            if (!node_bcs_sent[pid].empty())
              {
                node_bc_requests.push_back(Parallel::request());

                Parallel::isend (pid,
                                 node_bcs_sent[pid],
                                 node_bc_requests.back(),
                                 /* tag = */ 2);
              }
          }

        if (!elements_to_send.empty())
          {
            // the number of elements we will send to this processor
            send_n_nodes_and_elem_per_proc[5*pid+1] = elements_to_send.size();

            for (std::set<const Elem*, CompareElemIdsByLevel>::const_iterator
                   elem_it = elements_to_send.begin();
                 elem_it != elements_to_send.end(); ++elem_it)
              {
                Elem::PackedElem::pack (elements_sent[pid], *elem_it);

                // if this is a level-0 element look for boundary conditions
                if ((*elem_it)->level() == 0)
                  for (unsigned int s=0; s<(*elem_it)->n_sides(); s++)
                    if ((*elem_it)->neighbor(s) == NULL)
                      if (mesh.boundary_info->boundary_id (*elem_it, s) !=
                          mesh.boundary_info->invalid_id)
                        {
                          element_bcs_sent[pid].push_back ((*elem_it)->id());
                          element_bcs_sent[pid].push_back (s);
                          element_bcs_sent[pid].push_back
                            (mesh.boundary_info->boundary_id (*elem_it, s));
                        }
              }

            // the packed connectivity size to send to this processor
            send_n_nodes_and_elem_per_proc[5*pid+2] = elements_sent[pid].size();

            // send the elements off to the destination processor
            element_send_requests.push_back(Parallel::request());

            Parallel::isend (pid,
                             elements_sent[pid],
                             element_send_requests.back(),
                             /* tag = */ 1);

            // the size of the element bc buffer we will ship to pid
            send_n_nodes_and_elem_per_proc[5*pid+4] = element_bcs_sent[pid].size();

            if (!element_bcs_sent[pid].empty())
              {
                element_bc_requests.push_back(Parallel::request());

                Parallel::isend (pid,
                                 element_bcs_sent[pid],
                                 element_bc_requests.back(),
                                 /* tag = */ 3);
              }
          }
      }

  std::vector<unsigned int> recv_n_nodes_and_elem_per_proc(send_n_nodes_and_elem_per_proc);

  Parallel::alltoall (recv_n_nodes_and_elem_per_proc);

  // In general we will only need to communicate with a subset of the other
  // processors.  I can't immediately think of a case where we will send
  // elements but not nodes, but these are only bools and we have the
  // information anyway...
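
  // A worked sketch of the exchange semantics assumed for
  // Parallel::alltoall here: treating the buffer as one 5-entry block per
  // processor, each rank ends up holding the blocks addressed to it.  With
  // 2 processors and hypothetical counts:
  //
  //   rank 0 sends [ <to 0>,   <to 1>   ] = [0,0,0,0,0,  3,2,8,0,4]
  //   rank 1 sends [ <to 0>,   <to 1>   ] = [5,1,6,2,0,  0,0,0,0,0]
  //
  //   after the exchange:
  //   rank 0 holds [ <from 0>, <from 1> ] = [0,0,0,0,0,  5,1,6,2,0]
  //   rank 1 holds [ <from 0>, <from 1> ] = [3,2,8,0,4,  0,0,0,0,0]
  //
  // The self-block stays zero because we never send to ourselves.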

  std::vector<bool> send_node_pair(libMesh::n_processors(),false),
                    send_elem_pair(libMesh::n_processors(),false),
                    recv_node_pair(libMesh::n_processors(),false),
                    recv_elem_pair(libMesh::n_processors(),false);

  unsigned int n_send_node_pairs=0,      n_send_elem_pairs=0,
               n_send_node_bc_pairs=0,   n_send_elem_bc_pairs=0,
               n_recv_node_pairs=0,      n_recv_elem_pairs=0,
               n_recv_node_bc_pairs=0,   n_recv_elem_bc_pairs=0,
               max_n_nodes_received=0,   max_conn_size_received=0,
               max_node_bcs_received=0,  max_elem_bcs_received=0;

  for (unsigned int pid=0; pid<libMesh::n_processors(); pid++)
    {
      if (send_n_nodes_and_elem_per_proc[5*pid+0]) // we have nodes to send
        {
          send_node_pair[pid] = true;
          n_send_node_pairs++;

          if (send_n_nodes_and_elem_per_proc[5*pid+3]) // and node bcs
            n_send_node_bc_pairs++;
        }

      if (send_n_nodes_and_elem_per_proc[5*pid+1]) // we have elements to send
        {
          send_elem_pair[pid] = true;
          n_send_elem_pairs++;

          if (send_n_nodes_and_elem_per_proc[5*pid+4]) // and element bcs
            n_send_elem_bc_pairs++;
        }

      if (recv_n_nodes_and_elem_per_proc[5*pid+0]) // we have nodes to receive
        {
          recv_node_pair[pid] = true;
          n_recv_node_pairs++;

          max_n_nodes_received = std::max(max_n_nodes_received,
                                          recv_n_nodes_and_elem_per_proc[5*pid+0]);

          if (recv_n_nodes_and_elem_per_proc[5*pid+3]) // and node bcs
            {
              n_recv_node_bc_pairs++;
              max_node_bcs_received = std::max(max_node_bcs_received,
                                               recv_n_nodes_and_elem_per_proc[5*pid+3]);
            }
        }

      if (recv_n_nodes_and_elem_per_proc[5*pid+1]) // we have elements to receive
        {
          recv_elem_pair[pid] = true;
          n_recv_elem_pairs++;

          max_conn_size_received = std::max(max_conn_size_received,
                                            recv_n_nodes_and_elem_per_proc[5*pid+2]);

          if (recv_n_nodes_and_elem_per_proc[5*pid+4]) // and element bcs
            {
              n_recv_elem_bc_pairs++;
              max_elem_bcs_received = std::max(max_elem_bcs_received,
                                               recv_n_nodes_and_elem_per_proc[5*pid+4]);
            }
        }
    }

  libmesh_assert (n_send_node_pairs    == node_send_requests.size());
  libmesh_assert (n_send_elem_pairs    == element_send_requests.size());
  libmesh_assert (n_send_node_bc_pairs == node_bc_requests.size());
  libmesh_assert (n_send_elem_bc_pairs == element_bc_requests.size());

  // Receive nodes.  Size this array for the largest message.
  {
    std::vector<Node::PackedNode> received_nodes(max_n_nodes_received);

    // We now know how many processors will be sending us nodes.
    for (unsigned int node_comm_step=0; node_comm_step<n_recv_node_pairs; node_comm_step++)
      {
        // but we don't necessarily want to impose an ordering, so
        // just grab whatever message is available next.
        Parallel::Status status =
          Parallel::recv (Parallel::any_source,
                          received_nodes,
                          packed_node_datatype,
                          /* tag = */ 0);

        const unsigned int source_pid = status.source();
        const unsigned int n_nodes_received =