intercomm_create.c
     * operations. */
    if (comm_ptr->rank == local_leader) {

        MPID_Comm_get_ptr( peer_comm, peer_comm_ptr );
#ifdef HAVE_ERROR_CHECKING
        {
            MPID_BEGIN_ERROR_CHECKS;
            {
                MPID_Comm_valid_ptr( peer_comm_ptr, mpi_errno );
                /* Note: In MPI 1.0, peer_comm was restricted to
                   intracommunicators.  In 1.1, it may be any
                   communicator. */

                /* In checking the rank of the remote leader, allow the
                   peer_comm to be an intercommunicator by checking
                   against the remote size. */
                if (!mpi_errno && peer_comm_ptr &&
                    (remote_leader < 0 ||
                     remote_leader >= peer_comm_ptr->remote_size)) {
                    MPIU_ERR_SET2(mpi_errno, MPI_ERR_RANK,
                                  "**rankremote", "**rankremote %d %d",
                                  remote_leader, peer_comm_ptr->remote_size );
                }
                /* Check that the local leader and the remote leader are
                   different processes.  This test requires looking at the
                   lpid for the two ranks in their respective
                   communicators. */
                /* FIXME: Why is this test commented out? */
                /*
                if (local_leader == remote_leader) {
                    mpi_errno = MPIR_Err_create_code( MPI_SUCCESS,
                                        MPIR_ERR_RECOVERABLE, FCNAME,
                                        __LINE__, MPI_ERR_RANK,
                                        "**ranksdistinct", 0 );
                }
                */
                if (mpi_errno) goto fn_fail;
            }
            MPID_END_ERROR_CHECKS;
        }
#endif /* HAVE_ERROR_CHECKING */

        MPIR_Nest_incr();
        /* First, exchange the group information.  If we were certain that
           the groups were disjoint, we could exchange possible context ids
           at the same time, saving one communication.  But experience has
           shown that that is a risky assumption. */
        /* Exchange information with my peer.  Use sendrecv. */
        local_size = comm_ptr->local_size;

        MPIU_DBG_PRINTF(("rank %d sendrecv to rank %d\n",
                         peer_comm_ptr->rank, remote_leader));
        mpi_errno = NMPI_Sendrecv( &local_size,  1, MPI_INT,
                                   remote_leader, tag,
                                   &remote_size, 1, MPI_INT,
                                   remote_leader, tag,
                                   peer_comm, MPI_STATUS_IGNORE );

        MPIU_DBG_PRINTF(( "local size = %d, remote size = %d\n",
                          local_size, remote_size ));

        /* With this information, we can now send and receive the global
           process ids from the peer. */
        MPIU_CHKLMEM_MALLOC(remote_gpids,int*,2*remote_size*sizeof(int),
                            mpi_errno,"remote_gpids");
        MPIU_CHKLMEM_MALLOC(remote_lpids,int*,remote_size*sizeof(int),
                            mpi_errno,"remote_lpids");
        MPIU_CHKLMEM_MALLOC(local_gpids,int*,2*local_size*sizeof(int),
                            mpi_errno,"local_gpids");
        MPIU_CHKLMEM_MALLOC(local_lpids,int*,local_size*sizeof(int),
                            mpi_errno,"local_lpids");

        MPID_GPID_GetAllInComm( comm_ptr, local_size, local_gpids,
                                &singlePG );

        /* Exchange the gpid arrays */
        NMPI_Sendrecv( local_gpids, 2*local_size, MPI_INT,
                       remote_leader, tag,
                       remote_gpids, 2*remote_size, MPI_INT,
                       remote_leader, tag, peer_comm, MPI_STATUS_IGNORE );

        /* Convert the remote gpids to lpids */
        mpi_errno = MPID_GPID_ToLpidArray( remote_size,
                                           remote_gpids, remote_lpids );
        if (mpi_errno) { MPIR_Nest_decr(); goto fn_fail; }

        /* Get our own lpids */
        MPID_LPID_GetAllInComm( comm_ptr, local_size, local_lpids );

#ifdef HAVE_ERROR_CHECKING
        {
            MPID_BEGIN_ERROR_CHECKS;
            {
                /* Now that we have both the local and remote processes,
                   check for any overlap */
                mpi_errno = MPIR_CheckDisjointLpids( local_lpids, local_size,
                                                     remote_lpids,
                                                     remote_size );
                if (mpi_errno) { MPIR_Nest_decr(); goto fn_fail; }
            }
            MPID_END_ERROR_CHECKS;
        }
#endif /* HAVE_ERROR_CHECKING */

        /* Make an arbitrary decision about which group of processes is the
           low group.  The LEADERS do this by comparing the local process
           ids of the 0th member of the two groups. */
        is_low_group = local_lpids[0] < remote_lpids[0];

        /* At this point, we're done with the local lpids; they'll be freed
           with the other local memory on exit. */
        MPIR_Nest_decr();
    } /* End of the first phase of the leader communication */
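#if 0
    /* Illustrative only (not compiled): a minimal sketch of the leader
     * handshake pattern above, written against the public MPI API rather
     * than the NMPI_ internals.  The function name and parameters here are
     * hypothetical; 'peer_comm', 'remote_leader', and 'tag' are assumed to
     * be set up as in MPI_Intercomm_create's arguments. */
    static int exchange_group_sizes( MPI_Comm local_comm, MPI_Comm peer_comm,
                                     int remote_leader, int tag,
                                     int *remote_size_out )
    {
        int local_size, remote_size, mpi_errno;

        MPI_Comm_size( local_comm, &local_size );
        /* A single MPI_Sendrecv avoids the deadlock that two blocking
           MPI_Send/MPI_Recv pairs could produce if both leaders tried to
           send first. */
        mpi_errno = MPI_Sendrecv( &local_size,  1, MPI_INT,
                                  remote_leader, tag,
                                  &remote_size, 1, MPI_INT,
                                  remote_leader, tag,
                                  peer_comm, MPI_STATUS_IGNORE );
        *remote_size_out = remote_size;
        return mpi_errno;
    }
#endif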
    /*
     * Create the contexts.  Each group will have a context for sending to
     * the other group.  All processes must be involved.  Because we know
     * that the local and remote groups are disjoint, this step will
     * complete.
     */
    MPIU_DBG_PRINTF(( "About to get contextid (commsize=%d) on %d\n",
                      comm_ptr->local_size, comm_ptr->rank ));
    context_id = MPIR_Get_contextid( comm_ptr );
    if (context_id == 0) {
        MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**toomanycomm");
    }
    MPIU_DBG_PRINTF(( "Got contextid\n" ));

    /* Increment the nest count for everyone because all processes will be
       communicating now. */
    MPIR_Nest_incr();

    /* Leaders can now swap context ids and then broadcast the value to the
       local group of processes. */
    if (comm_ptr->rank == local_leader) {
        int remote_context_id;

        NMPI_Sendrecv( &context_id, 1, MPI_INT, remote_leader, tag,
                       &remote_context_id, 1, MPI_INT, remote_leader, tag,
                       peer_comm, MPI_STATUS_IGNORE );

        /* We need to do something with the context ids.  For MPI-1, we can
           just take the min of the two context ids and use that value.
           For MPI-2, we'll need to have separate send and receive context
           ids - FIXME */
        if (remote_context_id < context_id)
            final_context_id = remote_context_id;
        else
            final_context_id = context_id;

        /* Now, send all of our local processes the remote_gpids, along
           with the final context id. */
        comm_info[0] = remote_size;
        comm_info[1] = final_context_id;
        comm_info[2] = is_low_group;
        MPIU_DBG_PRINTF(("About to bcast on local_comm\n"));
        NMPI_Bcast( comm_info, 3, MPI_INT, local_leader, local_comm );
        NMPI_Bcast( remote_gpids, 2*remote_size, MPI_INT, local_leader,
                    local_comm );
        MPIU_DBG_PRINTF(( "end of bcast on local_comm of size %d\n",
                          comm_ptr->local_size ));

        /* FIXME: Here we should ensure that all processes have all of the
           process groups (allreduce to check, unless the only group is the
           group of comm world).  If not all processes have all process
           groups, call a routine to bcast process group information to the
           other processes.  Do this before the GPID_ToLpidArray call,
           since that call will rely on having that information. */
        MPID_PG_ForwardPGInfo( peer_comm_ptr, comm_ptr,
                               remote_size, remote_gpids, local_leader );
    }
    else {
        /* We're the other processes. */
        MPIU_DBG_PRINTF(("About to receive bcast on local_comm\n"));
        NMPI_Bcast( comm_info, 3, MPI_INT, local_leader, local_comm );
        remote_size = comm_info[0];
        MPIU_CHKLMEM_MALLOC(remote_gpids,int*,2*remote_size*sizeof(int),
                            mpi_errno,"remote_gpids");
        MPIU_CHKLMEM_MALLOC(remote_lpids,int*,remote_size*sizeof(int),
                            mpi_errno,"remote_lpids");
        NMPI_Bcast( remote_gpids, 2*remote_size, MPI_INT, local_leader,
                    local_comm );

        /* Check on the remote process groups */
        MPID_PG_ForwardPGInfo( peer_comm_ptr, comm_ptr,
                               remote_size, remote_gpids, local_leader );

        /* Convert the remote gpids to lpids */
        mpi_errno = MPID_GPID_ToLpidArray( remote_size,
                                           remote_gpids, remote_lpids );
        if (mpi_errno) { MPIR_Nest_decr(); goto fn_fail; }

        /* Extract the context id and the group-sign information */
        final_context_id = comm_info[1];
        is_low_group     = comm_info[2];
    }

    /* If we did not choose this context, free it.  We won't do this once
       we have MPI-2 intercomms (at least, not for intercomms that are not
       subsets of MPI_COMM_WORLD) - FIXME */
    if (final_context_id != context_id) {
        MPIR_Free_contextid( context_id );
    }
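#if 0
    /* Illustrative only (not compiled): the agreement step above in
     * isolation, against the public MPI API.  The two leaders swap a
     * locally chosen id and each deterministically picks the smaller one,
     * so both sides arrive at the same value without a second message; the
     * result is then broadcast over each local group.  The function name
     * and parameters here are hypothetical, not MPICH internals. */
    static int agree_on_context_id( int my_context_id, MPI_Comm local_comm,
                                    MPI_Comm peer_comm, int local_leader,
                                    int remote_leader, int tag, int my_rank )
    {
        int final_id = my_context_id;

        if (my_rank == local_leader) {
            int remote_id;
            MPI_Sendrecv( &my_context_id, 1, MPI_INT, remote_leader, tag,
                          &remote_id,     1, MPI_INT, remote_leader, tag,
                          peer_comm, MPI_STATUS_IGNORE );
            /* min() is symmetric, so both leaders compute the same value */
            final_id = (remote_id < my_context_id) ? remote_id
                                                   : my_context_id;
        }
        /* Non-leaders receive the agreed value from their leader */
        MPI_Bcast( &final_id, 1, MPI_INT, local_leader, local_comm );
        return final_id;
    }
#endif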
    /* Finish up by giving the device the opportunity to update any other
       information among these processes.  Note that the new intercomm has
       not been set up; in fact, we haven't yet attempted to set up the
       connection tables.
     */
#ifdef MPID_ICCREATE_REMOTECOMM_HOOK
    MPID_ICCREATE_REMOTECOMM_HOOK( comm_ptr, peer_comm_ptr,
                                   local_leader, remote_leader );
#endif

    /* At last, we now have the information that we need to build the
       intercommunicator. */

    /* Decrement the nesting count because we're done making MPI calls. */
    MPIR_Nest_decr();

    /* All processes in local_comm now build the communicator. */

    newcomm_ptr = (MPID_Comm *)MPIU_Handle_obj_alloc( &MPID_Comm_mem );
    if (!newcomm_ptr) {
        MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
    }

    MPIU_Object_set_ref( newcomm_ptr, 1 );
    newcomm_ptr->attributes   = 0;
    newcomm_ptr->context_id   = final_context_id;
    newcomm_ptr->remote_size  = remote_size;
    newcomm_ptr->local_size   = comm_ptr->local_size;
    newcomm_ptr->rank         = comm_ptr->rank;
    newcomm_ptr->local_group  = 0;
    newcomm_ptr->remote_group = 0;
    newcomm_ptr->comm_kind    = MPID_INTERCOMM;
    newcomm_ptr->local_comm   = 0;
    newcomm_ptr->is_low_group = is_low_group;

    mpi_errno = MPID_VCR_CommFromLpids( newcomm_ptr, remote_size,
                                        remote_lpids );
    if (mpi_errno) goto fn_fail;

    /* Set up the communicator's vc table for the local group.  This is
       just a duplicate of local_comm's group. */
    MPID_VCRT_Create( comm_ptr->local_size, &newcomm_ptr->local_vcrt );
    MPID_VCRT_Get_ptr( newcomm_ptr->local_vcrt, &newcomm_ptr->local_vcr );
    for (i=0; i<comm_ptr->local_size; i++) {
        MPID_VCR_Dup( comm_ptr->vcr[i], &newcomm_ptr->local_vcr[i] );
    }

    /* Inherit the error handler (if any) */
    newcomm_ptr->errhandler = comm_ptr->errhandler;
    if (comm_ptr->errhandler) {
        MPIU_Object_add_ref( comm_ptr->errhandler );
    }

    /* Notify the device of this new communicator */
    MPID_Dev_comm_create_hook( newcomm_ptr );

    *newintercomm = newcomm_ptr->handle;

    /* ... end of body of routine ... */

  fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INTERCOMM_CREATE);
    MPID_CS_EXIT();
    return mpi_errno;

  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
#ifdef HAVE_ERROR_CHECKING
    {
        mpi_errno = MPIR_Err_create_code(
            mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER,
            "**mpi_intercomm_create",
            "**mpi_intercomm_create %C %d %C %d %d %p",
            local_comm, local_leader, peer_comm, remote_leader, tag,
            newintercomm );
    }
#endif
    mpi_errno = MPIR_Err_return_comm( comm_ptr, FCNAME, mpi_errno );
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
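#if 0
/* Illustrative only (not compiled): how an application typically drives
 * this routine, using only the public MPI API.  MPI_COMM_WORLD is split
 * into two disjoint halves, rank 0 of each half acts as the local leader,
 * and MPI_COMM_WORLD itself serves as the peer communicator in which the
 * two leaders are ranks 0 and size/2.  Assumes at least two processes. */
#include <mpi.h>

int main( int argc, char *argv[] )
{
    int rank, size, color;
    MPI_Comm local_comm, intercomm;

    MPI_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &size );

    color = (rank < size / 2) ? 0 : 1;    /* two disjoint groups */
    MPI_Comm_split( MPI_COMM_WORLD, color, rank, &local_comm );

    /* The local leader is rank 0 of each half; the remote leader is named
       by its rank in the peer communicator (MPI_COMM_WORLD here). */
    MPI_Intercomm_create( local_comm, 0, MPI_COMM_WORLD,
                          color ? 0 : size / 2, /* remote leader */
                          12345 /* tag */, &intercomm );

    MPI_Comm_free( &intercomm );
    MPI_Comm_free( &local_comm );
    MPI_Finalize();
    return 0;
}
#endif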