📄 commutil.c
字号:
/* -*- Mode: C; c-basic-offset:4 ; -*- *//* $Id: commutil.c,v 1.80 2007/05/04 17:16:15 gropp Exp $ * * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. */#include "mpiimpl.h"#include "mpicomm.h"/* This is the utility file for comm that contains the basic comm items and storage management */#ifndef MPID_COMM_PREALLOC #define MPID_COMM_PREALLOC 8#endif/* Preallocated comm objects */MPID_Comm MPID_Comm_builtin[MPID_COMM_N_BUILTIN] = { {0} };MPID_Comm MPID_Comm_direct[MPID_COMM_PREALLOC] = { {0} };MPIU_Object_alloc_t MPID_Comm_mem = { 0, 0, 0, 0, MPID_COMM, sizeof(MPID_Comm), MPID_Comm_direct, MPID_COMM_PREALLOC};/* FIXME : Reusing context ids can lead to a race condition if (as is desirable) MPI_Comm_free does not include a barrier. Consider the following: Process A frees the communicator. Process A creates a new communicator, reusing the just released id Process B sends a message to A on the old communicator. Process A receives the message, and believes that it belongs to the new communicator. Process B then cancels the message, and frees the communicator. The likelihood of this happening can be reduced by introducing a gap between when a context id is released and when it is reused. An alternative is to use an explicit message (in the implementation of MPI_Comm_free) to indicate that a communicator is being freed; this will often require less communication than a barrier in MPI_Comm_free, and will ensure that no messages are later sent to the same communicator (we may also want to have a similar check when building fault-tolerant versions of MPI). *//* Create a new communicator with a context. Do *not* initialize the other fields except for the reference count. See MPIR_Comm_copy for a function to produce a copy of part of a communicator *//* Create a communicator structure and perform basic initialization (mostly clearing fields and updating the reference count). 
*/
#undef FUNCNAME
#define FUNCNAME MPIR_Comm_create
#undef FCNAME
#define FCNAME "MPIR_Comm_create"
/*
 * MPIR_Comm_create - take a communicator object from MPID_Comm_mem and put
 * it into a minimal initial state.
 *
 * newcomm_ptr - output; receives the new object.  It is written only when
 *               the allocation succeeds.
 *
 * Returns MPI_SUCCESS, or an error code created with the "**nomem" message
 * when no object could be allocated.
 *
 * The context_id, the local and remote sizes, and the communicator kind are
 * deliberately left unset: each communicator construction routine needs
 * different values for them.
 */
int MPIR_Comm_create( MPID_Comm **newcomm_ptr )
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Comm *comm;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_CREATE);

    comm = (MPID_Comm *)MPIU_Handle_obj_alloc( &MPID_Comm_mem );
    if (comm) {
        /* Hand the object to the caller with a single reference */
        *newcomm_ptr = comm;
        MPIU_Object_set_ref( comm, 1 );

        /* Empty values mean "use the default"; the communicator
           initialization code may override any of these later */
        comm->name[0]      = 0;
        comm->topo_fns     = 0;
        comm->coll_fns     = 0;
        comm->local_group  = 0;
        comm->remote_group = 0;
        comm->attributes   = 0;
        comm->errhandler   = 0;

        /* Record the communicator in the list of known communicators.
           (Note carried over from the original: make this conditional on
           debugger support to match the test in MPIR_Comm_release.) */
        MPIR_COMML_REMEMBER( comm );
    }
    /* --BEGIN ERROR HANDLING-- */
    else {
        mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                          FCNAME, __LINE__, MPI_ERR_OTHER,
                                          "**nomem", 0 );
    }
    /* --END ERROR HANDLING-- */

    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_CREATE);
    return mpi_errno;
}

/* Create a local intra communicator from the local group of the
   specified intercomm. */
/* FIXME : For the context id, use the intercomm's context id + 2. (?) 
*/
#undef FUNCNAME
#define FUNCNAME MPIR_Setup_intercomm_localcomm
#undef FCNAME
#define FCNAME "MPIR_Setup_intercomm_localcomm"
/*
 * MPIR_Setup_intercomm_localcomm - build the intracommunicator that covers
 * the local group of an intercommunicator and store it in
 * intercomm_ptr->local_comm.
 *
 * intercomm_ptr - the intercommunicator whose local side is used; its
 *                 local_comm field is set on success.
 *
 * Returns MPI_SUCCESS, or an error code created with the "**nomem" message
 * when no communicator object could be allocated (in which case
 * intercomm_ptr is not modified).
 */
int MPIR_Setup_intercomm_localcomm( MPID_Comm *intercomm_ptr )
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Comm *lcomm;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);

    lcomm = (MPID_Comm *)MPIU_Handle_obj_alloc( &MPID_Comm_mem );
    /* --BEGIN ERROR HANDLING-- */
    if (!lcomm) {
        mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                          FCNAME, __LINE__, MPI_ERR_OTHER,
                                          "**nomem", 0 );
        MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
        return mpi_errno;
    }
    /* --END ERROR HANDLING-- */

    MPIU_Object_set_ref( lcomm, 1 );

    /* The result is an intra (not inter) communicator ... */
    lcomm->comm_kind = MPID_INTRACOMM;

    /* ... so the same value serves as both context ids.  It is derived
       from the intercomm's recvcontext_id, which means this context id is
       shared with the intercomm and must not be freed separately.
       FIXME: This was + 2 (in agreement with the docs) but that caused
       some errors with an apparent use of the same context id by
       operations in different communicators.  Switching this to +1 seems
       to have fixed that problem, but this isn't the right answer. */
    lcomm->context_id     = intercomm_ptr->recvcontext_id + 1;
    lcomm->recvcontext_id = intercomm_ptr->recvcontext_id + 1;

    /* Share the intercomm's local VCRT: take a reference, then copy the
       table and VCR pointers */
    MPID_VCRT_Add_ref( intercomm_ptr->local_vcrt );
    lcomm->vcrt = intercomm_ptr->local_vcrt;
    lcomm->vcr  = intercomm_ptr->local_vcr;

    /* For an intracommunicator both sizes are the local size */
    lcomm->rank        = intercomm_ptr->rank;
    lcomm->local_size  = intercomm_ptr->local_size;
    lcomm->remote_size = intercomm_ptr->local_size;

    /* More advanced version: if the group is available, dup it by
       increasing the reference count */
    lcomm->remote_group = 0;
    lcomm->local_group  = 0;

    /* This is an internal communicator, so no error handler */
    lcomm->errhandler = 0;

    /* FIXME : No local functions for the topology routines */
    lcomm->topo_fns = 0;
    /* FIXME : No local functions for the collectives */
    lcomm->coll_fns = 0;

    lcomm->attributes = 0;
    /* We do *not* inherit any name */
    lcomm->name[0] = 0;

    intercomm_ptr->local_comm = lcomm;

    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
    return mpi_errno;
}

/*
 * Here are the routines to find a new context id.  The algorithm is discussed
 * in detail in the mpich2 coding document.  There are versions for
 * single threaded and multithreaded MPI.
 *
 * These assume that int is 32 bits; they should use uint_32 instead,
 * and an MPI_UINT32 type (should be able to use MPI_INTEGER4)
 */

/* Both the threaded and non-threaded routines use the same mask of
   available context id values. 
*/#define MAX_CONTEXT_MASK 32static unsigned int context_mask[MAX_CONTEXT_MASK];static int initialize_context_mask = 1;#ifdef USE_DBG_LOGGING/* Create a string that contains the context mask. This is used only with the logging interface, and must be used by one thread at a time (should this be enforced by the logging interface?). Converts the mask to hex and returns a pointer to that string */static char *MPIR_ContextMaskToStr( void ){ static char bufstr[MAX_CONTEXT_MASK*8+1]; int i; int maxset=0; for (maxset=MAX_CONTEXT_MASK-1; maxset>=0; maxset--) { if (context_mask[maxset] != 0) break; } for (i=0; i<maxset; i++) { MPIU_Snprintf( &bufstr[i*8], 9, "%.8x", context_mask[i] ); } return bufstr;}#endifstatic void MPIR_Init_contextid (void) { int i; for (i=1; i<MAX_CONTEXT_MASK; i++) { context_mask[i] = 0xFFFFFFFF; } /* the first three values are already used (comm_world, comm_self, and the internal-only copy of comm_world) */ context_mask[0] = 0xFFFFFFF8; initialize_context_mask = 0;}/* Return the context id corresponding to the first set bit in the mask. Return 0 if no bit found */static int MPIR_Find_context_bit( unsigned int local_mask[] ) { int i, j, context_id = 0; for (i=0; i<MAX_CONTEXT_MASK; i++) { if (local_mask[i]) { /* There is a bit set in this word. */ register unsigned int val, nval; /* The following code finds the highest set bit by recursively checking the top half of a subword for a bit, and incrementing the bit location by the number of bit of the lower sub word if the high subword contains a set bit. 
The assumption is that full-word bitwise operations and compares against zero are fast */ val = local_mask[i]; j = 0; nval = val & 0xFFFF0000; if (nval) { j += 16; val = nval; } nval = val & 0xFF00FF00; if (nval) { j += 8; val = nval; } nval = val & 0xF0F0F0F0; if (nval) { j += 4; val = nval; } nval = val & 0xCCCCCCCC; if (nval) { j += 2; val = nval; } if (val & 0xAAAAAAAA) { j += 1; } context_mask[i] &= ~(1<<j); context_id = 4 * (32 * i + j); MPIU_DBG_MSG_FMT(COMM,VERBOSE,(MPIU_DBG_FDEST, "allocating contextid = %d, (mask[%d], bit %d\n", context_id, i, j ) ); return context_id; } } return 0;}#ifndef MPICH_IS_THREADED/* Unthreaded (only one MPI call active at any time) */#undef FUNCNAME#define FUNCNAME MPIR_Get_contextid#undef FCNAME#define FCNAME "MPIR_Get_contextid"int MPIR_Get_contextid( MPID_Comm *comm_ptr ){ int context_id = 0; int mpi_errno = 0; unsigned int local_mask[MAX_CONTEXT_MASK]; MPIU_THREADPRIV_DECL; MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID); MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_CONTEXTID); if (initialize_context_mask) { MPIR_Init_contextid(); } memcpy( local_mask, context_mask, MAX_CONTEXT_MASK * sizeof(int) ); MPIU_THREADPRIV_GET; MPIR_Nest_incr(); /* Comm must be an intracommunicator */ mpi_errno = NMPI_Allreduce( MPI_IN_PLACE, local_mask, MAX_CONTEXT_MASK, MPI_INT, MPI_BAND, comm_ptr->handle ); MPIR_Nest_decr(); /* FIXME: We should return the error code upward */ if (mpi_errno) { MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GET_CONTEXTID); return 0; } context_id = MPIR_Find_context_bit( local_mask ); MPIU_DBG_MSG_S(COMM,VERBOSE,"Context mask = %s",MPIR_ContextMaskToStr()); /* return 0 if no context id found. 
The calling routine should check for this and generate the appropriate error code */ MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GET_CONTEXTID); return context_id;}#else /* MPICH_IS_THREADED is set and true *//* Additional values needed to maintain thread safety */static volatile int mask_in_use = 0;/* lowestContextId is used to break ties when multiple threads are contending for the mask */#define MPIR_MAXID (1 << 30)static volatile int lowestContextId = MPIR_MAXID;#undef FUNCNAME#define FUNCNAME MPIR_Get_contextid#undef FCNAME#define FCNAME "MPIR_Get_contextid"int MPIR_Get_contextid( MPID_Comm *comm_ptr ){ int context_id = 0; int mpi_errno = 0; unsigned int local_mask[MAX_CONTEXT_MASK]; int own_mask = 0; MPIU_THREADPRIV_DECL; MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID); MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_CONTEXTID); MPIU_THREADPRIV_GET; /* We increment the nest level now because we need to know that we're within another MPI routine before calling the CS_ENTER macro */ MPIR_Nest_incr(); /* The SINGLE_CS_ENTER/EXIT macros are commented out because this routine shold always be called from within a routine that has already entered the single critical section. However, in a finer-grained approach, these macros indicate where atomic updates to the shared data structures must be protected. */ /* We lock only around access to the mask. 
If another thread is using the mask, we take a mask of zero */ MPIU_DBG_MSG_FMT( COMM, VERBOSE, (MPIU_DBG_FDEST, "Entering; shared state is %d:%d", mask_in_use, lowestContextId ) ); while (context_id == 0) { /* MPIU_THREAD_SINGLE_CS_ENTER("context_id"); */ if (initialize_context_mask) { MPIR_Init_contextid(); } if (mask_in_use || comm_ptr->context_id > lowestContextId) { memset( local_mask, 0, MAX_CONTEXT_MASK * sizeof(int) ); own_mask = 0; if (comm_ptr->context_id < lowestContextId) { lowestContextId = comm_ptr->context_id; } MPIU_DBG_MSG_D( COMM, VERBOSE, "In in-use, sed lowestContextId to %d", lowestContextId ); } else { memcpy( local_mask, context_mask, MAX_CONTEXT_MASK * sizeof(int) ); mask_in_use = 1; own_mask = 1; lowestContextId = comm_ptr->context_id; MPIU_DBG_MSG( COMM, VERBOSE, "Copied local_mask" ); } /* MPIU_THREAD_SINGLE_CS_EXIT("context_id"); */ /* Now, try to get a context id */ /* Comm must be an intracommunicator */ mpi_errno = NMPI_Allreduce( MPI_IN_PLACE, local_mask, MAX_CONTEXT_MASK, MPI_INT, MPI_BAND, comm_ptr->handle ); /* FIXME: On error, return mpi_errno upward */ if (mpi_errno) { MPIR_Nest_decr(); MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GET_CONTEXTID); return 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -