📄 ch3_rma_ops.c
字号:
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpidi_ch3_impl.h"

/*
 * MPIDI_CH3_Win_create()
 *
 * Channel-specific window creation.  The generic MPIDI_Win_create() is
 * called first with the same arguments; afterwards the processes of
 * comm_ptr agree (via an allreduce) on whether every window base lies
 * inside a shared-memory block known to this channel.
 *
 *  - If at least one process reports a base outside shared memory, the
 *    entries of RMAFns for Win_free, Put, Get, Accumulate and the
 *    synchronization calls are reset to the generic MPIDI_* versions.
 *  - Otherwise RMAFns is (re)initialized with MPIDI_CH3_RMAFnsInit() and
 *    the shared-memory descriptors, window offsets, lock segment and
 *    post-start-complete-wait (pscw) segments are exchanged and attached.
 *
 * Parameters:
 *   base, size, disp_unit, info - forwarded unchanged to MPIDI_Win_create();
 *                                 base is additionally looked up in the
 *                                 channel's list of shared-memory blocks
 *   comm_ptr                    - communicator over which the window is
 *                                 created; all collectives here use it
 *   win_ptr                     - window object produced by
 *                                 MPIDI_Win_create(), filled in further here
 *   RMAFns                      - RMA function table that is updated to the
 *                                 generic or channel-specific functions
 *
 * Returns MPI_SUCCESS or an MPI error code.  On failure the memory
 * registered through MPIU_CHKPMEM_MALLOC in this function is released by
 * MPIU_CHKPMEM_REAP().
 */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_Win_create
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_CH3_Win_create(void *base, MPI_Aint size, int disp_unit, MPID_Info *info,
                         MPID_Comm *comm_ptr, MPID_Win **win_ptr, MPIDI_RMAFns *RMAFns)
{
    int mpi_errno=MPI_SUCCESS, i, comm_size, rank, found, result;
    void *offset=NULL;
    MPIDI_CH3I_Alloc_mem_list_t *curr_ptr;
    MPIDU_Process_lock_t *locks_base_addr;
    int *shared_lock_state_baseaddr;
    volatile char *pscw_sync_addr;
    MPIU_CHKPMEM_DECL(4);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_CREATE);
    MPIDI_STATE_DECL(MPID_STATE_MEMCPY);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_WIN_CREATE);

    /* The nest count is raised before the first call that can jump to
       fn_fail: fn_exit unconditionally calls MPIR_Nest_decr(), so raising
       it any later would leave the count unbalanced when
       MPIDI_Win_create() fails. */
    MPIR_Nest_incr();

    /* We first call the generic MPIDI_Win_create */
    mpi_errno = MPIDI_Win_create(base, size, disp_unit, info, comm_ptr, win_ptr, RMAFns);
    if (mpi_errno) {
        MPIU_ERR_POP(mpi_errno);
    }

    /* Now we do the channel-specific stuff */

    /* All processes first check whether their base address refers to an
       address in shared memory. If everyone's address is in shared
       memory, the channel-specific (shared-memory optimized) RMA
       functions are used. If even one process's win_base is not in
       shared memory, we revert to the generic implementation of RMA in
       CH3 by resetting the function pointers in RMAFns. */

    /* For the special case where win_base is NULL, we treat it as if
     * it is found in shared memory, so as not to disable
     * optimizations. For example, where the window is allocated in
     * shared memory on one process and others call win_create with NULL. */

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    curr_ptr = MPIDI_CH3I_Get_mem_list_head();
    if (base == NULL) {
        found = 1;
    }
    else {
        found = 0;
        while (curr_ptr != NULL) {
            if ((curr_ptr->shm_struct->addr <= base) &&
                (base < (void *) ((char *) curr_ptr->shm_struct->addr + curr_ptr->shm_struct->size))) {
                found = 1;
                /* Offset of the window base from the start of its
                   shared-memory block.  The test above guarantees
                   addr <= base, so this is non-negative.
                   NOTE(review): assumes users of win_ptr->offsets add
                   this value to the attached block address - confirm
                   against the Put/Get/Accumulate implementations. */
                offset = (void *) ((char *) base - (char *) curr_ptr->shm_struct->addr);
                break;
            }
            curr_ptr = curr_ptr->next;
        }
    }

    /* found is 0 or 1 on every process, so the bitwise AND is 1 only if
       every process found its base in shared memory */
    mpi_errno = NMPI_Allreduce(&found, &result, 1, MPI_INT, MPI_BAND, comm_ptr->handle);
    if (mpi_errno) {
        MPIU_ERR_POP(mpi_errno);
    }

    if (result == 0) {
        /* not all windows are in shared memory. can't be optimized.
           Reset the function pointers for the RMA functions to the
           default. */

        (*win_ptr)->shm_structs = NULL;
        (*win_ptr)->locks = NULL;

        RMAFns->Win_free = MPIDI_Win_free;
        RMAFns->Put = MPIDI_Put;
        RMAFns->Get = MPIDI_Get;
        RMAFns->Accumulate = MPIDI_Accumulate;
        RMAFns->Win_fence = MPIDI_Win_fence;
        RMAFns->Win_post = MPIDI_Win_post ;
        RMAFns->Win_start = MPIDI_Win_start;
        RMAFns->Win_complete = MPIDI_Win_complete ;
        RMAFns->Win_wait = MPIDI_Win_wait;
        RMAFns->Win_lock = MPIDI_Win_lock;
        RMAFns->Win_unlock = MPIDI_Win_unlock;

        /* leave win_create, alloc_mem, and free_mem as they are (set to the
           channel-specific version) */
    }
    else {
        /* all windows in shared memory. can be optimized */

        /* again set the channel-specific version of the RMA functions
           because they may have been reset to the default in an earlier
           call to win_create */
        MPIDI_CH3_RMAFnsInit( RMAFns );

        /* allocate memory for the shm_structs */
        MPIU_CHKPMEM_MALLOC((*win_ptr)->shm_structs, MPIDI_CH3I_Shmem_block_request_result *,
                            comm_size * sizeof(MPIDI_CH3I_Shmem_block_request_result),
                            mpi_errno, "(*win_ptr)->shm_structs");

        /* allocate memory for the offsets from base of shared memory */
        MPIU_CHKPMEM_MALLOC((*win_ptr)->offsets, void **,
                            comm_size * sizeof(void *),
                            mpi_errno, "(*win_ptr)->offsets");

        if (base != NULL) {
            /* copy this process's shmem struct into right location in
               array of shmem structs. curr_ptr still refers to the list
               entry that matched base in the search loop above. */
            MPIDI_FUNC_ENTER(MPID_STATE_MEMCPY);
            memcpy(&((*win_ptr)->shm_structs[rank]), curr_ptr->shm_struct,
                   sizeof(MPIDI_CH3I_Shmem_block_request_result));
            MPIDI_FUNC_EXIT(MPID_STATE_MEMCPY);

            /* copy this process's offset into right location in array
               of offsets */
            (*win_ptr)->offsets[rank] = offset;
        }
        else {
            /* NULL base: publish an empty block; the size of 0 makes the
               other processes skip the attach step below */
            (*win_ptr)->shm_structs[rank].addr = NULL;
            (*win_ptr)->shm_structs[rank].size = 0;

            (*win_ptr)->offsets[rank] = NULL;
        }

        /* collect everyone's shm_structs and offsets */
        mpi_errno = NMPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                                   (*win_ptr)->shm_structs,
                                   sizeof(MPIDI_CH3I_Shmem_block_request_result),
                                   MPI_BYTE, comm_ptr->handle);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }

        mpi_errno = NMPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                                   (*win_ptr)->offsets, sizeof(void *),
                                   MPI_BYTE, comm_ptr->handle);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }

        /* each process now attaches to the shared memory segments
           (windows) of other processes (if they are non-zero), so that
           direct RMA is possible. The newly mapped addresses are stored
           in the addr field in the shmem struct. */
        for (i=0; i<comm_size; i++) {
            if ((i != rank) && ((*win_ptr)->shm_structs[i].size != 0)) {
                mpi_errno = MPIDI_CH3I_SHM_Attach_notunlink_mem( &((*win_ptr)->shm_structs[i]) );
                if (mpi_errno) {
                    MPIU_ERR_POP(mpi_errno);
                }
            }
        }

        /* allocate memory for the locks and the shared lock
         * state. locks are needed for accumulate operations and for
         * passive target RMA. */
        MPIU_CHKPMEM_MALLOC((*win_ptr)->locks, MPIDI_CH3I_Shmem_block_request_result *,
                            sizeof(MPIDI_CH3I_Shmem_block_request_result),
                            mpi_errno, "(*win_ptr)->locks");

        /* Rank 0 allocates shared memory for an array of locks, one for
           each process, and for an array of shared lock states and
           initializes the locks and the shared lock state. The segment
           layout is comm_size locks followed by comm_size ints. */
        if (rank == 0) {
            mpi_errno = MPIDI_CH3I_SHM_Get_mem(comm_size * sizeof(MPIDU_Process_lock_t) +
                                               comm_size * sizeof(int),
                                               (*win_ptr)->locks);
            MPIU_ERR_CHKANDJUMP(mpi_errno,mpi_errno,MPI_ERR_OTHER,"**nomem");

            locks_base_addr = (*win_ptr)->locks->addr;
            for (i=0; i<comm_size; i++) {
                MPIDU_Process_lock_init(&locks_base_addr[i]);
            }

            shared_lock_state_baseaddr = (int *) ((char *) (*win_ptr)->locks->addr +
                                                  comm_size * sizeof(MPIDU_Process_lock_t));
            /* initialize shared lock state of all processes to 0 */
            for (i=0; i<comm_size; i++) {
                shared_lock_state_baseaddr[i] = 0;
            }
        }

        /* rank 0 broadcasts the locks struct to others so that they can
           attach to the shared memory containing the locks */
        mpi_errno = NMPI_Bcast((*win_ptr)->locks,
                               sizeof(MPIDI_CH3I_Shmem_block_request_result),
                               MPI_BYTE, 0, comm_ptr->handle);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }

        /* Processes other than rank 0 attach to the shared memory
           containing the lock structure. */
        if (rank != 0) {
            mpi_errno = MPIDI_CH3I_SHM_Attach_notunlink_mem( (*win_ptr)->locks );
            if (mpi_errno) {
                MPIU_ERR_POP(mpi_errno);
            }
        }

        /* allocate memory for the shm struct needed to allocate shared
           memory for synchronizing post-start-complete-wait. */
        MPIU_CHKPMEM_MALLOC((*win_ptr)->pscw_shm_structs, MPIDI_CH3I_Shmem_block_request_result *,
                            comm_size * sizeof(MPIDI_CH3I_Shmem_block_request_result),
                            mpi_errno, "(*win_ptr)->pscw_shm_structs");

        /* every process requests its own segment of 2 * comm_size bytes */
        mpi_errno = MPIDI_CH3I_SHM_Get_mem(2 * comm_size,
                                           &(*win_ptr)->pscw_shm_structs[rank]);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }

        /* initialize it: every byte is set to the character '0' */
        pscw_sync_addr = (*win_ptr)->pscw_shm_structs[rank].addr;
        for (i=0; i<2*comm_size; i++) {
            pscw_sync_addr[i] = '0';
        }

        /* collect everyone's pscw_shm_structs */
        mpi_errno = NMPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                                   (*win_ptr)->pscw_shm_structs,
                                   sizeof(MPIDI_CH3I_Shmem_block_request_result),
                                   MPI_BYTE, comm_ptr->handle);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }

        /* each process now attaches to the shared memory for pscw sync
           of other processes. */
        for (i=0; i<comm_size; i++) {
            if (i != rank) {
                mpi_errno = MPIDI_CH3I_SHM_Attach_notunlink_mem( &((*win_ptr)->pscw_shm_structs[i]) );
                if (mpi_errno) {
                    MPIU_ERR_POP(mpi_errno);
                }
            }
        }
    }

    /* epoch and passive-target lock state start out empty on both paths */
    (*win_ptr)->access_epoch_grp_ptr = NULL;
    (*win_ptr)->access_epoch_grp_ranks_in_win = NULL;
    (*win_ptr)->exposure_epoch_grp_ptr = NULL;
    (*win_ptr)->exposure_epoch_grp_ranks_in_win = NULL;

    (*win_ptr)->pt_rma_excl_lock = 0;

 fn_exit:
    MPIR_Nest_decr();
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_WIN_CREATE);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
 fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}

/*
 * MPIDI_CH3_Win_free()
 */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3_Win_free
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_CH3_Win_free(MPID_Win **win_ptr)
{
    int mpi_errno = MPI_SUCCESS, comm_size, rank, i;
    MPID_Comm *comm_ptr;
    MPIDU_Process_lock_t *locks_base_addr;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_FREE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_WIN_FREE);

    MPID_Comm_get_ptr( (*win_ptr)->comm, comm_ptr );
    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    MPIR_Nest_incr();

    /* barrier needed so that all passive target rmas directed toward
       this process are over */
mpi_errno = NMPI_Barrier((*win_ptr)->comm); if (mpi_errno) { MPIU_ERR_POP(mpi_errno); } mpi_errno = NMPI_Comm_free(&((*win_ptr)->comm)); if (mpi_errno) { MPIU_ERR_POP(mpi_errno); } MPIR_Nest_decr(); MPIU_Free((*win_ptr)->base_addrs); MPIU_Free((*win_ptr)->disp_units); MPIU_Free((*win_ptr)->all_win_handles); MPIU_Free((*win_ptr)->pt_rma_puts_accs);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -