📄 mpid_win_lock.c
字号:
* * \ref msginfo_usage\n * \ref lock_design */void unlk_cb(const MPIDU_Onesided_ctl_t *info, int lpid) { MPID_Win *win; unsigned ret; int orig, rmas; MPID_assert_debug(info->mpid_ctl_w0 == MPID_MSGTYPE_UNLOCK); MPID_Win_get_ptr((MPI_Win)info->mpid_ctl_w1, win); MPID_assert_debug(win != NULL); orig = info->mpid_ctl_w2; rmas = info->mpid_ctl_w3; ret = ((rmas && win->_dev.coll_info[orig].rma_sends < rmas) || local_unlock(win, orig)); if (ret) { /* lock was released */ MPIDU_Onesided_ctl_t ack; if (MPID_LOCK_IS_FREE(win)) { win->_dev.epoch_rma_ok = 0; } epoch_end_cb(win); ack.mpid_ctl_w0 = MPID_MSGTYPE_UNLOCKACK; ack.mpid_ctl_w1 = win->_dev.coll_info[orig].win_handle; ack.mpid_ctl_w2 = mpid_my_lpid; ack.mpid_ctl_w3 = 0; (void) DCMF_Control(&bg1s_ct_proto, win->_dev.my_cstcy, lpid, &ack.ctl); } else { MPIDU_add_unlk(win, orig, info); }}/** * \page lock_design MPID_Win_lock / unlock Design * * MPID_Win_lock/unlock use DCMF_Control() to send messages (both requests and * acknowledgements). All aspects of locking, including queueing waiters for * lock and unlock, is done in this layer, in this source file. * * A lock request / RMA / unlock sequence is as follows: * * <B>MPI_Win_lock Called</B> * * - A sanity-check is done to * ensure that the window is in a valid state to enter a * \e LOCK access epoch. These checks include testing that * no other epoch is currently in affect. * - If the local window is currenty locked then wait for the * lock to be released (calling advance in the loop). * This is made deterministic by inserting a "dummy" waiter on * the lock wait queue which will cause an unlock to stop * trying to grant lock waiters. * - If MPI_MODE_NOCHECK was specified, then return success now. * - Setup a msginfo structure with the msg type, target window * handle, our rank, and lock type, and call DCMF_Control to start * the message on its way to the target. 
* Spin waiting for both the message to send and the my_sync_done flag
 * to get set (by receive callback of MPID_MSGTYPE_LOCKACK message) indicating the lock
 * has been granted.
 *
 * <B>On the target node the MPID_MSGTYPE_LOCK callback is invoked</B>
 *
 * - If the lock cannot be granted, either because the target node
 * is currently involved in some other access/exposure epoch or the lock
 * is currently granted in an incompatible mode:
 *   - An entry is added to the end of the lock wait queue,
 *   containing the rank, lock mode, and ack info,
 *   and the callback returns to the message layer without sending
 *   MPID_MSGTYPE_LOCKACK,
 *   which causes the origin node to wait.
 *   - At some point in the future a node unlocks the window, or the
 *   current epoch ends, at
 *   which time this entry is removed from the lock wait queue and
 *   progress continues with the lock granted.
 * - If (when) the lock can be granted, by a call to epoch_end_cb() either from
 * a specific MPID_* epoch-ending synchronization or target processing of MPI_Win_unlock():
 *   - As long as compatible waiters are found at the head of the lock wait queue,
 *   an MPID_MSGTYPE_LOCKACK message is created from the waiter info and sent to the
 *   (each) origin node, causing the origin's my_sync_done flag to get set, waking it up.
 *
 * <B>Origin wakes up after lock completion</B>
 *
 * - Set epoch type, target node, and MPI_MODE_* flags in window.
 * This effectively creates the epoch.
 *
 * <B>Origin invokes RMA operation(s)</B>
 *
 * - See respective RMA operation calls for details
 *
 * <B>Origin calls MPI_Win_unlock</B>
 *
 * - Basic sanity-checks are done, including testing that the
 * window is actually in a \e LOCK access epoch and that the target
 * node specified is the same as the target node of the MPI_Win_lock.
 * - If any RMA operations (sends) are pending, wait for them to be
 * sent (calling advance in the loop).
* - If MPI_MODE_NOCHECK was not asserted in the original lock call:
 *   - setup message with the msg type MPID_MSGTYPE_UNLOCK, target window handle,
 *   our rank, and the number of RMA operations that were initiated
 *   to this target.
 *   - Call DCMF_Control, to send the message (unlock request).
 *   - Spin waiting for message to send and my_sync_done to get set.
 *
 * <B>On the target node the unlock callback is invoked</B>
 *
 * - Sanity-check the unlock to ensure it matches the original lock.
 * - If the number of RMA operations sent to us by the origin exceeds the number
 * of operations received from the origin:
 *   - add the unlock request to the unlock wait queue.
 *   Receive callbacks for RMA operations
 *   will update the RMA ops counter(s) and process any unlock waiters whose
 *   counts now match.
 * - Otherwise, release the lock:
 *   - Call epoch_end_cb() which will
 *   generate MPID_MSGTYPE_LOCKACK messages to all compatible lock waiters.
 *   - Send an MPID_MSGTYPE_UNLOCKACK message to the origin. This message
 *   causes the origin's my_sync_done flag to get set, waking it up.
 *
 * <B>Origin wakes up after unlock completion</B>
 *
 * - Reset epoch info in window to indicate the epoch has ended.
 */

/// \cond NOT_REAL_CODE
#undef FUNCNAME
#define FUNCNAME MPID_Win_lock
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/// \endcond
/**
 * \brief MPI-DCMF glue for MPI_WIN_LOCK function
 *
 * Begin an access epoch to node \e dest.
 * Does not return until target has locked window.
 *
 * epoch_size is overloaded here, since the assumed
 * epoch size for MPID_EPOTYPE_LOCK is 1. We use this
 * field to save the target (locked) rank. This can
 * be used later to validate the target of an RMA operation
 * or to sanity-check the unlock.
 *
 * \param[in] lock_type Lock type (exclusive or shared)
 * \param[in] dest Destination rank (target)
 * \param[in] assert Synchronization hints
 * \param[in] win_ptr Window
 * \return MPI_SUCCESS, MPI_ERR_RMA_SYNC, or error returned from
 * DCMF_Control.
* * \ref msginfo_usage\n * \ref lock_design */int MPID_Win_lock(int lock_type, int dest, int assert, MPID_Win *win_ptr){ int mpi_errno = MPI_SUCCESS; MPIDU_Onesided_ctl_t info; int lpid; MPIU_THREADPRIV_DECL; MPID_MPI_STATE_DECL(MPID_STATE_MPID_WIN_LOCK); MPID_MPI_FUNC_ENTER(MPID_STATE_MPID_WIN_LOCK); MPIU_UNREFERENCED_ARG(assert); MPIU_THREADPRIV_GET; MPIR_Nest_incr(); if (dest == MPI_PROC_NULL) goto fn_exit; if (win_ptr->_dev.epoch_type != MPID_EPOTYPE_NONE) { /* --BEGIN ERROR HANDLING-- */ MPIU_ERR_SETANDSTMT(mpi_errno, MPI_ERR_RMA_SYNC, goto fn_fail, "**rmasync"); /* --END ERROR HANDLING-- */ } /** * \todo Should we pass NOCHECK along with RMA ops, * so that target can confirm?` */ if (!(win_ptr->_dev.epoch_assert & MPI_MODE_NOCHECK)) { if (dest == win_ptr->_dev.comm_ptr->rank) { MPIDU_Spin_lock_acquire(win_ptr, dest, lock_type); } else { info.mpid_ctl_w0 = MPID_MSGTYPE_LOCK; info.mpid_ctl_w1 = win_ptr->_dev.coll_info[dest].win_handle; info.mpid_ctl_w2 = win_ptr->_dev.comm_ptr->rank; info.mpid_ctl_w3 = lock_type; lpid = MPIDU_world_rank(win_ptr, dest); win_ptr->_dev.my_sync_done = 0; mpi_errno = DCMF_Control(&bg1s_ct_proto, win_ptr->_dev.my_cstcy, lpid, &info.ctl); if (mpi_errno) { MPIU_ERR_POP(mpi_errno); } MPIDU_Progress_spin(win_ptr->_dev.my_sync_done == 0); } } win_ptr->_dev.epoch_type = MPID_EPOTYPE_LOCK; win_ptr->_dev.epoch_size = dest; win_ptr->_dev.epoch_assert = assert;fn_exit: MPIR_Nest_decr(); MPID_MPI_FUNC_EXIT(MPID_STATE_MPID_WIN_LOCK); return mpi_errno; /* --BEGIN ERROR HANDLING-- */fn_fail: goto fn_exit; /* --END ERROR HANDLING-- */}/// \cond NOT_REAL_CODE#undef FUNCNAME#define FUNCNAME MPID_Win_unlock#undef FCNAME#define FCNAME MPIU_QUOTE(FUNCNAME)/// \endcond/** * \brief MPI-DCMF glue for MPI_WIN_UNLOCK function * * End access epoch started by MPID_Win_lock. * Sends to target the number of RMA ops we performed. * Target node will not unlock until it has received all RMA ops we sent. * While unlock failed call advance. 
* * \param[in] dest Destination rank (target) * \param[in] win_ptr Window * \return MPI_SUCCESS, MPI_ERR_RMA_SYNC, or error returned from * DCMF_Unlock. * * \ref msginfo_usage\n * \ref lock_design */int MPID_Win_unlock(int dest, MPID_Win *win_ptr){ int mpi_errno = MPI_SUCCESS; int lpid; MPIDU_Onesided_ctl_t info; MPIU_THREADPRIV_DECL; MPID_MPI_STATE_DECL(MPID_STATE_MPID_WIN_UNLOCK); MPID_MPI_FUNC_ENTER(MPID_STATE_MPID_WIN_UNLOCK); MPIU_THREADPRIV_GET; MPIR_Nest_incr(); if (dest == MPI_PROC_NULL) goto fn_exit; if (win_ptr->_dev.epoch_type != MPID_EPOTYPE_LOCK) { /* --BEGIN ERROR HANDLING-- */ MPIU_ERR_SETANDSTMT(mpi_errno, MPI_ERR_RMA_SYNC, goto fn_fail, "**rmasync"); /* --END ERROR HANDLING-- */ } MPID_assert(dest == win_ptr->_dev.epoch_size); /* * We wait for all RMA sends to drain here, just for neatness. * TBD: It may be possible to do this only in the advance loop * after the unlock request. */ MPIDU_Progress_spin(win_ptr->_dev.my_rma_pends > 0 || win_ptr->_dev.my_get_pends > 0); if (!(win_ptr->_dev.epoch_assert & MPI_MODE_NOCHECK)) { if (dest == win_ptr->_dev.comm_ptr->rank) { (void)local_unlock(win_ptr, dest); /* our (subsequent) call to epoch_end_cb() will * handle any lock waiters... */ } else { info.mpid_ctl_w0 = MPID_MSGTYPE_UNLOCK; info.mpid_ctl_w1 = win_ptr->_dev.coll_info[dest].win_handle; info.mpid_ctl_w2 = win_ptr->_dev.comm_ptr->rank; info.mpid_ctl_w3 = win_ptr->_dev.coll_info[dest].rma_sends; /* * Win_unlock should not return until all RMA ops are * complete at the target. So, we loop here until the * target tells us all RMA ops are finished. We also * zero the rma_sends param in the loop, so that the * target can just always += the number and not get an * unreasonable number of pending ops, plus should we * ever decide to do other RMA ops between attempts to * unlock, we can pass that number to the target and it * will update its counter. 
*/ lpid = MPIDU_world_rank(win_ptr, dest); win_ptr->_dev.my_sync_done = 0; mpi_errno = DCMF_Control(&bg1s_ct_proto, win_ptr->_dev.my_cstcy, lpid, &info.ctl); if (mpi_errno) { MPIU_ERR_POP(mpi_errno); } MPIDU_Progress_spin(win_ptr->_dev.my_rma_pends > 0 || win_ptr->_dev.my_get_pends > 0 || win_ptr->_dev.my_sync_done == 0); } } win_ptr->_dev.epoch_type = MPID_EPOTYPE_NONE; win_ptr->_dev.epoch_size = 0; win_ptr->_dev.epoch_assert = 0; win_ptr->_dev.coll_info[dest].rma_sends = 0; epoch_end_cb(win_ptr);fn_exit: MPIR_Nest_decr(); MPID_MPI_FUNC_EXIT(MPID_STATE_MPID_WIN_UNLOCK); return mpi_errno; /* --BEGIN ERROR HANDLING-- */fn_fail: goto fn_exit; /* --END ERROR HANDLING-- */}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -