📄 oob_tcp_msg.c
Font size:
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2007 Los Alamos National Security, LLC.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 * In windows, many of the socket functions return an EWOULDBLOCK
 * instead of things like EAGAIN, EINPROGRESS, etc. It has been
 * verified that this will not conflict with other error codes that
 * are returned by these functions under UNIX/Linux environments
 */

#include "orte_config.h"
#include "opal/opal_socket_errno.h"
#include "orte/class/orte_proc_table.h"
#include "orte/orte_constants.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/oob/tcp/oob_tcp.h"
#include "orte/mca/oob/tcp/oob_tcp_msg.h"

/* Forward declarations for the class hooks and the internal receive
 * helpers (defined later in this file, past this excerpt). */
static void mca_oob_tcp_msg_construct(mca_oob_tcp_msg_t*);
static void mca_oob_tcp_msg_destruct(mca_oob_tcp_msg_t*);
static void mca_oob_tcp_msg_ident(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* peer);
static bool mca_oob_tcp_msg_recv(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* peer);
static void mca_oob_tcp_msg_data(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* peer);
static void mca_oob_tcp_msg_ping(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* peer);

/* Register mca_oob_tcp_msg_t with the OPAL class system so instances
 * can live on opal_list_t queues (free list, completed-message list). */
OBJ_CLASS_INSTANCE(
    mca_oob_tcp_msg_t,
    opal_list_item_t,
    mca_oob_tcp_msg_construct,
    mca_oob_tcp_msg_destruct);

/* Class constructor: set up the per-message lock and condition variable
 * used by msg_wait()/msg_timedwait() to block until completion. */
static void mca_oob_tcp_msg_construct(mca_oob_tcp_msg_t* msg)
{
    OBJ_CONSTRUCT(&msg->msg_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&msg->msg_condition, opal_condition_t);
}

/* Class destructor: tear down the synchronization objects created above. */
static void mca_oob_tcp_msg_destruct(mca_oob_tcp_msg_t* msg)
{
    OBJ_DESTRUCT(&msg->msg_lock);
    OBJ_DESTRUCT(&msg->msg_condition);
}

/*
 *
Wait for a msg to complete. * @param msg (IN) Message to wait on. * @param rc (OUT) Return code (number of bytes read on success or error code on failure). * @retval ORTE_SUCCESS or error code on failure. */int mca_oob_tcp_msg_wait(mca_oob_tcp_msg_t* msg, int* rc){#if OMPI_ENABLE_PROGRESS_THREADS OPAL_THREAD_LOCK(&msg->msg_lock); while(msg->msg_complete == false) { if(opal_event_progress_thread()) { int rc; OPAL_THREAD_UNLOCK(&msg->msg_lock); rc = opal_event_loop(OPAL_EVLOOP_ONCE); assert(rc >= 0); OPAL_THREAD_LOCK(&msg->msg_lock); } else { opal_condition_wait(&msg->msg_condition, &msg->msg_lock); } } OPAL_THREAD_UNLOCK(&msg->msg_lock);#else /* wait for message to complete */ while(msg->msg_complete == false) { /* msg_wait() is used in the "barrier" at the completion of MPI_FINALIZE, during which time BTLs may still need to progress pending outgoing communication, so we need to call opal_progress() here to make sure that communication gets pushed out so others can enter finalize (and send us the message we're here waiting for). However, if we're in a callback from the event library that was triggered from a call to opal_progress(), opal_progress() will think another thread is already progressing the event engine (in the case of mpi threads enabled) and not progress the engine, meaning we'll never receive our message. So we also need to progress the event library expicitly. We use EVLOOP_NONBLOCK so that we can progress both the registered callbacks and the event library, as EVLOOP_ONCE may block for a short period of time. */ opal_progress(); opal_event_loop(OPAL_EVLOOP_NONBLOCK); }#endif /* return status */ if(NULL != rc) { *rc = msg->msg_rc; } return ORTE_SUCCESS;}/* * Wait up to a timeout for the message to complete. * @param msg (IN) Message to wait on. * @param rc (OUT) Return code (number of bytes read on success or error code on failure). * @retval ORTE_SUCCESS or error code on failure. 
*/int mca_oob_tcp_msg_timedwait(mca_oob_tcp_msg_t* msg, int* rc, struct timespec* abstime){ struct timeval tv; uint32_t secs = abstime->tv_sec; uint32_t usecs = abstime->tv_nsec * 1000; gettimeofday(&tv,NULL);#if OMPI_ENABLE_PROGRESS_THREADS OPAL_THREAD_LOCK(&msg->msg_lock); while(msg->msg_complete == false && ((uint32_t)tv.tv_sec <= secs || ((uint32_t)tv.tv_sec == secs && (uint32_t)tv.tv_usec < usecs))) { if(opal_event_progress_thread()) { int rc; OPAL_THREAD_UNLOCK(&msg->msg_lock); rc = opal_event_loop(OPAL_EVLOOP_ONCE); assert(rc >= 0); OPAL_THREAD_LOCK(&msg->msg_lock); } else { opal_condition_timedwait(&msg->msg_condition, &msg->msg_lock, abstime); } gettimeofday(&tv,NULL); } OPAL_THREAD_UNLOCK(&msg->msg_lock);#else /* wait for message to complete */ while(msg->msg_complete == false && ((uint32_t)tv.tv_sec <= secs || ((uint32_t)tv.tv_sec == secs && (uint32_t)tv.tv_usec < usecs))) { /* see comment in tcp_msg_wait, above */ opal_progress(); opal_event_loop(OPAL_EVLOOP_NONBLOCK); gettimeofday(&tv,NULL); }#endif /* return status */ if(NULL != rc) { *rc = msg->msg_rc; } if(msg->msg_rc < 0) return msg->msg_rc; return (msg->msg_complete ? ORTE_SUCCESS : ORTE_ERR_TIMEOUT);}/* * Signal that a message has completed. * @param msg (IN) Message to wait on. * @param peer (IN) the peer of the message * @retval ORTE_SUCCESS or error code on failure. 
 */
int mca_oob_tcp_msg_complete(mca_oob_tcp_msg_t* msg, orte_process_name_t * peer)
{
    OPAL_THREAD_LOCK(&msg->msg_lock);
    msg->msg_complete = true;
    if(NULL != msg->msg_cbfunc) {
        opal_list_item_t* item;

        /* non-blocking message: deliver via callback rather than
         * waking a waiter */
        OPAL_THREAD_UNLOCK(&msg->msg_lock);

        /* post to a global list of completed messages */
        OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
        opal_list_append(&mca_oob_tcp_component.tcp_msg_completed, (opal_list_item_t*)msg);
        /* If the list was non-empty before our append (size > 1 now),
         * another thread is already acting as dispatcher and will invoke
         * our callback from its drain loop below — just return. */
        if(opal_list_get_size(&mca_oob_tcp_component.tcp_msg_completed) > 1) {
            OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
            return ORTE_SUCCESS;
        }
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);

        /* invoke message callback (component lock NOT held — user code
         * may re-enter the OOB) */
        msg->msg_cbfunc(msg->msg_rc, peer, msg->msg_uiov, msg->msg_ucnt, msg->msg_hdr.msg_tag, msg->msg_cbdata);

        /* dispatch any completed events: we are the dispatcher, so drain
         * everything that was queued while we were in the callback */
        OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
        opal_list_remove_item(&mca_oob_tcp_component.tcp_msg_completed, (opal_list_item_t*)msg);
        MCA_OOB_TCP_MSG_RETURN(msg);
        while(NULL != (item = opal_list_remove_first(&mca_oob_tcp_component.tcp_msg_completed))) {
            msg = (mca_oob_tcp_msg_t*)item;
            /* drop the component lock around each user callback */
            OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
            msg->msg_cbfunc(
                msg->msg_rc,
                &msg->msg_peer,
                msg->msg_uiov,
                msg->msg_ucnt,
                msg->msg_hdr.msg_tag,
                msg->msg_cbdata);
            OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
            MCA_OOB_TCP_MSG_RETURN(msg);
        }
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
    } else {
        /* blocking message: wake any thread parked in
         * msg_wait()/msg_timedwait() */
        opal_condition_broadcast(&msg->msg_condition);
        OPAL_THREAD_UNLOCK(&msg->msg_lock);
    }
    return ORTE_SUCCESS;
}

/*
 * The function that actually sends the data!
 * @param msg a pointer to the message to send
 * @param peer the peer we are sending to
 * @retval true if the entire message has been sent
 * @retval false if the entire message has not been sent
 */
bool mca_oob_tcp_msg_send_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_peer_t * peer)
{
    int rc;
    while(1) {
        /* attempt to write out whatever remains of the iovec array */
        rc = writev(peer->peer_sd, msg->msg_rwptr, msg->msg_rwnum);
        if(rc < 0) {
            if(opal_socket_errno == EINTR)
                continue;
            /* In windows, many of the socket functions return an EWOULDBLOCK
               instead of things like EAGAIN, EINPROGRESS, etc. It has been
               verified that this will not conflict with other error codes that
               are returned by these functions under UNIX/Linux environments */
            else if (opal_socket_errno == EAGAIN || opal_socket_errno == EWOULDBLOCK)
                /* socket not writable yet — caller will retry when the
                 * write event fires again */
                return false;
            else {
                opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_msg_send_handler: writev failed: %s (%d)",
                    ORTE_NAME_ARGS(orte_process_info.my_name),
                    ORTE_NAME_ARGS(&(peer->peer_name)),
                    strerror(opal_socket_errno),
                    opal_socket_errno);
                /* hard error: drop the connection and mark the message
                 * failed; returning true tells the caller we're done
                 * with this message */
                mca_oob_tcp_peer_close(peer);
                msg->msg_rc = ORTE_ERR_CONNECTION_FAILED;
                return true;
            }
        }

        /* accumulate the byte count delivered so far */
        msg->msg_rc += rc;
        do {/* while there is still more iovecs to write */
            if(rc < (int)msg->msg_rwptr->iov_len) {
                /* partial write of the current iovec: shrink it so the
                 * next writev resumes at the unsent byte */
                msg->msg_rwptr->iov_len -= rc;
                msg->msg_rwptr->iov_base = (ompi_iov_base_ptr_t)((char *) msg->msg_rwptr->iov_base + rc);
                break;
            } else {
                /* this iovec fully written: consume it and move on */
                rc -= msg->msg_rwptr->iov_len;
                (msg->msg_rwnum)--;
                (msg->msg_rwptr)++;
                if(0 == msg->msg_rwnum) {
                    /* entire message on the wire */
                    return true;
                }
            }
        } while(msg->msg_rwnum);
    }
}

/*
 * Receives message data.
 * @param msg the message to be received into
 * @param peer the peer to receive from
 * @retval true if the whole message was received
 * @retval false if the whole message was not received
 *
 * NOTE(review): this function is truncated in the visible source — the
 * remainder of its body lies beyond this excerpt.
 */
bool mca_oob_tcp_msg_recv_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_peer_t * peer)
{
    /* has entire header been received */
    if(msg->msg_rwptr == msg->msg_rwiov) {
        /* still reading the fixed-size header (iovec[0]); bail out if it
         * is not complete yet */
        if(mca_oob_tcp_msg_recv(msg, peer) == false)
            return false;

        /* allocate a buffer for the receive */
        /* header is complete: convert it to host byte order, then size
         * the payload buffer from the advertised message size */
        MCA_OOB_TCP_HDR_NTOH(&msg->msg_hdr);
        if(msg->msg_hdr.msg_size > 0) {
            msg->msg_rwbuf = malloc(msg->msg_hdr.msg_size);
            if(NULL == msg->msg_rwbuf) {
                opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_msg_recv_handler: malloc(%d) failed\n",
                    ORTE_NAME_ARGS(orte_process_info.my_name),
                    ORTE_NAME_ARGS(&(peer->peer_name)),
                    msg->msg_hdr.msg_size);
                /* allocation failure is fatal for this connection */
                mca_oob_tcp_peer_close(peer);
                return false;
            }
            /* point iovec[1] at the freshly allocated payload buffer */
            msg->msg_rwiov[1].iov_base = (ompi_iov_base_ptr_t)msg->msg_rwbuf;
            msg->msg_rwiov[1].iov_len = msg->msg_hdr.msg_size;
⌨️ Keyboard shortcuts
Copy code
Ctrl + C
Search code
Ctrl + F
Full-screen mode
F11
Toggle theme
Ctrl + Shift + D
Show shortcuts
?
Increase font size
Ctrl + =
Decrease font size
Ctrl + -