📄 convertor.c
字号:
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2006 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2006 The Regents of the University of California.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "ompi_config.h"

#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif

#include "ompi/datatype/datatype.h"
#include "ompi/datatype/convertor.h"
#include "ompi/datatype/datatype_internal.h"
#include "ompi/datatype/datatype_checksum.h"
#include "ompi/datatype/datatype_prototypes.h"
#include "ompi/datatype/convertor_internal.h"
#include "ompi/datatype/dt_arch.h"

extern size_t ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
extern int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* convertor,
                                                         int starting_point, const int* sizes );

/**
 * Object constructor: point the convertor at its embedded static stack,
 * reset the partial length and default the remote architecture to the
 * local one.
 */
static void ompi_convertor_construct( ompi_convertor_t* convertor )
{
    convertor->pStack         = convertor->static_stack;
    convertor->stack_size     = DT_STATIC_STACK_SIZE;
    convertor->partial_length = 0;
    convertor->remoteArch     = ompi_mpi_local_arch;
}

/**
 * Object destructor: delegates to ompi_convertor_cleanup().
 */
static void ompi_convertor_destruct( ompi_convertor_t* convertor )
{
    ompi_convertor_cleanup( convertor );
}

OBJ_CLASS_INSTANCE(ompi_convertor_t, opal_object_t, ompi_convertor_construct, ompi_convertor_destruct );

/* Head of the singly-linked list of master convertors (one per remote architecture). */
static ompi_convertor_master_t* ompi_convertor_master_list = NULL;

extern conversion_fct_t ompi_ddt_heterogeneous_copy_functions[DT_MAX_PREDEFINED];
extern conversion_fct_t ompi_ddt_copy_functions[DT_MAX_PREDEFINED];

/**
 * Release every master convertor in the list. On return the list is empty.
 */
void ompi_convertor_destroy_masters( void )
{
    ompi_convertor_master_t* master = ompi_convertor_master_list;

    while( NULL != master ) {
        /* Unlink the head before releasing it. */
        ompi_convertor_master_list = master->next;
        master->next = NULL;
        /* Cleanup the conversion function if not one of the defaults */
        if( (master->pFunctions != ompi_ddt_heterogeneous_copy_functions) &&
            (master->pFunctions != ompi_ddt_copy_functions) ) {
            free( master->pFunctions );
        }
        free( master );
        master = ompi_convertor_master_list;
    }
}

/**
 * Find or create a convertor suitable for the remote architecture. If there
 * is already a master convertor for this architecture then return it.
 * Otherwise, create and initialize a full featured master convertor.
 *
 * @param remote_arch  architecture descriptor of the remote peer.
 * @return the master convertor for remote_arch, or NULL if a new one was
 *         needed and memory could not be allocated. A master is added to
 *         the list only once it is completely initialized.
 */
ompi_convertor_master_t* ompi_convertor_find_or_create_master( uint32_t remote_arch )
{
    ompi_convertor_master_t* master = ompi_convertor_master_list;
    size_t* remote_sizes;
    int i;

    while( NULL != master ) {
        if( master->remote_arch == remote_arch ) {
            return master;
        }
        master = master->next;
    }

    /* Create a new convertor matching the specified architecture. It is
     * added to the master convertor list once fully initialized.
     */
    master = (ompi_convertor_master_t*)malloc( sizeof(ompi_convertor_master_t) );
    if( NULL == master ) {
        return NULL;
    }
    master->next        = NULL;
    master->remote_arch = remote_arch;
    master->flags       = 0;
    master->hetero_mask = 0;

    /* Most of the sizes will be identical, so for now just make a copy of
     * the local ones. As master->remote_sizes is defined as being an array of
     * consts we have to manually cast it before using it for writing purposes.
     * The copy starts at index 0 so that no entry of the freshly allocated
     * array is left indeterminate.
     */
    remote_sizes = (size_t*)master->remote_sizes;
    for( i = 0; i < DT_MAX_PREDEFINED; i++ ) {
        remote_sizes[i] = ompi_ddt_local_sizes[i];
    }

    /**
     * If the local and remote architecture are the same there is no need
     * to check for the remote data sizes. They will always be the same as
     * the local ones.
     */
    if( master->remote_arch == ompi_mpi_local_arch ) {
        master->pFunctions = ompi_ddt_copy_functions;
        master->flags |= CONVERTOR_HOMOGENEOUS;
        master->next = ompi_convertor_master_list;
        ompi_convertor_master_list = master;
        return master;
    }

    /* Find out the remote bool size */
    if( ompi_arch_checkmask( &master->remote_arch, OMPI_ARCH_BOOLIS8 ) ) {
        remote_sizes[DT_CXX_BOOL] = 1;
    } else if( ompi_arch_checkmask( &master->remote_arch, OMPI_ARCH_BOOLIS16 ) ) {
        remote_sizes[DT_CXX_BOOL] = 2;
    } else if( ompi_arch_checkmask( &master->remote_arch, OMPI_ARCH_BOOLIS32 ) ) {
        remote_sizes[DT_CXX_BOOL] = 4;
    } else {
        opal_output( 0, "Unknown sizeof(bool) for the remote architecture\n" );
    }

    /* check the length of the long */
    if( ompi_arch_checkmask( &master->remote_arch, OMPI_ARCH_LONGIS64 ) ) {
        remote_sizes[DT_LONG]               = 8;
        remote_sizes[DT_UNSIGNED_LONG]      = 8;
        remote_sizes[DT_LONG_LONG_INT]      = 8;
        remote_sizes[DT_UNSIGNED_LONG_LONG] = 8;
    }

    /* Find out the remote logical size. It can happen that the size will be
     * unknown (if Fortran is not supported on the remote library). If this is
     * the case, just let the remote logical size match the local size.
     */
    if( ompi_arch_checkmask( &master->remote_arch, OMPI_ARCH_LOGICALIS8 ) ) {
        remote_sizes[DT_LOGIC] = 1;
    } else if( ompi_arch_checkmask( &master->remote_arch, OMPI_ARCH_LOGICALIS16 ) ) {
        remote_sizes[DT_LOGIC] = 2;
    } else if( ompi_arch_checkmask( &master->remote_arch, OMPI_ARCH_LOGICALIS32 ) ) {
        remote_sizes[DT_LOGIC] = 4;
    } else {
        opal_output( 0, "Unknown sizeof(fortran logical) for the remote architecture\n" );
    }

    /**
     * Now we can compute the conversion mask. For all sizes where the remote
     * and local architecture differ a conversion is needed. Moreover, if the
     * 2 architectures don't have the same endianness all data with a length
     * larger than 1 byte (with the exception of logicals and C++ bools) have
     * to be byte-swapped.
     */
    for( i = DT_CHAR; i < DT_MAX_PREDEFINED; i++ ) {
        if( remote_sizes[i] != ompi_ddt_local_sizes[i] ) {
            master->hetero_mask |= (((uint64_t)1) << i);
        }
    }
    if( ompi_arch_checkmask( &master->remote_arch, OMPI_ARCH_ISBIGENDIAN ) !=
        ompi_arch_checkmask( &ompi_mpi_local_arch, OMPI_ARCH_ISBIGENDIAN ) ) {
        uint64_t hetero_mask = 0;

        for( i = DT_CHAR; i < DT_MAX_PREDEFINED; i++ ) {
            if( remote_sizes[i] > 1 ) {
                hetero_mask |= (((uint64_t)1) << i);
            }
        }
        hetero_mask &= ~((((uint64_t)1) << DT_LOGIC) | (((uint64_t)1) << DT_CXX_BOOL));
        master->hetero_mask |= hetero_mask;
    }

    master->pFunctions = (conversion_fct_t*)malloc( sizeof(ompi_ddt_heterogeneous_copy_functions) );
    if( NULL == master->pFunctions ) {
        /* The master was never linked into the list, so it can simply be dropped. */
        free( master );
        return NULL;
    }

    /**
     * Usually the heterogeneous functions are slower than the copy ones. Let's
     * try to minimize the usage of the heterogeneous versions. The table is
     * filled from index 0: entries below DT_CHAR (if any) never have their
     * mask bit set above, so they receive the plain copy function instead of
     * being left uninitialized.
     */
    for( i = 0; i < DT_MAX_PREDEFINED; i++ ) {
        if( master->hetero_mask & (((uint64_t)1) << i) ) {
            master->pFunctions[i] = ompi_ddt_heterogeneous_copy_functions[i];
        } else {
            master->pFunctions[i] = ompi_ddt_copy_functions[i];
        }
    }

    /* The master is complete: publish it at the head of the list. */
    master->next = ompi_convertor_master_list;
    ompi_convertor_master_list = master;

    /* We're done so far, return the master convertor */
    return master;
}

/**
 * Allocate a new convertor bound to the master convertor of remote_arch.
 *
 * @param remote_arch  architecture descriptor of the remote peer.
 * @param mode         unused by this function; kept for interface
 *                     compatibility.
 * @return the new convertor, or NULL if either the master convertor or the
 *         convertor object could not be allocated.
 */
ompi_convertor_t* ompi_convertor_create( int32_t remote_arch, int32_t mode )
{
    ompi_convertor_master_t* master;
    ompi_convertor_t* convertor;

    (void)mode;

    master = ompi_convertor_find_or_create_master( remote_arch );
    if( NULL == master ) {
        return NULL;
    }

    convertor = OBJ_NEW(ompi_convertor_t);
    if( NULL == convertor ) {
        return NULL;
    }

    convertor->remoteArch = remote_arch;
    convertor->stack_pos  = 0;
    convertor->flags      = master->flags;
    convertor->master     = master;

    return convertor;
}

/*
 * Common prologue of the pack and unpack entry points: reset the checksum
 * state, then guard against a convertor that is already completed.
 *
 * WARNING: this macro contains a return statement. When the convertor is
 * flagged CONVERTOR_COMPLETED it zeroes the first iovec length, *(OUT) and
 * *(MAX_DATA), and makes the ENCLOSING function return 1.
 */
#define OMPI_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( CONVERTOR, IOV, OUT, MAX_DATA ) \
    do {                                                                \
        (CONVERTOR)->checksum = OPAL_CSUM_ZERO;                         \
        (CONVERTOR)->csum_ui1 = 0;                                      \
        (CONVERTOR)->csum_ui2 = 0;                                      \
                                                                        \
        /* protect against over packing data */                         \
        if( (CONVERTOR)->flags & CONVERTOR_COMPLETED ) {                \
            (IOV)[0].iov_len = 0;                                       \
            *(OUT) = 0;                                                 \
            *(MAX_DATA) = 0;                                            \
            return 1;  /* nothing to do */                              \
        }                                                               \
        assert( (CONVERTOR)->bConverted < (CONVERTOR)->local_size );    \
    } while(0)

/*
 * Return 0 if everything went OK and if there is still room before the complete
 *          conversion of the data (need additional call with others input buffers )
 *        1 if everything went fine and the data was completely converted
 *       -1 something wrong occurs.
 *
 * On input *out_size is the number of iovec entries available and *max_data
 * the maximum number of bytes to pack. On output (fast path) *out_size is
 * the number of iovec entries used and *max_data the number of bytes packed
 * by this call.
 */
int32_t ompi_convertor_pack( ompi_convertor_t* pConv,
                             struct iovec* iov, uint32_t* out_size,
                             size_t* max_data )
{
    OMPI_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( pConv, iov, out_size, max_data );

    if( !(pConv->flags & CONVERTOR_WITH_CHECKSUM) &&
        (pConv->flags & DT_FLAG_NO_GAPS) ) {
        /* We are doing conversion on a predefined contiguous datatype. The
         * convertor contain minimal information, we only use the bConverted
         * to manage the conversion.
         */
        uint32_t i;
        size_t initial_bConverted = pConv->bConverted;
        size_t pending_length = pConv->local_size - pConv->bConverted;
        char* base_pointer;

        /* Never pack more than the caller asked for. */
        if( (*max_data) < pending_length ) {
            pending_length = (*max_data);
        }

        for( i = 0; (i < *out_size) && (0 != pending_length); i++ ) {
            base_pointer = pConv->pBaseBuf + pConv->bConverted + pConv->pDesc->true_lb;

            /* Shrink the iovec entry to what is left to pack. */
            if( iov[i].iov_len > pending_length ) {
                iov[i].iov_len = pending_length;
            }
            if( NULL == iov[i].iov_base ) {
                /* No destination buffer provided: expose the user data in place. */
                iov[i].iov_base = base_pointer;
            } else {
                MEMCPY( iov[i].iov_base, base_pointer, iov[i].iov_len );
            }
            pConv->bConverted += iov[i].iov_len;
            pending_length -= iov[i].iov_len;
        }
        *out_size = i;
        *max_data = pConv->bConverted - initial_bConverted;
        if( pConv->bConverted == pConv->local_size ) {
            pConv->flags |= CONVERTOR_COMPLETED;
            return 1;
        }
        return 0;
    }

    return pConv->fAdvance( pConv, iov, out_size, max_data );
}

/*
 * Unpack the content of the iovec entries into the user buffer.
 *
 * Return values follow the same convention as ompi_convertor_pack(). On
 * the fast path *max_data is set on output to the number of bytes unpacked
 * by this call; *out_size is updated only when the conversion completes
 * (to the number of iovec entries consumed).
 */
int32_t ompi_convertor_unpack( ompi_convertor_t* pConv,
                               struct iovec* iov, uint32_t* out_size,
                               size_t* max_data )
{
    OMPI_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( pConv, iov, out_size, max_data );

    if( !(pConv->flags & CONVERTOR_WITH_CHECKSUM) &&
        ((pConv->flags & (CONVERTOR_HOMOGENEOUS | DT_FLAG_NO_GAPS)) ==
         (CONVERTOR_HOMOGENEOUS | DT_FLAG_NO_GAPS)) ) {
        /* We are doing conversion on a contiguous datatype on a homogeneous
         * environment. The convertor contain minimal information, we only
         * use the bConverted to manage the conversion.
         */
        uint32_t i;
        size_t initial_bConverted = pConv->bConverted;
        char* base_pointer;

        base_pointer = pConv->pBaseBuf + pConv->bConverted +
            pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp;

        for( i = 0; i < *out_size; i++ ) {
            /* This entry reaches (or goes past) the end of the data:
             * handle it separately so that it gets truncated. */
            if( (pConv->bConverted + iov[i].iov_len) >= pConv->local_size ) {
                goto predefined_data_unpack;
            }
            MEMCPY( base_pointer, iov[i].iov_base, iov[i].iov_len );
            pConv->bConverted += iov[i].iov_len;
            base_pointer += iov[i].iov_len;
        }
        *max_data = pConv->bConverted - initial_bConverted;
        return 0;

    predefined_data_unpack:
        /* Last entry: copy only the bytes still missing. */
        iov[i].iov_len = pConv->local_size - pConv->bConverted;
        MEMCPY( base_pointer, iov[i].iov_base, iov[i].iov_len );
        pConv->bConverted = pConv->local_size;
        *out_size = i + 1;
        *max_data = pConv->bConverted - initial_bConverted;
        pConv->flags |= CONVERTOR_COMPLETED;
        return 1;
    }

    return pConv->fAdvance( pConv, iov, out_size, max_data );
}

static inline
int ompi_convertor_create_stack_with_pos_contig( ompi_convertor_t* pConvertor,
                                                 size_t starting_point, const size_t* sizes )
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -