📄 convertor.c

📁 MPI stands for the Message Passing Interface. Written by the MPI Forum (a large committee comprising
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2006 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2006 The Regents of the University of California.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "ompi_config.h"

#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif

#include "ompi/datatype/datatype.h"
#include "ompi/datatype/convertor.h"
#include "ompi/datatype/datatype_internal.h"
#include "ompi/datatype/datatype_checksum.h"
#include "ompi/datatype/datatype_prototypes.h"
#include "ompi/datatype/convertor_internal.h"
#include "ompi/datatype/dt_arch.h"

extern size_t ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
extern int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* convertor,
                                                         int starting_point, const int* sizes );

/**
 * Object constructor: point the convertor at its embedded static stack,
 * reset the partial length and default the remote architecture to the
 * local one.
 */
static void ompi_convertor_construct( ompi_convertor_t* convertor )
{
    convertor->pStack         = convertor->static_stack;
    convertor->stack_size     = DT_STATIC_STACK_SIZE;
    convertor->partial_length = 0;
    convertor->remoteArch     = ompi_mpi_local_arch;
}

/**
 * Object destructor: delegates to ompi_convertor_cleanup().
 */
static void ompi_convertor_destruct( ompi_convertor_t* convertor )
{
    ompi_convertor_cleanup( convertor );
}

OBJ_CLASS_INSTANCE(ompi_convertor_t, opal_object_t, ompi_convertor_construct, ompi_convertor_destruct );

/* Head of the singly linked list of master convertors (one per remote architecture). */
static ompi_convertor_master_t* ompi_convertor_master_list = NULL;

extern conversion_fct_t ompi_ddt_heterogeneous_copy_functions[DT_MAX_PREDEFINED];
extern conversion_fct_t ompi_ddt_copy_functions[DT_MAX_PREDEFINED];

/**
 * Release every master convertor on the master list and leave the list empty.
 *
 * A master's function table is freed only when it was allocated for that
 * master, i.e. when it is neither of the two shared default tables.
 */
void ompi_convertor_destroy_masters( void )
{
    ompi_convertor_master_t* master = ompi_convertor_master_list;

    while( NULL != master ) {
        /* Unlink the head before releasing it */
        ompi_convertor_master_list = master->next;
        master->next = NULL;
        /* Cleanup the conversion function if not one of the defaults */
        if( (master->pFunctions != ompi_ddt_heterogeneous_copy_functions) &&
            (master->pFunctions != ompi_ddt_copy_functions) ) {
            free( master->pFunctions );
        }
        free( master );
        master = ompi_convertor_master_list;
    }
}

/**
 * Find or create a convertor suitable for the remote architecture. If there
 * is already a master convertor for this architecture then return it.
 * Otherwise, create and initialize a full featured master convertor.
 *
 * @param remote_arch  architecture descriptor of the peer.
 * @return the matching master convertor (owned by the master list and
 *         released by ompi_convertor_destroy_masters()), or NULL if a
 *         memory allocation failed. On failure the master list is left
 *         exactly as it was before the call.
 */
ompi_convertor_master_t*
ompi_convertor_find_or_create_master( uint32_t remote_arch )
{
    ompi_convertor_master_t* master = ompi_convertor_master_list;
    int i;
    size_t* remote_sizes;

    while( NULL != master ) {
        if( master->remote_arch == remote_arch ) {
            return master;
        }
        master = master->next;
    }

    /* Create a new convertor matching the specified architecture and add it to the
     * master convertor list.
     */
    master = (ompi_convertor_master_t*)malloc( sizeof(ompi_convertor_master_t) );
    if( NULL == master ) {
        return NULL;
    }
    master->next = ompi_convertor_master_list;
    ompi_convertor_master_list = master;
    master->remote_arch = remote_arch;
    master->flags       = 0;
    master->hetero_mask = 0;

    /* Most of the sizes will be identical, so for now just make a copy of
     * the local ones. As master->remote_sizes is defined as being an array of
     * consts we have to manually cast it before using it for writing purposes.
     */
    remote_sizes = (size_t*)master->remote_sizes;
    for( i = DT_CHAR; i < DT_MAX_PREDEFINED; i++ ) {
        remote_sizes[i] = ompi_ddt_local_sizes[i];
    }

    /**
     * If the local and remote architecture are the same there is no need
     * to check for the remote data sizes. They will always be the same as
     * the local ones.
     */
    if( master->remote_arch == ompi_mpi_local_arch ) {
        master->pFunctions = ompi_ddt_copy_functions;
        master->flags |= CONVERTOR_HOMOGENEOUS;
        return master;
    }

    /* Find out the remote bool size */
    if( ompi_arch_checkmask( &master->remote_arch, OMPI_ARCH_BOOLIS8 ) ) {
        remote_sizes[DT_CXX_BOOL] = 1;
    } else if( ompi_arch_checkmask( &master->remote_arch, OMPI_ARCH_BOOLIS16 ) ) {
        remote_sizes[DT_CXX_BOOL] = 2;
    } else if( ompi_arch_checkmask( &master->remote_arch, OMPI_ARCH_BOOLIS32 ) ) {
        remote_sizes[DT_CXX_BOOL] = 4;
    } else {
        opal_output( 0, "Unknown sizeof(bool) for the remote architecture\n" );
    }

    /* check the length of the long */
    /* NOTE(review): only the 64-bit case is handled. When the remote side does
     * not advertise OMPI_ARCH_LONGIS64 the local sizes copied above are kept,
     * which looks wrong for a local 64-bit long talking to a 32-bit-long
     * peer -- confirm against the heterogeneous copy functions before changing.
     */
    if( ompi_arch_checkmask( &master->remote_arch, OMPI_ARCH_LONGIS64 ) ) {
        remote_sizes[DT_LONG]               = 8;
        remote_sizes[DT_UNSIGNED_LONG]      = 8;
        remote_sizes[DT_LONG_LONG_INT]      = 8;
        remote_sizes[DT_UNSIGNED_LONG_LONG] = 8;
    }

    /* find out the remote logical size. It can happen that the size will be
     * unknown (if Fortran is not supported on the remote library). If this is
     * the case, just let the remote logical size match the local size.
     */
    if( ompi_arch_checkmask( &master->remote_arch, OMPI_ARCH_LOGICALIS8 ) ) {
        remote_sizes[DT_LOGIC] = 1;
    } else if( ompi_arch_checkmask( &master->remote_arch, OMPI_ARCH_LOGICALIS16 ) ) {
        remote_sizes[DT_LOGIC] = 2;
    } else if( ompi_arch_checkmask( &master->remote_arch, OMPI_ARCH_LOGICALIS32 ) ) {
        remote_sizes[DT_LOGIC] = 4;
    } else {
        opal_output( 0, "Unknown sizeof(fortran logical) for the remote architecture\n" );
    }

    /**
     * Now we can compute the conversion mask. For all sizes where the remote
     * and local architecture differ a conversion is needed. Moreover, if the
     * 2 architectures don't have the same endianness all data longer than
     * 1 byte (with the exception of logicals and C++ bools) have to be
     * byte-swapped.
     */
    for( i = DT_CHAR; i < DT_MAX_PREDEFINED; i++ ) {
        if( remote_sizes[i] != ompi_ddt_local_sizes[i] ) {
            master->hetero_mask |= (((uint64_t)1) << i);
        }
    }
    if( ompi_arch_checkmask( &master->remote_arch, OMPI_ARCH_ISBIGENDIAN ) !=
        ompi_arch_checkmask( &ompi_mpi_local_arch, OMPI_ARCH_ISBIGENDIAN ) ) {
        uint64_t hetero_mask = 0;

        for( i = DT_CHAR; i < DT_MAX_PREDEFINED; i++ ) {
            if( remote_sizes[i] > 1 ) {
                hetero_mask |= (((uint64_t)1) << i);
            }
        }
        hetero_mask &= ~((((uint64_t)1) << DT_LOGIC) | (((uint64_t)1) << DT_CXX_BOOL));
        master->hetero_mask |= hetero_mask;
    }

    master->pFunctions = (conversion_fct_t*)malloc( sizeof(ompi_ddt_heterogeneous_copy_functions) );
    if( NULL == master->pFunctions ) {
        /* The new master is still the head of the list: unlink it so that no
         * half-initialized entry can be found by a later lookup.
         */
        ompi_convertor_master_list = master->next;
        free( master );
        return NULL;
    }
    /**
     * Usually the heterogeneous functions are slower than the copy ones. Let's
     * try to minimize the usage of the heterogeneous versions.
     */
    for( i = DT_CHAR; i < DT_MAX_PREDEFINED; i++ ) {
        if( master->hetero_mask & (((uint64_t)1) << i) ) {
            master->pFunctions[i] = ompi_ddt_heterogeneous_copy_functions[i];
        } else {
            master->pFunctions[i] = ompi_ddt_copy_functions[i];
        }
    }

    /* We're done so far, return the master convertor */
    return master;
}

/**
 * Allocate a convertor bound to the master convertor of the given remote
 * architecture.
 *
 * @param remote_arch  architecture descriptor of the peer.
 * @param mode         not used by this function.
 * @return a new convertor whose flags are inherited from its master, or
 *         NULL if the master or the convertor could not be allocated.
 */
ompi_convertor_t* ompi_convertor_create( int32_t remote_arch, int32_t mode )
{
    ompi_convertor_t* convertor;
    ompi_convertor_master_t* master;

    /* Look the master up first so that nothing has to be released on failure */
    master = ompi_convertor_find_or_create_master( remote_arch );
    if( NULL == master ) {
        return NULL;
    }

    convertor = OBJ_NEW(ompi_convertor_t);
    if( NULL == convertor ) {
        return NULL;
    }

    convertor->remoteArch = remote_arch;
    convertor->stack_pos  = 0;
    convertor->flags      = master->flags;
    convertor->master     = master;

    return convertor;
}

/**
 * Common prologue of the pack and unpack entry points: reset the checksum
 * state and, if the convertor is already marked as completed, zero the
 * output descriptors and make the *calling function* return 1.
 */
#define OMPI_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( CONVERTOR, IOV, OUT, MAX_DATA ) \
    do {                                                                \
        (CONVERTOR)->checksum = OPAL_CSUM_ZERO;                         \
        (CONVERTOR)->csum_ui1 = 0;                                      \
        (CONVERTOR)->csum_ui2 = 0;                                      \
                                                                        \
        /* protect against over packing data */                         \
        if( (CONVERTOR)->flags & CONVERTOR_COMPLETED ) {                \
            (IOV)[0].iov_len = 0;                                       \
            *(OUT) = 0;                                                 \
            *(MAX_DATA) = 0;                                            \
            return 1;  /* nothing to do */                              \
        }                                                               \
        assert( (CONVERTOR)->bConverted < (CONVERTOR)->local_size );    \
    } while(0)

/**
 * Pack data from the convertor's user buffer into the supplied iovec array.
 *
 * @param pConv     convertor describing the data to pack.
 * @param iov       array of output segments; a segment with a NULL iov_base
 *                  is pointed directly at the user data instead of copied to.
 * @param out_size  in: number of entries in iov; out: number of entries used.
 * @param max_data  in: maximum number of bytes to pack; out: bytes packed.
 *
 * @return 0 if everything went OK and if there is still room before the
 *           complete conversion of the data (need additional call with
 *           other input buffers)
 *         1 if everything went fine and the data was completely converted
 *        -1 something wrong occurs.
 */
int32_t ompi_convertor_pack( ompi_convertor_t* pConv,
                             struct iovec* iov, uint32_t* out_size,
                             size_t* max_data )
{
    OMPI_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( pConv, iov, out_size, max_data );

    if( !(pConv->flags & CONVERTOR_WITH_CHECKSUM) &&
        (pConv->flags & DT_FLAG_NO_GAPS) ) {
        /* We are doing conversion on a predefined contiguous datatype. The
         * convertor contains minimal information, we only use the bConverted
         * to manage the conversion.
         */
        uint32_t i;
        size_t initial_bConverted = pConv->bConverted;
        size_t pending_length = pConv->local_size - pConv->bConverted;
        char* base_pointer;

        /* Never pack more than the caller asked for */
        if( (*max_data) < pending_length ) {
            pending_length = (*max_data);
        }

        for( i = 0; (i < *out_size) && (0 != pending_length); i++ ) {
            base_pointer = pConv->pBaseBuf + pConv->bConverted + pConv->pDesc->true_lb;

            if( iov[i].iov_len > pending_length ) {
                iov[i].iov_len = pending_length;
            }
            if( NULL == iov[i].iov_base ) {
                /* No destination buffer: expose the user data in place */
                iov[i].iov_base = base_pointer;
            } else {
                MEMCPY( iov[i].iov_base, base_pointer, iov[i].iov_len );
            }
            pConv->bConverted += iov[i].iov_len;
            pending_length -= iov[i].iov_len;
        }
        *out_size = i;
        *max_data = pConv->bConverted - initial_bConverted;
        if( pConv->bConverted == pConv->local_size ) {
            pConv->flags |= CONVERTOR_COMPLETED;
            return 1;
        }
        return 0;
    }

    return pConv->fAdvance( pConv, iov, out_size, max_data );
}

/**
 * Unpack data from the supplied iovec array into the convertor's user buffer.
 *
 * @param pConv     convertor describing the destination data.
 * @param iov       array of input segments.
 * @param out_size  in: number of entries in iov; on completion it is set to
 *                  the number of entries consumed.
 * @param max_data  out: number of bytes unpacked by this call (fast path).
 *
 * @return 0 if more data is expected, 1 once the data has been completely
 *         unpacked; on the generic path the value returned by the
 *         convertor's fAdvance function.
 */
int32_t ompi_convertor_unpack( ompi_convertor_t* pConv,
                               struct iovec* iov, uint32_t* out_size,
                               size_t* max_data )
{
    OMPI_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( pConv, iov, out_size, max_data );

    if( !(pConv->flags & CONVERTOR_WITH_CHECKSUM) &&
        ((pConv->flags & (CONVERTOR_HOMOGENEOUS | DT_FLAG_NO_GAPS)) ==
         (CONVERTOR_HOMOGENEOUS | DT_FLAG_NO_GAPS)) ) {
        /* We are doing conversion on a contiguous datatype on a homogeneous
         * environment. The convertor contains minimal information, we only
         * use the bConverted to manage the conversion.
         */
        uint32_t i;
        char* base_pointer;

        /* Remember the starting position; turned into a byte count on exit */
        *max_data = pConv->bConverted;
        base_pointer = pConv->pBaseBuf + pConv->bConverted +
            pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp;

        for( i = 0; i < *out_size; i++ ) {
            /* This segment reaches (or passes) the end of the data: handle
             * it separately so that it can be truncated.
             */
            if( (pConv->bConverted + iov[i].iov_len) >= pConv->local_size ) {
                goto predefined_data_unpack;
            }
            MEMCPY( base_pointer, iov[i].iov_base, iov[i].iov_len );
            pConv->bConverted += iov[i].iov_len;
            base_pointer += iov[i].iov_len;
        }
        *max_data = pConv->bConverted - (*max_data);
        return 0;

    predefined_data_unpack:
        /* Last segment: copy only what is still missing */
        iov[i].iov_len = pConv->local_size - pConv->bConverted;
        MEMCPY( base_pointer, iov[i].iov_base, iov[i].iov_len );
        pConv->bConverted = pConv->local_size;
        *out_size = i + 1;
        *max_data = pConv->bConverted - (*max_data);
        pConv->flags |= CONVERTOR_COMPLETED;
        return 1;
    }

    return pConv->fAdvance( pConv, iov, out_size, max_data );
}

static inline
int ompi_convertor_create_stack_with_pos_contig( ompi_convertor_t* pConvertor,
                                                 size_t starting_point, const size_t* sizes )
{
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -