⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 convertor.c

📁 MPI stands for the Message Passing Interface. Written by the MPI Forum (a large committee comprising
💻 C
📖 第 1 页 / 共 2 页
字号:
    dt_stack_t* pStack;   /* pointer to the position on the stack */    const ompi_datatype_t* pData = pConvertor->pDesc;    dt_elem_desc_t* pElems;    uint32_t count;    ptrdiff_t extent;    pStack = pConvertor->pStack;    /* The prepare function already make the selection on which data representation     * we have to use: normal one or the optimized version ?     */    pElems = pConvertor->use_desc->desc;    count = (uint32_t)(starting_point / pData->size);    extent = pData->ub - pData->lb;    pStack[0].type     = DT_LOOP;  /* the first one is always the loop */    pStack[0].count    = pConvertor->count - count;    pStack[0].index    = -1;    pStack[0].disp     = count * extent;    /* now compute the number of pending bytes */    count = (uint32_t)(starting_point - count * pData->size);    /* we save the current displacement starting from the begining     * of this data.     */    if( 0 == count ) {        pStack[1].type     = pElems->elem.common.type;        pStack[1].count    = pElems->elem.count;        pStack[1].disp     = pElems->elem.disp;    } else {        pStack[1].type  = DT_BYTE;        pStack[1].count = pData->size - count;        pStack[1].disp  = pData->true_lb + count;    }    pStack[1].index    = 0;  /* useless */    pConvertor->bConverted = starting_point;    pConvertor->stack_pos = 1;    assert( 0 == pConvertor->partial_length );    return OMPI_SUCCESS;}static inlineint ompi_convertor_create_stack_at_begining( ompi_convertor_t* convertor,                                             const size_t* sizes ){    dt_stack_t* pStack = convertor->pStack;    dt_elem_desc_t* pElems;    convertor->stack_pos      = 1;    convertor->partial_length = 0;    convertor->bConverted     = 0;    /* Fill the first position on the stack. This one correspond to the     * last fake DT_END_LOOP that we add to the data representation and     * allow us to move quickly inside the datatype when we have a count.     */    pStack[0].index = -1;    pStack[0].count = convertor->count;    pStack[0].disp  = 0;    /* The prepare function already make the selection on which data representation     * we have to use: normal one or the optimized version ?     */    pElems = convertor->use_desc->desc;    pStack[1].index = 0;    pStack[1].disp = 0;    if( pElems[0].elem.common.type == DT_LOOP ) {        pStack[1].count = pElems[0].loop.loops;    } else {        pStack[1].count = pElems[0].elem.count;    }    return OMPI_SUCCESS;}int32_t ompi_convertor_set_position_nocheck( ompi_convertor_t* convertor,                                             size_t* position ){    int32_t rc;    /*     * If we plan to rollback the convertor then first we have to set it     * at the beginning.     */    if( (0 == (*position)) || ((*position) < convertor->bConverted) ) {        rc = ompi_convertor_create_stack_at_begining( convertor, ompi_ddt_local_sizes );        if( 0 == (*position) ) return rc;    }    if( convertor->flags & DT_FLAG_CONTIGUOUS ) {        rc = ompi_convertor_create_stack_with_pos_contig( convertor, (*position),                                                          ompi_ddt_local_sizes );    } else {        rc = ompi_convertor_generic_simple_position( convertor, position );    }    *position = convertor->bConverted;    return rc;}/* This macro will initialize a convertor based on a previously created convertor. The idea * is the move outside these function the heavy selection of architecture features for the convertors. * * I consider here that the convertor is clean, either never initialized or already cleanup. */#define OMPI_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf )  \    {                                                                   \        convertor->pBaseBuf        = (char*)pUserBuf;                   \        convertor->count           = count;                             \                                                                        \        /* Compute the local and remote sizes */                        \        convertor->local_size = convertor->count * datatype->size;      \        /* Grab the datatype part of the flags */                       \        convertor->flags         &= CONVERTOR_TYPE_MASK;                \        convertor->flags         |= (CONVERTOR_DATATYPE_MASK & datatype->flags); \        convertor->pDesc          = (ompi_datatype_t*)datatype;         \                                                                        \        /* If the data is empty we just mark the convertor as           \         * completed. With this flag set the pack and unpack functions  \         * will not do anything. In order to decrease the data          \         * dependencies (and to speed-up this code) we will not test    \         * the convertor->local_size but we can test the 2 components.  \         */                                                             \        if( (0 == convertor->count) || (0 == datatype->size) ) {        \            convertor->flags |= CONVERTOR_COMPLETED;                    \            convertor->local_size = convertor->remote_size = 0;         \            return OMPI_SUCCESS;                                        \        }                                                               \                                                                        \        convertor->flags |= CONVERTOR_HOMOGENEOUS;                      \        if( convertor->remoteArch == ompi_mpi_local_arch ) {            \            convertor->remote_size = convertor->local_size;             \            convertor->use_desc = &(datatype->opt_desc);                \        } else {                                                        \            ompi_convertor_master_t* master;                            \            int i;                                                      \            uint64_t bdt_mask = datatype->bdt_used;                     \            master = convertor->master;                                 \            convertor->remote_size = 0;                                 \            for( i = DT_CHAR; i < DT_MAX_PREDEFINED; i++ ) {            \                if( bdt_mask & ((uint64_t)1 << i) ) {                   \                    convertor->remote_size += (datatype->btypes[i] *    \                                               master->remote_sizes[i]);\                }                                                       \            }                                                           \            convertor->remote_size *= convertor->count;                 \            convertor->use_desc = &(datatype->desc);                    \            bdt_mask = datatype->bdt_used & master->hetero_mask;        \            if( 0 != bdt_mask )                                         \                convertor->flags ^= CONVERTOR_HOMOGENEOUS;              \        }                                                               \        assert( NULL != convertor->use_desc->desc );                    \        /* For predefined datatypes (contiguous) do nothing more */     \        /* if checksum is enabled then always continue */               \        if( !(convertor->flags & CONVERTOR_WITH_CHECKSUM) &&            \            (convertor->flags & DT_FLAG_NO_GAPS) &&                     \            ((convertor->flags & CONVERTOR_SEND) ||                     \             (convertor->flags & CONVERTOR_HOMOGENEOUS)) ) {            \            convertor->bConverted = 0;                                  \            return OMPI_SUCCESS;                                        \        }                                                               \        {                                                               \            uint32_t required_stack_length = datatype->btypes[DT_LOOP] + 1; \                                                                        \            if( required_stack_length > convertor->stack_size ) {       \                convertor->stack_size = required_stack_length;          \                convertor->pStack     = (dt_stack_t*)malloc(sizeof(dt_stack_t) * \                                                            convertor->stack_size ); \            } else {                                                    \                convertor->pStack = convertor->static_stack;            \                convertor->stack_size = DT_STATIC_STACK_SIZE;           \            }                                                           \        }                                                               \        ompi_convertor_create_stack_at_begining( convertor, ompi_ddt_local_sizes ); \    }int32_tompi_convertor_prepare_for_recv( ompi_convertor_t* convertor,                                 const struct ompi_datatype_t* datatype,                                 int32_t count,                                 const void* pUserBuf ){    /* Here I should check that the data is not overlapping */    convertor->flags      |= CONVERTOR_RECV;    OMPI_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf );    if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) {#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT        if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) {            convertor->fAdvance = ompi_unpack_general_checksum;        } else#endif        if( convertor->pDesc->flags & DT_FLAG_CONTIGUOUS ) {            convertor->fAdvance = ompi_unpack_homogeneous_contig_checksum;        } else {            convertor->fAdvance = ompi_generic_simple_unpack_checksum;        }    } else {#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT        if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) {            convertor->fAdvance = ompi_unpack_general;        } else#endif        if( convertor->pDesc->flags & DT_FLAG_CONTIGUOUS ) {            convertor->fAdvance = ompi_unpack_homogeneous_contig;        } else {            convertor->fAdvance = ompi_generic_simple_unpack;        }    }    return OMPI_SUCCESS;}int32_tompi_convertor_prepare_for_send( ompi_convertor_t* convertor,                                 const struct ompi_datatype_t* datatype,                                 int32_t count,                                 const void* pUserBuf ){    convertor->flags            |= CONVERTOR_SEND;    OMPI_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf );    if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) {        if( datatype->flags & DT_FLAG_CONTIGUOUS ) {            if( ((datatype->ub - datatype->lb) == (ptrdiff_t)datatype->size)                 || (1 >= convertor->count) )                convertor->fAdvance = ompi_pack_homogeneous_contig_checksum;            else                convertor->fAdvance = ompi_pack_homogeneous_contig_with_gaps_checksum;        } else {            convertor->fAdvance = ompi_generic_simple_pack_checksum;        }    } else {        if( datatype->flags & DT_FLAG_CONTIGUOUS ) {            if( ((datatype->ub - datatype->lb) == (ptrdiff_t)datatype->size)                 || (1 >= convertor->count) )                convertor->fAdvance = ompi_pack_homogeneous_contig;            else                convertor->fAdvance = ompi_pack_homogeneous_contig_with_gaps;        } else {            convertor->fAdvance = ompi_generic_simple_pack;        }    }    return OMPI_SUCCESS;}/* * These functions can be used in order to create an IDENTICAL copy of one convertor. In this * context IDENTICAL means that the datatype and count and all other properties of the basic * convertor get replicated on this new convertor. However, the references to the datatype * are not increased. This function take special care about the stack. If all the cases the * stack is created with the correct number of entries but if the copy_stack is true (!= 0) * then the content of the old stack is copied on the new one. The result will be a convertor * ready to use starting from the old position. If copy_stack is false then the convertor * is created with a empty stack (you have to use ompi_convertor_set_position before using it). */int ompi_convertor_clone( const ompi_convertor_t* source,                          ompi_convertor_t* destination,                          int32_t copy_stack ){    destination->remoteArch        = source->remoteArch;    destination->flags             = source->flags | CONVERTOR_CLONE;    destination->pDesc             = source->pDesc;    destination->use_desc          = source->use_desc;    destination->count             = source->count;    destination->pBaseBuf          = source->pBaseBuf;    destination->fAdvance          = source->fAdvance;    destination->master            = source->master;    destination->local_size        = source->local_size;    destination->remote_size       = source->remote_size;    /* create the stack */    if( source->stack_size > DT_STATIC_STACK_SIZE ) {        destination->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * source->stack_size );    } else {        destination->pStack = destination->static_stack;    }    destination->stack_size = source->stack_size;    /* initialize the stack */    if( 0 == copy_stack ) {        destination->bConverted = -1;        destination->stack_pos  = -1;    } else {        memcpy( destination->pStack, source->pStack, sizeof(dt_stack_t) * (source->stack_pos+1) );        destination->bConverted = source->bConverted;        destination->stack_pos  = source->stack_pos;    }    return OMPI_SUCCESS;}void ompi_convertor_dump( ompi_convertor_t* convertor ){    printf( "Convertor %p count %d stack position %d bConverted %ld\n", (void*)convertor,            convertor->count, convertor->stack_pos, (unsigned long)convertor->bConverted );    printf( "\tlocal_size %ld remote_size %ld flags %X stack_size %d pending_length %d\n",            (unsigned long)convertor->local_size, (unsigned long)convertor->remote_size,            convertor->flags, convertor->stack_size, convertor->partial_length );    ompi_ddt_dump( convertor->pDesc );    printf( "Actual stack representation\n" );    ompi_ddt_dump_stack( convertor->pStack, convertor->stack_pos,                         convertor->pDesc->desc.desc, convertor->pDesc->name );}void ompi_ddt_dump_stack( const dt_stack_t* pStack, int stack_pos,                          const union dt_elem_desc* pDesc, const char* name ){    opal_output( 0, "\nStack %p stack_pos %d name %s\n", (void*)pStack, stack_pos, name );    for( ; stack_pos >= 0; stack_pos-- ) {        opal_output( 0, "%d: pos %d count %d disp %ld ", stack_pos, pStack[stack_pos].index,                     (int)pStack[stack_pos].count, (long)pStack[stack_pos].disp );        if( pStack->index != -1 )            opal_output( 0, "\t[desc count %d disp %ld extent %ld]\n",                         pDesc[pStack[stack_pos].index].elem.count,                         (long)pDesc[pStack[stack_pos].index].elem.disp,                         (long)pDesc[pStack[stack_pos].index].elem.extent );        else            opal_output( 0, "\n" );    }    opal_output( 0, "\n" );}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -