📄 convertor.c
字号:
dt_stack_t* pStack; /* pointer to the position on the stack */ const ompi_datatype_t* pData = pConvertor->pDesc; dt_elem_desc_t* pElems; uint32_t count; ptrdiff_t extent; pStack = pConvertor->pStack; /* The prepare function already make the selection on which data representation * we have to use: normal one or the optimized version ? */ pElems = pConvertor->use_desc->desc; count = (uint32_t)(starting_point / pData->size); extent = pData->ub - pData->lb; pStack[0].type = DT_LOOP; /* the first one is always the loop */ pStack[0].count = pConvertor->count - count; pStack[0].index = -1; pStack[0].disp = count * extent; /* now compute the number of pending bytes */ count = (uint32_t)(starting_point - count * pData->size); /* we save the current displacement starting from the begining * of this data. */ if( 0 == count ) { pStack[1].type = pElems->elem.common.type; pStack[1].count = pElems->elem.count; pStack[1].disp = pElems->elem.disp; } else { pStack[1].type = DT_BYTE; pStack[1].count = pData->size - count; pStack[1].disp = pData->true_lb + count; } pStack[1].index = 0; /* useless */ pConvertor->bConverted = starting_point; pConvertor->stack_pos = 1; assert( 0 == pConvertor->partial_length ); return OMPI_SUCCESS;}static inlineint ompi_convertor_create_stack_at_begining( ompi_convertor_t* convertor, const size_t* sizes ){ dt_stack_t* pStack = convertor->pStack; dt_elem_desc_t* pElems; convertor->stack_pos = 1; convertor->partial_length = 0; convertor->bConverted = 0; /* Fill the first position on the stack. This one correspond to the * last fake DT_END_LOOP that we add to the data representation and * allow us to move quickly inside the datatype when we have a count. */ pStack[0].index = -1; pStack[0].count = convertor->count; pStack[0].disp = 0; /* The prepare function already make the selection on which data representation * we have to use: normal one or the optimized version ? */ pElems = convertor->use_desc->desc; pStack[1].index = 0; pStack[1].disp = 0; if( pElems[0].elem.common.type == DT_LOOP ) { pStack[1].count = pElems[0].loop.loops; } else { pStack[1].count = pElems[0].elem.count; } return OMPI_SUCCESS;}int32_t ompi_convertor_set_position_nocheck( ompi_convertor_t* convertor, size_t* position ){ int32_t rc; /* * If we plan to rollback the convertor then first we have to set it * at the beginning. */ if( (0 == (*position)) || ((*position) < convertor->bConverted) ) { rc = ompi_convertor_create_stack_at_begining( convertor, ompi_ddt_local_sizes ); if( 0 == (*position) ) return rc; } if( convertor->flags & DT_FLAG_CONTIGUOUS ) { rc = ompi_convertor_create_stack_with_pos_contig( convertor, (*position), ompi_ddt_local_sizes ); } else { rc = ompi_convertor_generic_simple_position( convertor, position ); } *position = convertor->bConverted; return rc;}/* This macro will initialize a convertor based on a previously created convertor. The idea * is the move outside these function the heavy selection of architecture features for the convertors. * * I consider here that the convertor is clean, either never initialized or already cleanup. */#define OMPI_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ) \ { \ convertor->pBaseBuf = (char*)pUserBuf; \ convertor->count = count; \ \ /* Compute the local and remote sizes */ \ convertor->local_size = convertor->count * datatype->size; \ /* Grab the datatype part of the flags */ \ convertor->flags &= CONVERTOR_TYPE_MASK; \ convertor->flags |= (CONVERTOR_DATATYPE_MASK & datatype->flags); \ convertor->pDesc = (ompi_datatype_t*)datatype; \ \ /* If the data is empty we just mark the convertor as \ * completed. With this flag set the pack and unpack functions \ * will not do anything. In order to decrease the data \ * dependencies (and to speed-up this code) we will not test \ * the convertor->local_size but we can test the 2 components. \ */ \ if( (0 == convertor->count) || (0 == datatype->size) ) { \ convertor->flags |= CONVERTOR_COMPLETED; \ convertor->local_size = convertor->remote_size = 0; \ return OMPI_SUCCESS; \ } \ \ convertor->flags |= CONVERTOR_HOMOGENEOUS; \ if( convertor->remoteArch == ompi_mpi_local_arch ) { \ convertor->remote_size = convertor->local_size; \ convertor->use_desc = &(datatype->opt_desc); \ } else { \ ompi_convertor_master_t* master; \ int i; \ uint64_t bdt_mask = datatype->bdt_used; \ master = convertor->master; \ convertor->remote_size = 0; \ for( i = DT_CHAR; i < DT_MAX_PREDEFINED; i++ ) { \ if( bdt_mask & ((uint64_t)1 << i) ) { \ convertor->remote_size += (datatype->btypes[i] * \ master->remote_sizes[i]);\ } \ } \ convertor->remote_size *= convertor->count; \ convertor->use_desc = &(datatype->desc); \ bdt_mask = datatype->bdt_used & master->hetero_mask; \ if( 0 != bdt_mask ) \ convertor->flags ^= CONVERTOR_HOMOGENEOUS; \ } \ assert( NULL != convertor->use_desc->desc ); \ /* For predefined datatypes (contiguous) do nothing more */ \ /* if checksum is enabled then always continue */ \ if( !(convertor->flags & CONVERTOR_WITH_CHECKSUM) && \ (convertor->flags & DT_FLAG_NO_GAPS) && \ ((convertor->flags & CONVERTOR_SEND) || \ (convertor->flags & CONVERTOR_HOMOGENEOUS)) ) { \ convertor->bConverted = 0; \ return OMPI_SUCCESS; \ } \ { \ uint32_t required_stack_length = datatype->btypes[DT_LOOP] + 1; \ \ if( required_stack_length > convertor->stack_size ) { \ convertor->stack_size = required_stack_length; \ convertor->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * \ convertor->stack_size ); \ } else { \ convertor->pStack = convertor->static_stack; \ convertor->stack_size = DT_STATIC_STACK_SIZE; \ } \ } \ ompi_convertor_create_stack_at_begining( convertor, ompi_ddt_local_sizes ); \ }int32_tompi_convertor_prepare_for_recv( ompi_convertor_t* convertor, const struct ompi_datatype_t* datatype, int32_t count, const void* pUserBuf ){ /* Here I should check that the data is not overlapping */ convertor->flags |= CONVERTOR_RECV; OMPI_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ); if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) {#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) { convertor->fAdvance = ompi_unpack_general_checksum; } else#endif if( convertor->pDesc->flags & DT_FLAG_CONTIGUOUS ) { convertor->fAdvance = ompi_unpack_homogeneous_contig_checksum; } else { convertor->fAdvance = ompi_generic_simple_unpack_checksum; } } else {#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) { convertor->fAdvance = ompi_unpack_general; } else#endif if( convertor->pDesc->flags & DT_FLAG_CONTIGUOUS ) { convertor->fAdvance = ompi_unpack_homogeneous_contig; } else { convertor->fAdvance = ompi_generic_simple_unpack; } } return OMPI_SUCCESS;}int32_tompi_convertor_prepare_for_send( ompi_convertor_t* convertor, const struct ompi_datatype_t* datatype, int32_t count, const void* pUserBuf ){ convertor->flags |= CONVERTOR_SEND; OMPI_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ); if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) { if( datatype->flags & DT_FLAG_CONTIGUOUS ) { if( ((datatype->ub - datatype->lb) == (ptrdiff_t)datatype->size) || (1 >= convertor->count) ) convertor->fAdvance = ompi_pack_homogeneous_contig_checksum; else convertor->fAdvance = ompi_pack_homogeneous_contig_with_gaps_checksum; } else { convertor->fAdvance = ompi_generic_simple_pack_checksum; } } else { if( datatype->flags & DT_FLAG_CONTIGUOUS ) { if( ((datatype->ub - datatype->lb) == (ptrdiff_t)datatype->size) || (1 >= convertor->count) ) convertor->fAdvance = ompi_pack_homogeneous_contig; else convertor->fAdvance = ompi_pack_homogeneous_contig_with_gaps; } else { convertor->fAdvance = ompi_generic_simple_pack; } } return OMPI_SUCCESS;}/* * These functions can be used in order to create an IDENTICAL copy of one convertor. In this * context IDENTICAL means that the datatype and count and all other properties of the basic * convertor get replicated on this new convertor. However, the references to the datatype * are not increased. This function take special care about the stack. If all the cases the * stack is created with the correct number of entries but if the copy_stack is true (!= 0) * then the content of the old stack is copied on the new one. The result will be a convertor * ready to use starting from the old position. If copy_stack is false then the convertor * is created with a empty stack (you have to use ompi_convertor_set_position before using it). */int ompi_convertor_clone( const ompi_convertor_t* source, ompi_convertor_t* destination, int32_t copy_stack ){ destination->remoteArch = source->remoteArch; destination->flags = source->flags | CONVERTOR_CLONE; destination->pDesc = source->pDesc; destination->use_desc = source->use_desc; destination->count = source->count; destination->pBaseBuf = source->pBaseBuf; destination->fAdvance = source->fAdvance; destination->master = source->master; destination->local_size = source->local_size; destination->remote_size = source->remote_size; /* create the stack */ if( source->stack_size > DT_STATIC_STACK_SIZE ) { destination->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * source->stack_size ); } else { destination->pStack = destination->static_stack; } destination->stack_size = source->stack_size; /* initialize the stack */ if( 0 == copy_stack ) { destination->bConverted = -1; destination->stack_pos = -1; } else { memcpy( destination->pStack, source->pStack, sizeof(dt_stack_t) * (source->stack_pos+1) ); destination->bConverted = source->bConverted; destination->stack_pos = source->stack_pos; } return OMPI_SUCCESS;}void ompi_convertor_dump( ompi_convertor_t* convertor ){ printf( "Convertor %p count %d stack position %d bConverted %ld\n", (void*)convertor, convertor->count, convertor->stack_pos, (unsigned long)convertor->bConverted ); printf( "\tlocal_size %ld remote_size %ld flags %X stack_size %d pending_length %d\n", (unsigned long)convertor->local_size, (unsigned long)convertor->remote_size, convertor->flags, convertor->stack_size, convertor->partial_length ); ompi_ddt_dump( convertor->pDesc ); printf( "Actual stack representation\n" ); ompi_ddt_dump_stack( convertor->pStack, convertor->stack_pos, convertor->pDesc->desc.desc, convertor->pDesc->name );}void ompi_ddt_dump_stack( const dt_stack_t* pStack, int stack_pos, const union dt_elem_desc* pDesc, const char* name ){ opal_output( 0, "\nStack %p stack_pos %d name %s\n", (void*)pStack, stack_pos, name ); for( ; stack_pos >= 0; stack_pos-- ) { opal_output( 0, "%d: pos %d count %d disp %ld ", stack_pos, pStack[stack_pos].index, (int)pStack[stack_pos].count, (long)pStack[stack_pos].disp ); if( pStack->index != -1 ) opal_output( 0, "\t[desc count %d disp %ld extent %ld]\n", pDesc[pStack[stack_pos].index].elem.count, (long)pDesc[pStack[stack_pos].index].elem.disp, (long)pDesc[pStack[stack_pos].index].elem.extent ); else opal_output( 0, "\n" ); } opal_output( 0, "\n" );}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -