📄 datatype_pack.c
字号:
/* -*- Mode: C; c-basic-offset:4 ; -*- *//* * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */#include "ompi_config.h"#include "ompi/datatype/convertor_internal.h"#include "ompi/datatype/datatype_internal.h"#if OMPI_ENABLE_DEBUGextern int ompi_pack_debug;#define DO_DEBUG(INST) if( ompi_pack_debug ) { INST }#else#define DO_DEBUG(INST)#endif /* OMPI_ENABLE_DEBUG */#include "ompi/datatype/datatype_checksum.h"#include "ompi/datatype/datatype_pack.h"#include "ompi/datatype/datatype_prototypes.h"#if defined(CHECKSUM)#define ompi_pack_homogeneous_contig_function ompi_pack_homogeneous_contig_checksum#define ompi_pack_homogeneous_contig_with_gaps_function ompi_pack_homogeneous_contig_with_gaps_checksum#define ompi_generic_simple_pack_function ompi_generic_simple_pack_checksum#else#define ompi_pack_homogeneous_contig_function ompi_pack_homogeneous_contig#define ompi_pack_homogeneous_contig_with_gaps_function ompi_pack_homogeneous_contig_with_gaps#define ompi_generic_simple_pack_function ompi_generic_simple_pack#endif /* defined(CHECKSUM) */#define IOVEC_MEM_LIMIT 8192/* the contig versions does not use the stack. They can easily retrieve * the status with just the informations from pConvertor->bConverted. 
*/

/*
 * Pack a contiguous datatype into the iovec array supplied by the caller.
 *
 * The position in the user buffer is rebuilt from pConv->pBaseBuf, the
 * first_elem_disp stored in the last used description entry, and the
 * displacements saved in the first two stack entries.
 *
 * pConv     convertor holding the user buffer and the packing progress
 *           (bConverted, local_size, pStack, flags).
 * iov       array of iovec entries to fill.  For an entry whose iov_base is
 *           NULL no copy is done: iov_base is simply pointed at the user
 *           data.  Otherwise the data is copied into iov_base.  In both
 *           cases iov_len is clamped to the number of bytes left to pack.
 * out_size  in: number of entries available in iov;
 *           out: number of entries actually used.
 * max_data  out: number of bytes packed by this call (the incoming value is
 *           not read by this function).
 *
 * Returns 1 and sets CONVERTOR_COMPLETED in pConv->flags once bConverted
 * has reached local_size; returns 0 otherwise.
 */
int32_t
ompi_pack_homogeneous_contig_function( ompi_convertor_t* pConv,
                                       struct iovec* iov,
                                       uint32_t* out_size,
                                       size_t* max_data )
{
    dt_stack_t* stack = pConv->pStack;
    const size_t already_packed = pConv->bConverted;
    size_t remaining = pConv->local_size - already_packed;
    const ptrdiff_t base_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp;
    char* src = pConv->pBaseBuf + base_displ + stack[0].disp + stack[1].disp;
    uint32_t used = 0;

    /* Consume iovec entries until we run out of entries or of data. */
    while( (used < (*out_size)) && (0 != remaining) ) {
        struct iovec* slot = &iov[used];

        /* Never hand out more than what is left to pack. */
        if( (size_t)slot->iov_len > remaining ) {
            slot->iov_len = remaining;
        }

        if( NULL == slot->iov_base ) {
            /* No buffer from the upper level: expose the user memory directly. */
            slot->iov_base = src;
            COMPUTE_CSUM( slot->iov_base, slot->iov_len, pConv );
        } else {
            /* A buffer was provided: copy the contiguous bytes into it. */
            OMPI_DDT_SAFEGUARD_POINTER( src, slot->iov_len,
                                        pConv->pBaseBuf, pConv->pDesc, pConv->count );
            MEMCPY_CSUM( slot->iov_base, src, slot->iov_len, pConv );
        }

        /* Advance every progress indicator by the amount just handled. */
        remaining -= slot->iov_len;
        pConv->bConverted += slot->iov_len;
        stack[0].disp += slot->iov_len;
        src += slot->iov_len;
        used++;
    }

    /* Report how much was done during this call. */
    *max_data = pConv->bConverted - already_packed;
    *out_size = used;

    if( pConv->bConverted != pConv->local_size ) {
        return 0;
    }
    pConv->flags |= CONVERTOR_COMPLETED;
    return 1;
}

int32_tompi_pack_homogeneous_contig_with_gaps_function( ompi_convertor_t* pConv, struct iovec* iov, uint32_t* out_size, size_t* max_data ){ const ompi_datatype_t* pData = pConv->pDesc; dt_stack_t* pStack = pConv->pStack; char *user_memory, *packed_buffer; uint32_t i, index, iov_count; size_t max_allowed, total_bytes_converted = 0; ptrdiff_t extent; ptrdiff_t initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp; 
extent = pData->ub - pData->lb; assert( (pData->flags & DT_FLAG_CONTIGUOUS) && ((ptrdiff_t)pData->size != extent) ); /* Limit the amount of packed data to the data left over on this convertor */ max_allowed = pConv->local_size - pConv->bConverted; if( max_allowed > (*max_data) ) max_allowed = (*max_data); i = (uint32_t)(pConv->bConverted / pData->size); /* how many we already pack */ /* There are some optimizations that can be done if the upper level * does not provide a buffer. */ user_memory = pConv->pBaseBuf + initial_displ + pStack[0].disp + pStack[1].disp; for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { if( 0 == max_allowed ) break; /* we're done this time */ if( iov[iov_count].iov_base == NULL ) { /* special case for small data. We avoid allocating memory if we * can fill the iovec directly with the address of the remaining * data. */ if( (uint32_t)pStack->count < ((*out_size) - iov_count) ) { pStack[1].count = pData->size - (pConv->bConverted % pData->size); for( index = iov_count; i < pConv->count; i++, index++ ) { iov[index].iov_base = user_memory; iov[index].iov_len = pStack[1].count; pStack[0].disp += extent; total_bytes_converted += pStack[1].count; pStack[1].disp = 0; /* reset it for the next round */ pStack[1].count = pData->size; user_memory = pConv->pBaseBuf + initial_displ + pStack[0].disp; COMPUTE_CSUM( iov[index].iov_base, iov[index].iov_len, pConv ); } *out_size = iov_count + index; pConv->bConverted += total_bytes_converted; *max_data = total_bytes_converted; pConv->flags |= CONVERTOR_COMPLETED; return 1; /* we're done */ } /* now special case for big contiguous data with gaps around */ if( pData->size >= IOVEC_MEM_LIMIT ) { /* as we dont have to copy any data, we can simply fill the iovecs * with data from the user data description. 
*/ for( index = iov_count; (i < pConv->count) && (index < (*out_size)); i++, index++ ) { if( max_allowed < pData->size ) { iov[index].iov_base = user_memory; iov[index].iov_len = max_allowed; max_allowed = 0; COMPUTE_CSUM( iov[index].iov_base, iov[index].iov_len, pConv ); break; } else { iov[index].iov_base = user_memory; iov[index].iov_len = pData->size; user_memory += extent; COMPUTE_CSUM( iov[index].iov_base, (size_t)iov[index].iov_len, pConv ); } max_allowed -= iov[index].iov_len; total_bytes_converted += iov[index].iov_len; } *out_size = index; *max_data = total_bytes_converted; pConv->bConverted += total_bytes_converted; if( pConv->bConverted == pConv->local_size ) { pConv->flags |= CONVERTOR_COMPLETED; return 1; } return 0; } } { uint32_t counter; size_t done; packed_buffer = iov[iov_count].iov_base; done = pConv->bConverted - i * pData->size; /* partial data from last pack */ if( done != 0 ) { /* still some data to copy from the last time */ done = pData->size - done; OMPI_DDT_SAFEGUARD_POINTER( user_memory, done, pConv->pBaseBuf, pData, pConv->count ); MEMCPY_CSUM( packed_buffer, user_memory, done, pConv ); packed_buffer += done;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -