📄 typesize_support.c
字号:
/* -*- Mode: C; c-basic-offset:4 ; -*- *//* * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. *//* Notes: * - Not used my MPICH2 * - "alignsz" value for non-structs is based simply on element * size. This seems nonintuitive, but so far is working for * MPICH2. If we start to get weird alignment problems with * complex structs of non-basics, that would be a place to look. * - Depends on configure tests that define (if available): * HAVE_LONG_LONG_INT * HAVE_LONG_DOUBLE */#include "./dataloop.h"#include "typesize_support.h"static void DLOOP_Type_calc_footprint_struct(MPI_Datatype type, int combiner, int *ints, MPI_Aint *aints, MPI_Datatype *types, DLOOP_Type_footprint *tfp);static int DLOOP_Named_type_alignsize(MPI_Datatype type, MPI_Aint disp);static int DLOOP_Structalign_integer_max(void);static int DLOOP_Structalign_float_max(void);static int DLOOP_Structalign_double_max(void);static int DLOOP_Structalign_long_double_max(void);static int DLOOP_Structalign_double_position(void);static int DLOOP_Structalign_llint_position(void);/* LB/UB calculation helper macros, from MPICH2 *//* DLOOP_DATATYPE_CONTIG_LB_UB() * * Determines the new LB and UB for a block of old types given the * old type's LB, UB, and extent, and a count of these types in the * block. * * Note: if the displacement is non-zero, the DLOOP_DATATYPE_BLOCK_LB_UB() * should be used instead (see below). */#define DLOOP_DATATYPE_CONTIG_LB_UB(cnt_, \ old_lb_, \ old_ub_, \ old_extent_, \ lb_, \ ub_) \ { \ if (cnt_ == 0) { \ lb_ = old_lb_; \ ub_ = old_ub_; \ } \ else if (old_ub_ >= old_lb_) { \ lb_ = old_lb_; \ ub_ = old_ub_ + (old_extent_) * (cnt_ - 1); \ } \ else /* negative extent */ { \ lb_ = old_lb_ + (old_extent_) * (cnt_ - 1); \ ub_ = old_ub_; \ } \ }/* DLOOP_DATATYPE_VECTOR_LB_UB() * * Determines the new LB and UB for a vector of blocks of old types * given the old type's LB, UB, and extent, and a count, stride, and * blocklen describing the vectorization. */#define DLOOP_DATATYPE_VECTOR_LB_UB(cnt_, \ stride_, \ blklen_, \ old_lb_, \ old_ub_, \ old_extent_, \ lb_, \ ub_) \ { \ if (cnt_ == 0 || blklen_ == 0) { \ lb_ = old_lb_; \ ub_ = old_ub_; \ } \ else if (stride_ >= 0 && (old_extent_) >= 0) { \ lb_ = old_lb_; \ ub_ = old_ub_ + (old_extent_) * ((blklen_) - 1) + \ (stride_) * ((cnt_) - 1); \ } \ else if (stride_ < 0 && (old_extent_) >= 0) { \ lb_ = old_lb_ + (stride_) * ((cnt_) - 1); \ ub_ = old_ub_ + (old_extent_) * ((blklen_) - 1); \ } \ else if (stride_ >= 0 && (old_extent_) < 0) { \ lb_ = old_lb_ + (old_extent_) * ((blklen_) - 1); \ ub_ = old_ub_ + (stride_) * ((cnt_) - 1); \ } \ else { \ lb_ = old_lb_ + (old_extent_) * ((blklen_) - 1) + \ (stride_) * ((cnt_) - 1); \ ub_ = old_ub_; \ } \ }/* DLOOP_DATATYPE_BLOCK_LB_UB() * * Determines the new LB and UB for a block of old types given the LB, * UB, and extent of the old type as well as a new displacement and count * of types. * * Note: we need the extent here in addition to the lb and ub because the * extent might have some padding in it that we need to take into account. */#define DLOOP_DATATYPE_BLOCK_LB_UB(cnt_, \ disp_, \ old_lb_, \ old_ub_, \ old_extent_, \ lb_, \ ub_) \ { \ if (cnt_ == 0) { \ lb_ = old_lb_ + (disp_); \ ub_ = old_ub_ + (disp_); \ } \ else if (old_ub_ >= old_lb_) { \ lb_ = old_lb_ + (disp_); \ ub_ = old_ub_ + (disp_) + (old_extent_) * ((cnt_) - 1); \ } \ else /* negative extent */ { \ lb_ = old_lb_ + (disp_) + (old_extent_) * ((cnt_) - 1); \ ub_ = old_ub_ + (disp_); \ } \ }void PREPEND_PREFIX(Type_calc_footprint)(MPI_Datatype type, DLOOP_Type_footprint *tfp){ int mpi_errno; int nr_ints, nr_aints, nr_types, combiner; int *ints; MPI_Aint *aints; MPI_Datatype *types; /* used to store parameters for constituent types */ DLOOP_Offset size = 0, lb = 0, ub = 0, true_lb = 0, true_ub = 0; DLOOP_Offset extent = 0, alignsz; int has_sticky_lb, has_sticky_ub; /* used for vector/hvector/hvector_integer calculations */ DLOOP_Offset stride; /* used for indexed/hindexed calculations */ DLOOP_Offset disp; /* used for calculations on types with more than one block of data */ DLOOP_Offset i, min_lb, max_ub, ntypes, tmp_lb, tmp_ub; /* used for processing subarray and darray types */ int ndims; MPI_Datatype tmptype; mpi_errno = PMPI_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner); DLOOP_Assert(mpi_errno == MPI_SUCCESS); if (combiner == MPI_COMBINER_NAMED) { int mpisize; MPI_Aint mpiextent; PMPI_Type_size(type, &mpisize); PMPI_Type_extent(type, &mpiextent); tfp->size = (DLOOP_Offset) mpisize; tfp->lb = 0; tfp->ub = (DLOOP_Offset) mpiextent; tfp->true_lb = 0; tfp->true_ub = (DLOOP_Offset) mpiextent; tfp->extent = (DLOOP_Offset) mpiextent; tfp->alignsz = DLOOP_Named_type_alignsize(type, (MPI_Aint) 0); tfp->has_sticky_lb = (type == MPI_LB) ? 1 : 0; tfp->has_sticky_ub = (type == MPI_UB) ? 1 : 0; goto clean_exit; } /* get access to contents; need it immediately to check for zero count */ PREPEND_PREFIX(Type_access_contents)(type, &ints, &aints, &types); /* knock out all the zero count cases */ if ((combiner == MPI_COMBINER_CONTIGUOUS || combiner == MPI_COMBINER_VECTOR || combiner == MPI_COMBINER_HVECTOR_INTEGER || combiner == MPI_COMBINER_HVECTOR || combiner == MPI_COMBINER_INDEXED_BLOCK || combiner == MPI_COMBINER_INDEXED || combiner == MPI_COMBINER_HINDEXED_INTEGER || combiner == MPI_COMBINER_STRUCT_INTEGER || combiner == MPI_COMBINER_STRUCT) && ints[0] == 0) { tfp->size = tfp->lb = tfp->ub = tfp->extent = tfp->alignsz = 0; tfp->true_lb = tfp->true_ub = 0; tfp->has_sticky_lb = tfp->has_sticky_ub = 0; goto clean_exit; } if (combiner != MPI_COMBINER_STRUCT && combiner != MPI_COMBINER_STRUCT_INTEGER) { DLOOP_Type_footprint cfp; PREPEND_PREFIX(Type_calc_footprint)(types[0], &cfp); size = cfp.size; lb = cfp.lb; ub = cfp.ub; true_lb = cfp.true_lb; true_ub = cfp.true_ub; extent = cfp.extent; alignsz = cfp.alignsz; has_sticky_lb = cfp.has_sticky_lb; has_sticky_ub = cfp.has_sticky_ub; /* initialize some common values so we don't have to assign * them in every case below. */ tfp->alignsz = alignsz; tfp->has_sticky_lb = has_sticky_lb; tfp->has_sticky_ub = has_sticky_ub; } switch(combiner) { case MPI_COMBINER_DUP: tfp->size = size; tfp->lb = lb; tfp->ub = ub; tfp->true_lb = true_lb; tfp->true_ub = true_ub; tfp->extent = extent; break; case MPI_COMBINER_RESIZED: tfp->size = size; tfp->lb = aints[0]; /* lb */ tfp->ub = aints[0] + aints[1]; tfp->true_lb = true_lb; tfp->true_ub = true_ub; tfp->extent = aints[1]; /* extent */ tfp->has_sticky_lb = 1; tfp->has_sticky_ub = 1; break; case MPI_COMBINER_CONTIGUOUS: DLOOP_DATATYPE_CONTIG_LB_UB(ints[0] /* count */, lb, ub, extent, tfp->lb, tfp->ub); tfp->true_lb = tfp->lb + (true_lb - lb); tfp->true_ub = tfp->ub + (true_ub - ub); tfp->size = ints[0] * size; tfp->extent = tfp->ub - tfp->lb; break; case MPI_COMBINER_VECTOR: case MPI_COMBINER_HVECTOR: case MPI_COMBINER_HVECTOR_INTEGER: if (combiner == MPI_COMBINER_VECTOR) stride = ints[2] * extent; else if (combiner == MPI_COMBINER_HVECTOR) stride = aints[0]; else /* HVECTOR_INTEGER */ stride = ints[2]; DLOOP_DATATYPE_VECTOR_LB_UB(ints[0] /* count */, stride /* stride in bytes */, ints[1] /* blklen */, lb, ub, extent, tfp->lb, tfp->ub); tfp->true_lb = tfp->lb + (true_lb - lb); tfp->true_ub = tfp->ub + (true_ub - ub); tfp->size = ints[0] * ints[1] * size; tfp->extent = tfp->ub - tfp->lb; break; case MPI_COMBINER_INDEXED_BLOCK: /* prime min_lb and max_ub */ DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */, ints[2] * extent /* disp */, lb, ub, extent, min_lb, max_ub); for (i=1; i < ints[0]; i++) { DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */, ints[i+2] * extent /* disp */, lb, ub, extent, tmp_lb, tmp_ub); if (tmp_lb < min_lb) min_lb = tmp_lb; if (tmp_ub > max_ub) max_ub = tmp_ub; } tfp->size = ints[0] * ints[1] * size; tfp->lb = min_lb; tfp->ub = max_ub; tfp->true_lb = min_lb + (true_lb - lb); tfp->true_ub = max_ub + (true_ub - ub); tfp->extent = tfp->ub - tfp->lb; break; case MPI_COMBINER_INDEXED: case MPI_COMBINER_HINDEXED_INTEGER: case MPI_COMBINER_HINDEXED: /* find first non-zero blocklength element */ for (i=0; i < ints[0] && ints[i+1] == 0; i++); if (i == ints[0]) { /* all zero blocklengths */ tfp->size = tfp->lb = tfp->ub = tfp->extent = tfp->alignsz = 0; tfp->has_sticky_lb = tfp->has_sticky_ub = 0; } else { /* prime min_lb, max_ub, count */ ntypes = ints[i+1]; if (combiner == MPI_COMBINER_INDEXED) disp = ints[ints[0]+i+1] * extent; else if (combiner == MPI_COMBINER_HINDEXED_INTEGER) disp = ints[ints[0]+i+1]; else /* MPI_COMBINER_HINDEXED */ disp = aints[i]; DLOOP_DATATYPE_BLOCK_LB_UB(ints[i+1] /* blklen */, disp, lb, ub, extent, min_lb, max_ub); for (i++; i < ints[0]; i++) { /* skip zero blocklength elements */ if (ints[i+1] == 0) continue; ntypes += ints[i+1]; if (combiner == MPI_COMBINER_INDEXED) disp = ints[ints[0]+i+1] * extent; else if (combiner == MPI_COMBINER_HINDEXED_INTEGER) disp = ints[ints[0]+i+1]; else /* MPI_COMBINER_HINDEXED */ disp = aints[i]; DLOOP_DATATYPE_BLOCK_LB_UB(ints[i+1], disp, lb, ub, extent, tmp_lb, tmp_ub); if (tmp_lb < min_lb) min_lb = tmp_lb; if (tmp_ub > max_ub) max_ub = tmp_ub; } tfp->size = ntypes * size; tfp->lb = min_lb; tfp->ub = max_ub; tfp->true_lb = min_lb + (true_lb - lb); tfp->true_ub = max_ub + (true_ub - ub); tfp->extent = tfp->ub - tfp->lb; } break; case MPI_COMBINER_STRUCT_INTEGER: DLOOP_Assert(combiner != MPI_COMBINER_STRUCT_INTEGER); break; case MPI_COMBINER_STRUCT: /* sufficiently complicated to pull out into separate fn */ DLOOP_Type_calc_footprint_struct(type, combiner, ints, aints, types, tfp); break; case MPI_COMBINER_SUBARRAY: ndims = ints[0]; PREPEND_PREFIX(Type_convert_subarray)(ndims, &ints[1] /* sizes */, &ints[1+ndims] /* subsz */, &ints[1+2*ndims] /* strts */, ints[1+3*ndims] /* order */, types[0], &tmptype); PREPEND_PREFIX(Type_calc_footprint)(tmptype, tfp); PMPI_Type_free(&tmptype); break; case MPI_COMBINER_DARRAY: ndims = ints[2]; PREPEND_PREFIX(Type_convert_darray)(ints[0] /* size */, ints[1] /* rank */, ndims, &ints[3] /* gsizes */, &ints[3+ndims] /*distribs */, &ints[3+2*ndims] /* dargs */, &ints[3+3*ndims] /* psizes */, ints[3+4*ndims] /* order */, types[0], &tmptype); PREPEND_PREFIX(Type_calc_footprint)(tmptype, tfp); PMPI_Type_free(&tmptype); break; case MPI_COMBINER_F90_REAL: case MPI_COMBINER_F90_COMPLEX: case MPI_COMBINER_F90_INTEGER: default: DLOOP_Assert(0); break; } clean_exit: PREPEND_PREFIX(Type_release_contents)(type, &ints, &aints, &types); return;}/* DLOOP_Type_calc_footprint_struct - calculate size, lb, ub, extent, and alignsize for a struct type*/static void DLOOP_Type_calc_footprint_struct(MPI_Datatype type, int struct_combiner, int *ints, MPI_Aint *aints, MPI_Datatype *types, DLOOP_Type_footprint *tfp){ int i, found_sticky_lb = 0, found_sticky_ub = 0, first_iter = 1; DLOOP_Offset tmp_lb, tmp_ub, tmp_extent, tmp_true_lb, tmp_true_ub; DLOOP_Offset max_alignsz = 0, tmp_size = 0, min_lb = 0, max_ub = 0; DLOOP_Offset min_true_lb = 0, max_true_ub = 0; int nr_ints, nr_aints, nr_types, combiner; /* used to store parameters for constituent types */ DLOOP_Type_footprint cfp; DLOOP_Offset size, lb, ub, true_lb, true_ub, extent, alignsz; int sticky_lb, sticky_ub; /* find first non-zero blocklength element */ for (i=0; i < ints[0] && ints[i+1] == 0; i++); if (i == ints[0]) /* all zero-length blocks */ { tfp->size = tfp->lb = tfp->ub = tfp->extent = tfp->alignsz = 0; tfp->has_sticky_lb = tfp->has_sticky_ub = 0; return; } for (; i < ints[0]; i++) { /* skip zero blocklength elements */ if (ints[i+1] == 0) continue; PMPI_Type_get_envelope(types[i], &nr_ints, &nr_aints, &nr_types, &combiner); /* opt: could just inline assignments for combiner == NAMED case */ PREPEND_PREFIX(Type_calc_footprint)(types[i], &cfp); size = cfp.size; lb = cfp.lb; ub = cfp.ub; true_lb = cfp.true_lb; true_ub = cfp.true_ub; extent = cfp.extent; alignsz = cfp.alignsz; sticky_lb = cfp.has_sticky_lb; sticky_ub = cfp.has_sticky_ub; DLOOP_DATATYPE_BLOCK_LB_UB(ints[i+1] /* blklen */, aints[i] /* disp */, lb, ub, extent, tmp_lb, tmp_ub); tmp_true_lb = tmp_lb + (true_lb - lb); tmp_true_ub = tmp_ub + (true_ub - ub); tmp_size += size * ints[i+1];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -