📄 gen_type_blockindexed.c
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include <mpiimpl.h>
#include <mpid_dataloop.h>

int MPIDI_Type_blockindexed_count_contig(int count, int blklen,
                                         void *disp_array, int dispinbytes,
                                         MPI_Aint old_extent);

static void DLOOP_Type_blockindexed_array_copy(int count, void *disp_array,
                                               MPI_Aint *out_disp_array,
                                               int dispinbytes,
                                               MPI_Aint old_extent);

/*@
   Dataloop_create_blockindexed - create blockindexed dataloop

   Arguments:
+  int count
.  void *displacement_array
.  int displacement_in_bytes (boolean)
.  MPI_Datatype old_type
.  MPID_Dataloop **output_dataloop_ptr
.  int output_dataloop_size
.  int output_dataloop_depth
-  int flags

.N Errors
.N Returns 0 on success, -1 on failure.
@*/
int PREPEND_PREFIX(Dataloop_create_blockindexed)(int count, int blklen,
                                                 void *disp_array,
                                                 int dispinbytes,
                                                 DLOOP_Type oldtype,
                                                 DLOOP_Dataloop **dlp_p,
                                                 int *dlsz_p, int *dldepth_p,
                                                 int flags)
{
    int err, is_builtin, is_vectorizable = 1;
    int i, new_loop_sz, old_loop_depth;
    int contig_count;
    DLOOP_Offset old_extent, eff_disp0, eff_disp1, last_stride;
    DLOOP_Dataloop *new_dlp;

    /* if count or blklen are zero, handle with contig code; call it an int */
    if (count == 0 || blklen == 0)
    {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(0, MPI_INT, dlp_p,
                                                         dlsz_p, dldepth_p,
                                                         flags);
        return err;
    }

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin)
    {
        old_extent = MPID_Datatype_get_basic_size(oldtype);
        old_loop_depth = 0;
    }
    else
    {
        DLOOP_Handle_get_extent_macro(oldtype, old_extent);
        DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, 0);
    }

    /* TODO: WHAT DO WE DO ABOUT THIS? */
    contig_count = MPIDI_Type_blockindexed_count_contig(count, blklen,
                                                        disp_array,
                                                        dispinbytes,
                                                        old_extent);

    /* optimization:
     *
     * if contig_count == 1 and the block starts at displacement 0,
     * store it as a contiguous rather than a blockindexed dataloop.
     */
    if ((contig_count == 1) &&
        ((!dispinbytes && ((int *) disp_array)[0] == 0) ||
         (dispinbytes && ((DLOOP_Offset *) disp_array)[0] == 0)))
    {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(count * blklen,
                                                         oldtype, dlp_p,
                                                         dlsz_p, dldepth_p,
                                                         flags);
        return err;
    }

    /* optimization:
     *
     * if contig_count == 1, store it as a blockindexed with one
     * element rather than as a lot of individual blocks.
     */
    if (contig_count == 1)
    {
        /* adjust count and blklen and drop through */
        blklen *= count;
        count = 1;
    }

    /* optimization:
     *
     * if displacements start at zero and result in a fixed stride,
     * store it as a vector rather than a blockindexed dataloop.
     */
    eff_disp0 = (dispinbytes) ? ((DLOOP_Offset *) disp_array)[0] :
        (((MPI_Aint) ((int *) disp_array)[0]) * old_extent);

    if (count > 1 && eff_disp0 == (DLOOP_Offset) 0)
    {
        eff_disp1 = (dispinbytes) ? ((DLOOP_Offset *) disp_array)[1] :
            (((DLOOP_Offset) ((int *) disp_array)[1]) * old_extent);
        last_stride = eff_disp1 - eff_disp0;

        for (i = 2; i < count; i++)
        {
            eff_disp0 = eff_disp1;
            eff_disp1 = (dispinbytes) ? ((DLOOP_Offset *) disp_array)[i] :
                (((DLOOP_Offset) ((int *) disp_array)[i]) * old_extent);
            if (eff_disp1 - eff_disp0 != last_stride)
            {
                is_vectorizable = 0;
                break;
            }
        }
        if (is_vectorizable)
        {
            err = PREPEND_PREFIX(Dataloop_create_vector)(count, blklen,
                                                         last_stride,
                                                         1, /* strideinbytes */
                                                         oldtype, dlp_p,
                                                         dlsz_p, dldepth_p,
                                                         flags);
            return err;
        }
    }

    /* TODO: optimization:
     *
     * if displacements result in a fixed stride, but the first displacement
     * is not zero, store it as a blockindexed (blklen == 1) of a vector.
     */

    /* TODO: optimization:
     *
     * if a blockindexed of a contig, absorb the contig into the blocklen
     * parameter and keep the same overall depth.
     */

    /* otherwise store it as a blockindexed dataloop */

    /* Q: HOW CAN WE TELL IF IT IS WORTH IT TO STORE AS AN
     * INDEXED WITH FEWER CONTIG BLOCKS (IF CONTIG_COUNT IS SMALL)?
     */
    if (is_builtin)
    {
        PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_BLOCKINDEXED, count,
                                       &new_dlp, &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_BLOCKINDEXED | DLOOP_FINAL_MASK;

        /* TODO: MPID FLAGS? */
        if (flags & MPID_DATALOOP_ALL_BYTES)
        {
            blklen             *= old_extent;
            new_dlp->el_size    = 1;
            new_dlp->el_extent  = 1;
            new_dlp->el_type    = MPI_BYTE;
        }
        else
        {
            new_dlp->el_size    = old_extent;
            new_dlp->el_extent  = old_extent;
            new_dlp->el_type    = oldtype;
        }
    }
    else
    {
        DLOOP_Dataloop *old_loop_ptr = NULL;
        int old_loop_sz = 0;

        DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, 0);
        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, 0);

        PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_BLOCKINDEXED,
                                                count, old_loop_ptr,
                                                old_loop_sz, &new_dlp,
                                                &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_BLOCKINDEXED;

        DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
        DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
        DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
    }

    new_dlp->loop_params.bi_t.count     = count;
    new_dlp->loop_params.bi_t.blocksize = blklen;

    /* copy in displacement parameters
     *
     * regardless of dispinbytes, we store displacements in bytes in the loop.
     */
    DLOOP_Type_blockindexed_array_copy(count, disp_array,
                                       new_dlp->loop_params.bi_t.offset_array,
                                       dispinbytes, old_extent);

    *dlp_p     = new_dlp;
    *dlsz_p    = new_loop_sz;
    *dldepth_p = old_loop_depth + 1;

    return 0;
}

/* DLOOP_Type_blockindexed_array_copy
 *
 * Unlike the indexed version, this one does not compact adjacent
 * blocks, because that would really mess up the blockindexed type!
 */
static void DLOOP_Type_blockindexed_array_copy(int count, void *in_disp_array,
                                               DLOOP_Offset *out_disp_array,
                                               int dispinbytes,
                                               DLOOP_Offset old_extent)
{
    int i;

    if (!dispinbytes)
    {
        for (i = 0; i < count; i++)
        {
            out_disp_array[i] =
                ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent;
        }
    }
    else
    {
        for (i = 0; i < count; i++)
        {
            out_disp_array[i] = ((DLOOP_Offset *) in_disp_array)[i];
        }
    }
    return;
}
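
For context, the sketch below is not part of gen_type_blockindexed.c; it is a minimal, self-contained MPI program (the count, blocklength, and displacement values are arbitrary) that builds the kind of user-level block-indexed type the function above handles. Its displacements start at zero and follow a fixed stride, which is exactly the pattern the fixed-stride check above can collapse into a vector dataloop instead of a blockindexed one. Only standard MPI calls are used.

/* Illustrative sketch only: a block-indexed type with displacements
 * 0, 8, 16, 24 (in units of the old type) and blocks of 2 ints each.
 * Because the displacements start at 0 and have a constant stride,
 * this is the case the vectorization optimization above targets. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int          displs[4] = { 0, 8, 16, 24 };  /* fixed stride of 8 elements */
    MPI_Datatype blockindexed;
    MPI_Aint     lb, extent;

    MPI_Init(&argc, &argv);

    /* count = 4 blocks, blocklength = 2 elements per block; displacements
     * are given in units of the old type (MPI_INT), not in bytes */
    MPI_Type_create_indexed_block(4, 2, displs, MPI_INT, &blockindexed);
    MPI_Type_commit(&blockindexed);

    MPI_Type_get_extent(blockindexed, &lb, &extent);
    printf("lb = %ld, extent = %ld\n", (long) lb, (long) extent);

    MPI_Type_free(&blockindexed);
    MPI_Finalize();
    return 0;
}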