📄 segment_ops.c
字号:
/* -*- Mode: C; c-basic-offset:4 ; -*- *//* * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <sys/types.h>#include "dataloop.h"#include "veccpy.h"int PREPEND_PREFIX(Segment_contig_m2m)(DLOOP_Offset *blocks_p, DLOOP_Type el_type, DLOOP_Offset rel_off, void *bufp, /* unused */ void *v_paramp){ DLOOP_Offset el_size; /* DLOOP_Count? */ DLOOP_Offset size; struct PREPEND_PREFIX(m2m_params) *paramp = v_paramp; DLOOP_Handle_get_size_macro(el_type, el_size); size = *blocks_p * el_size;#ifdef MPID_SU_VERBOSE dbg_printf("\t[contig unpack: do=%d, dp=%x, bp=%x, sz=%d, blksz=%d]\n", rel_off, (unsigned) bufp, (unsigned) paramp->u.unpack.unpack_buffer, el_size, (int) *blocks_p);#endif if (paramp->direction == DLOOP_M2M_TO_USERBUF) { memcpy((char *) (paramp->userbuf + rel_off), paramp->streambuf, size); } else { memcpy(paramp->streambuf, (char *) (paramp->userbuf + rel_off), size); } paramp->streambuf += size; return 0;}/* Segment_vector_m2m * * Note: this combines both packing and unpacking functionality. * * Note: this is only called when the starting position is at the beginning * of a whole block in a vector type. */int PREPEND_PREFIX(Segment_vector_m2m)(DLOOP_Offset *blocks_p, DLOOP_Count count, /* unused */ DLOOP_Count blksz, DLOOP_Offset stride, DLOOP_Type el_type, DLOOP_Offset rel_off, /* offset into buffer */ void *bufp, /* unused */ void *v_paramp){ DLOOP_Count i, blocks_left, whole_count; DLOOP_Offset el_size; struct PREPEND_PREFIX(m2m_params) *paramp = v_paramp; char *cbufp; cbufp = paramp->userbuf + rel_off; DLOOP_Handle_get_size_macro(el_type, el_size); whole_count = (blksz > 0) ? (*blocks_p / blksz) : 0; blocks_left = (blksz > 0) ? (*blocks_p % blksz) : 0; if (paramp->direction == DLOOP_M2M_TO_USERBUF) { if (el_size == 8 MPIR_ALIGN8_TEST(paramp->streambuf,cbufp)) { MPIDI_COPY_TO_VEC(paramp->streambuf, cbufp, stride, int64_t, blksz, whole_count); MPIDI_COPY_TO_VEC(paramp->streambuf, cbufp, 0, int64_t, blocks_left, 1); } else if (el_size == 4 MPIR_ALIGN4_TEST(paramp->streambuf,cbufp)) { MPIDI_COPY_TO_VEC((paramp->streambuf), cbufp, stride, int32_t, blksz, whole_count); MPIDI_COPY_TO_VEC(paramp->streambuf, cbufp, 0, int32_t, blocks_left, 1); } else if (el_size == 2) { MPIDI_COPY_TO_VEC(paramp->streambuf, cbufp, stride, int16_t, blksz, whole_count); MPIDI_COPY_TO_VEC(paramp->streambuf, cbufp, 0, int16_t, blocks_left, 1); } else { for (i=0; i < whole_count; i++) { memcpy(cbufp, paramp->streambuf, blksz * el_size); paramp->streambuf += blksz * el_size; cbufp += stride; } if (blocks_left) { memcpy(cbufp, paramp->streambuf, blocks_left * el_size); paramp->streambuf += blocks_left * el_size; } } } else /* M2M_FROM_USERBUF */ { if (el_size == 8 MPIR_ALIGN8_TEST(cbufp,paramp->streambuf)) { MPIDI_COPY_FROM_VEC(cbufp, paramp->streambuf, stride, int64_t, blksz, whole_count); MPIDI_COPY_FROM_VEC(cbufp, paramp->streambuf, 0, int64_t, blocks_left, 1); } else if (el_size == 4 MPIR_ALIGN4_TEST(cbufp,paramp->streambuf)) { MPIDI_COPY_FROM_VEC(cbufp, paramp->streambuf, stride, int32_t, blksz, whole_count); MPIDI_COPY_FROM_VEC(cbufp, paramp->streambuf, 0, int32_t, blocks_left, 1); } else if (el_size == 2) { MPIDI_COPY_FROM_VEC(cbufp, paramp->streambuf, stride, int16_t, blksz, whole_count); MPIDI_COPY_FROM_VEC(cbufp, paramp->streambuf, 0, int16_t, blocks_left, 1); } else { for (i=0; i < whole_count; i++) { memcpy(paramp->streambuf, cbufp, blksz * el_size); paramp->streambuf += blksz * el_size; cbufp += stride; } if (blocks_left) { memcpy(paramp->streambuf, cbufp, blocks_left * el_size); paramp->streambuf += blocks_left * el_size; } } } return 0;}/* MPID_Segment_blkidx_m2m */int PREPEND_PREFIX(Segment_blkidx_m2m)(DLOOP_Offset *blocks_p, DLOOP_Count count, DLOOP_Count blocklen, DLOOP_Offset *offsetarray, DLOOP_Type el_type, DLOOP_Offset rel_off, void *bufp, /*unused */ void *v_paramp){ DLOOP_Count curblock = 0; DLOOP_Offset el_size; DLOOP_Offset blocks_left = *blocks_p; char *cbufp; struct PREPEND_PREFIX(m2m_params) *paramp = v_paramp; DLOOP_Handle_get_size_macro(el_type, el_size); while (blocks_left) { char *src, *dest; DLOOP_Assert(curblock < count); cbufp = paramp->userbuf + rel_off + offsetarray[curblock]; if (blocklen > blocks_left) blocklen = blocks_left; if (paramp->direction == DLOOP_M2M_TO_USERBUF) { src = paramp->streambuf; dest = cbufp; } else { src = cbufp; dest = paramp->streambuf; } /* note: macro modifies dest buffer ptr, so we must reset */ if (el_size == 8 MPIR_ALIGN8_TEST(src, dest)) { MPIDI_COPY_FROM_VEC(src, dest, 0, int64_t, blocklen, 1); } else if (el_size == 4 MPIR_ALIGN4_TEST(src,dest)) { MPIDI_COPY_FROM_VEC(src, dest, 0, int32_t, blocklen, 1); } else if (el_size == 2) { MPIDI_COPY_FROM_VEC(src, dest, 0, int16_t, blocklen, 1); } else { memcpy(dest, src, blocklen * el_size); } paramp->streambuf += blocklen * el_size; blocks_left -= blocklen; curblock++; } return 0;}/* MPID_Segment_index_m2m */int PREPEND_PREFIX(Segment_index_m2m)(DLOOP_Offset *blocks_p, DLOOP_Count count, DLOOP_Count *blockarray, DLOOP_Offset *offsetarray, DLOOP_Type el_type, DLOOP_Offset rel_off, void *bufp, /*unused */ void *v_paramp){ int curblock = 0; DLOOP_Offset el_size; DLOOP_Offset cur_block_sz, blocks_left = *blocks_p; char *cbufp; struct PREPEND_PREFIX(m2m_params) *paramp = v_paramp; DLOOP_Handle_get_size_macro(el_type, el_size); while (blocks_left) { char *src, *dest; DLOOP_Assert(curblock < count); cur_block_sz = blockarray[curblock]; cbufp = paramp->userbuf + rel_off + offsetarray[curblock]; if (cur_block_sz > blocks_left) cur_block_sz = blocks_left; if (paramp->direction == DLOOP_M2M_TO_USERBUF) { src = paramp->streambuf; dest = cbufp; } else { src = cbufp; dest = paramp->streambuf; } /* note: macro modifies dest buffer ptr, so we must reset */ if (el_size == 8 MPIR_ALIGN8_TEST(src, dest)) { MPIDI_COPY_FROM_VEC(src, dest, 0, int64_t, cur_block_sz, 1); } else if (el_size == 4 MPIR_ALIGN4_TEST(src,dest)) { MPIDI_COPY_FROM_VEC(src, dest, 0, int32_t, cur_block_sz, 1); } else if (el_size == 2) { MPIDI_COPY_FROM_VEC(src, dest, 0, int16_t, cur_block_sz, 1); } else { memcpy(dest, src, cur_block_sz * el_size); } paramp->streambuf += cur_block_sz * el_size; blocks_left -= cur_block_sz; curblock++; } return 0;}void PREPEND_PREFIX(Segment_pack)(DLOOP_Segment *segp, DLOOP_Offset first, DLOOP_Offset *lastp, void *streambuf){ struct PREPEND_PREFIX(m2m_params) params; /* experimenting with discarding buf value in the segment, keeping in * per-use structure instead. would require moving the parameters around a * bit. */ params.userbuf = segp->ptr; params.streambuf = streambuf; params.direction = DLOOP_M2M_FROM_USERBUF; PREPEND_PREFIX(Segment_manipulate)(segp, first, lastp, PREPEND_PREFIX(Segment_contig_m2m), PREPEND_PREFIX(Segment_vector_m2m), PREPEND_PREFIX(Segment_blkidx_m2m), PREPEND_PREFIX(Segment_index_m2m), NULL, /* size fn */ ¶ms); return;}void PREPEND_PREFIX(Segment_unpack)(DLOOP_Segment *segp, DLOOP_Offset first, DLOOP_Offset *lastp, void *streambuf){ struct PREPEND_PREFIX(m2m_params) params; /* experimenting with discarding buf value in the segment, keeping in * per-use structure instead. would require moving the parameters around a * bit. */ params.userbuf = segp->ptr; params.streambuf = streambuf; params.direction = DLOOP_M2M_TO_USERBUF; PREPEND_PREFIX(Segment_manipulate)(segp, first, lastp, PREPEND_PREFIX(Segment_contig_m2m), PREPEND_PREFIX(Segment_vector_m2m), PREPEND_PREFIX(Segment_blkidx_m2m), PREPEND_PREFIX(Segment_index_m2m), NULL, /* size fn */ ¶ms); return;}struct PREPEND_PREFIX(contig_blocks_params) { DLOOP_Count count; DLOOP_Offset last_loc;};/* MPID_Segment_contig_count_block * * Note: because bufp is just an offset, we can ignore it in our * calculations of # of contig regions. */static int DLOOP_Segment_contig_count_block(DLOOP_Offset *blocks_p, DLOOP_Type el_type, DLOOP_Offset rel_off, DLOOP_Buffer bufp, /* unused */ void *v_paramp){ DLOOP_Offset size, el_size; struct PREPEND_PREFIX(contig_blocks_params) *paramp = v_paramp; DLOOP_Assert(*blocks_p > 0); DLOOP_Handle_get_size_macro(el_type, el_size); size = *blocks_p * el_size;#ifdef MPID_SP_VERBOSE MPIU_dbg_printf("contig count block: count = %d, buf+off = %d, lastloc = %d\n", (int) paramp->count, (int) ((char *) bufp + rel_off), (int) paramp->last_loc);#endif if (paramp->count > 0 && rel_off == paramp->last_loc) { /* this region is adjacent to the last */ paramp->last_loc += size; } else { /* new region */ paramp->last_loc = rel_off + size; paramp->count++; } return 0;}/* DLOOP_Segment_vector_count_block * * Input Parameters: * blocks_p - [inout] pointer to a count of blocks (total, for all noncontiguous pieces) * count - # of noncontiguous regions * blksz - size of each noncontiguous region * stride - distance in bytes from start of one region to start of next * el_type - elemental type (e.g. MPI_INT) * ... * * Note: this is only called when the starting position is at the beginning * of a whole block in a vector type. */static int DLOOP_Segment_vector_count_block(DLOOP_Offset *blocks_p, DLOOP_Count count, DLOOP_Count blksz, DLOOP_Offset stride, DLOOP_Type el_type, DLOOP_Offset rel_off, /* offset into buffer */ void *bufp, /* unused */ void *v_paramp){ DLOOP_Count new_blk_count; DLOOP_Offset size, el_size; struct PREPEND_PREFIX(contig_blocks_params) *paramp = v_paramp; DLOOP_Assert(count > 0 && blksz > 0 && *blocks_p > 0); DLOOP_Handle_get_size_macro(el_type, el_size); size = el_size * blksz; new_blk_count = count; /* if size == stride, then blocks are adjacent to one another */ if (size == stride) new_blk_count = 1; if (paramp->count > 0 && rel_off == paramp->last_loc) { /* first block sits at end of last block */ new_blk_count--; } paramp->last_loc = rel_off + (count-1) * stride + size; paramp->count += new_blk_count; return 0;}/* DLOOP_Segment_blkidx_count_block * * Note: this is only called when the starting position is at the * beginning of a whole block in a blockindexed type. */static int DLOOP_Segment_blkidx_count_block(DLOOP_Offset *blocks_p, DLOOP_Count count, DLOOP_Count blksz, DLOOP_Offset *offsetarray, DLOOP_Type el_type, DLOOP_Offset rel_off, void *bufp, /* unused */ void *v_paramp){ DLOOP_Count i, new_blk_count; DLOOP_Offset size, el_size, last_loc; struct PREPEND_PREFIX(contig_blocks_params) *paramp = v_paramp; DLOOP_Assert(count > 0 && blksz > 0 && *blocks_p > 0); DLOOP_Handle_get_size_macro(el_type, el_size); size = el_size * blksz; new_blk_count = count; if (paramp->count > 0 && ((rel_off + offsetarray[0]) == paramp->last_loc)) { /* first block sits at end of last block */ new_blk_count--; } last_loc = rel_off + offsetarray[0] + size; for (i=1; i < count; i++) { if (last_loc == rel_off + offsetarray[i]) new_blk_count--; last_loc = rel_off + offsetarray[i] + size; } paramp->last_loc = last_loc; paramp->count += new_blk_count;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -