📄 segment.c
字号:
/* -*- Mode: C; c-basic-offset:4 ; -*- *//* * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. */#include <stdio.h>#include <stdlib.h>#include "./dataloop.h"#undef DLOOP_DEBUG_MANIPULATE#ifndef PREPEND_PREFIX#error "You must explicitly include a header that sets the PREPEND_PREFIX and includes dataloop_parts.h"#endif/* Notes on functions: * * There are a few different sets of functions here: * - DLOOP_Segment_manipulate() - uses a "piece" function to perform operations * using segments (piece functions defined elsewhere) * - PREPEND_PREFIX functions - these define the externally visible interface * to segment functionality */static inline DLOOP_Count DLOOP_Stackelm_blocksize(struct DLOOP_Dataloop_stackelm *elmp);static inline DLOOP_Offset DLOOP_Stackelm_offset(struct DLOOP_Dataloop_stackelm *elmp);static inline void DLOOP_Stackelm_load(struct DLOOP_Dataloop_stackelm *elmp, struct DLOOP_Dataloop *dlp, int branch_flag);/* Segment_init * * buf - datatype buffer location * count - number of instances of the datatype in the buffer * handle - handle for datatype (could be derived or not) * segp - pointer to previously allocated segment structure * flag - flag indicating which optimizations are valid * should be one of DLOOP_DATALOOP_HOMOGENEOUS, _HETEROGENEOUS, * of _ALL_BYTES. * * Notes: * - Assumes that the segment has been allocated. * - Older MPICH2 code may pass "0" to indicate HETEROGENEOUS or "1" to * indicate HETEROGENEOUS. * */int PREPEND_PREFIX(Segment_init)(const DLOOP_Buffer buf, DLOOP_Count count, DLOOP_Handle handle, struct DLOOP_Segment *segp, int flag){ DLOOP_Offset elmsize = 0; int i, depth = 0; int branch_detected = 0; struct DLOOP_Dataloop_stackelm *elmp; struct DLOOP_Dataloop *dlp = 0, *sblp = &segp->builtin_loop; DLOOP_Assert(flag == DLOOP_DATALOOP_HETEROGENEOUS || flag == DLOOP_DATALOOP_HOMOGENEOUS || flag == DLOOP_DATALOOP_ALL_BYTES);#ifdef DLOOP_DEBUG_MANIPULATE DLOOP_dbg_printf("DLOOP_Segment_init: count = %d, buf = %x\n", count, buf);#endif if (!DLOOP_Handle_hasloop_macro(handle)) { /* simplest case; datatype has no loop (basic) */ DLOOP_Handle_get_size_macro(handle, elmsize); sblp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK; sblp->loop_params.c_t.count = count; sblp->loop_params.c_t.dataloop = 0; sblp->el_size = elmsize; DLOOP_Handle_get_basic_type_macro(handle, sblp->el_type); DLOOP_Handle_get_extent_macro(handle, sblp->el_extent); dlp = sblp; depth = 1; } else if (count == 0) { /* only use the builtin */ sblp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK; sblp->loop_params.c_t.count = 0; sblp->loop_params.c_t.dataloop = 0; sblp->el_size = 0; sblp->el_extent = 0; dlp = sblp; depth = 1; } else if (count == 1) { /* don't use the builtin */ DLOOP_Handle_get_loopptr_macro(handle, dlp, flag); DLOOP_Handle_get_loopdepth_macro(handle, depth, flag); } else { /* default: need to use builtin to handle contig; must check * loop depth first */ DLOOP_Dataloop *oldloop; /* loop from original type, before new count */ DLOOP_Offset type_size, type_extent; DLOOP_Type el_type; DLOOP_Handle_get_loopdepth_macro(handle, depth, flag); if (depth >= DLOOP_MAX_DATATYPE_DEPTH) return -1; DLOOP_Handle_get_loopptr_macro(handle, oldloop, flag); DLOOP_Assert(oldloop != NULL); DLOOP_Handle_get_size_macro(handle, type_size); DLOOP_Handle_get_extent_macro(handle, type_extent); DLOOP_Handle_get_basic_type_macro(handle, el_type); if (depth == 1 && ((oldloop->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG)) { if (type_size == type_extent) { /* use a contig */ sblp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK; sblp->loop_params.c_t.count = count * oldloop->loop_params.c_t.count; sblp->loop_params.c_t.dataloop = NULL; sblp->el_size = oldloop->el_size; sblp->el_extent = oldloop->el_extent; sblp->el_type = oldloop->el_type; } else { /* use a vector, with extent of original type becoming the stride */ sblp->kind = DLOOP_KIND_VECTOR | DLOOP_FINAL_MASK; sblp->loop_params.v_t.count = count; sblp->loop_params.v_t.blocksize = oldloop->loop_params.c_t.count; sblp->loop_params.v_t.stride = type_extent; sblp->loop_params.v_t.dataloop = NULL; sblp->el_size = oldloop->el_size; sblp->el_extent = oldloop->el_extent; sblp->el_type = oldloop->el_type; } } else { /* general case */ sblp->kind = DLOOP_KIND_CONTIG; sblp->loop_params.c_t.count = count; sblp->loop_params.c_t.dataloop = oldloop; sblp->el_size = type_size; sblp->el_extent = type_extent; sblp->el_type = el_type; depth++; /* we're adding to the depth with the builtin */ } dlp = sblp; } /* initialize the rest of the segment values */ segp->handle = handle; segp->ptr = (DLOOP_Buffer) buf; segp->stream_off = 0; segp->cur_sp = 0; segp->valid_sp = 0; /* initialize the first stackelm in its entirety */ elmp = &(segp->stackelm[0]); DLOOP_Stackelm_load(elmp, dlp, 0); branch_detected = elmp->may_require_reloading; /* Fill in parameters not set by DLOOP_Stackelm_load */ elmp->orig_offset = 0; elmp->curblock = elmp->orig_block; /* DLOOP_Stackelm_offset assumes correct orig_count, curcount, loop_p */ elmp->curoffset = /* elmp->orig_offset + */ DLOOP_Stackelm_offset(elmp); i = 1; while(!(dlp->kind & DLOOP_FINAL_MASK)) { /* get pointer to next dataloop */ switch (dlp->kind & DLOOP_KIND_MASK) { case DLOOP_KIND_CONTIG: case DLOOP_KIND_VECTOR: case DLOOP_KIND_BLOCKINDEXED: case DLOOP_KIND_INDEXED: dlp = dlp->loop_params.cm_t.dataloop; break; case DLOOP_KIND_STRUCT: dlp = dlp->loop_params.s_t.dataloop_array[0]; break; default: /* --BEGIN ERROR HANDLING-- */ DLOOP_Assert(0); break; /* --END ERROR HANDLING-- */ } /* loop_p, orig_count, orig_block, and curcount are all filled by us now. * the rest are filled in at processing time. */ elmp = &(segp->stackelm[i]); DLOOP_Stackelm_load(elmp, dlp, branch_detected); branch_detected = elmp->may_require_reloading; i++; } segp->valid_sp = depth-1; return 0;}/* Segment_alloc * */struct DLOOP_Segment * PREPEND_PREFIX(Segment_alloc)(void){ return (struct DLOOP_Segment *) DLOOP_Malloc(sizeof(struct DLOOP_Segment));}/* Segment_free * * Input Parameters: * segp - pointer to segment */void PREPEND_PREFIX(Segment_free)(struct DLOOP_Segment *segp){ DLOOP_Free(segp); return;}/* DLOOP_Segment_manipulate - do something to a segment * * If you think of all the data to be manipulated (packed, unpacked, whatever), * as a stream of bytes, it's easier to understand how first and last fit in. * * This function does all the work, calling the piecefn passed in when it * encounters a datatype element which falls into the range of first..(last-1). * * piecefn can be NULL, in which case this function doesn't do anything when it * hits a region. This is used internally for repositioning within this stream. * * last is a byte offset to the byte just past the last byte in the stream * to operate on. this makes the calculations all over MUCH cleaner. * * stream_off, stream_el_size, first, and last are all working in terms of the * types and sizes for the stream, which might be different from the local sizes * (in the heterogeneous case). * * This is a horribly long function. Too bad; it's complicated :)! -- Rob * * NOTE: THIS IMPLEMENTATION CANNOT HANDLE STRUCT DATALOOPS. */#define DLOOP_SEGMENT_SAVE_LOCAL_VALUES \{ \ segp->cur_sp = cur_sp; \ segp->valid_sp = valid_sp; \ segp->stream_off = stream_off; \ *lastp = stream_off; \}#define DLOOP_SEGMENT_LOAD_LOCAL_VALUES \{ \ last = *lastp; \ cur_sp = segp->cur_sp; \ valid_sp = segp->valid_sp; \ stream_off = segp->stream_off; \ cur_elmp = &(segp->stackelm[cur_sp]); \}#define DLOOP_SEGMENT_RESET_VALUES \{ \ segp->stream_off = 0; \ segp->cur_sp = 0; \ cur_elmp = &(segp->stackelm[0]); \ cur_elmp->curcount = cur_elmp->orig_count; \ cur_elmp->orig_block = DLOOP_Stackelm_blocksize(cur_elmp); \ cur_elmp->curblock = cur_elmp->orig_block; \ cur_elmp->curoffset = cur_elmp->orig_offset + \ DLOOP_Stackelm_offset(cur_elmp); \}#define DLOOP_SEGMENT_POP_AND_MAYBE_EXIT \{ \ cur_sp--; \ if (cur_sp >= 0) cur_elmp = &segp->stackelm[cur_sp]; \ else { \ DLOOP_SEGMENT_SAVE_LOCAL_VALUES; \ return; \ } \}#define DLOOP_SEGMENT_PUSH \{ \ cur_sp++; \ cur_elmp = &segp->stackelm[cur_sp]; \}#define DLOOP_STACKELM_BLOCKINDEXED_OFFSET(elmp_, curcount_) \(elmp_)->loop_p->loop_params.bi_t.offset_array[(curcount_)]#define DLOOP_STACKELM_INDEXED_OFFSET(elmp_, curcount_) \(elmp_)->loop_p->loop_params.i_t.offset_array[(curcount_)]#define DLOOP_STACKELM_INDEXED_BLOCKSIZE(elmp_, curcount_) \(elmp_)->loop_p->loop_params.i_t.blocksize_array[(curcount_)]#define DLOOP_STACKELM_STRUCT_OFFSET(elmp_, curcount_) \(elmp_)->loop_p->loop_params.s_t.offset_array[(curcount_)]#define DLOOP_STACKELM_STRUCT_BLOCKSIZE(elmp_, curcount_) \(elmp_)->loop_p->loop_params.s_t.blocksize_array[(curcount_)]#define DLOOP_STACKELM_STRUCT_EL_EXTENT(elmp_, curcount_) \(elmp_)->loop_p->loop_params.s_t.el_extent_array[(curcount_)]#define DLOOP_STACKELM_STRUCT_DATALOOP(elmp_, curcount_) \(elmp_)->loop_p->loop_params.s_t.dataloop_array[(curcount_)]void PREPEND_PREFIX(Segment_manipulate)(struct DLOOP_Segment *segp, DLOOP_Offset first, DLOOP_Offset *lastp, int (*contigfn) (DLOOP_Offset *blocks_p, DLOOP_Type el_type, DLOOP_Offset rel_off, DLOOP_Buffer bufp, void *v_paramp), int (*vectorfn) (DLOOP_Offset *blocks_p, DLOOP_Count count, DLOOP_Count blklen, DLOOP_Offset stride, DLOOP_Type el_type, DLOOP_Offset rel_off, DLOOP_Buffer bufp, void *v_paramp), int (*blkidxfn) (DLOOP_Offset *blocks_p, DLOOP_Count count, DLOOP_Count blklen, DLOOP_Offset *offsetarray, DLOOP_Type el_type, DLOOP_Offset rel_off, DLOOP_Buffer bufp, void *v_paramp), int (*indexfn) (DLOOP_Offset *blocks_p, DLOOP_Count count, DLOOP_Count *blockarray, DLOOP_Offset *offsetarray, DLOOP_Type el_type, DLOOP_Offset rel_off, DLOOP_Buffer bufp, void *v_paramp), DLOOP_Offset (*sizefn) (DLOOP_Type el_type), void *pieceparams){ /* these four are the "local values": cur_sp, valid_sp, last, stream_off */ int cur_sp, valid_sp; DLOOP_Offset last, stream_off; struct DLOOP_Dataloop_stackelm *cur_elmp; enum { PF_NULL, PF_CONTIG, PF_VECTOR, PF_BLOCKINDEXED, PF_INDEXED } piecefn_type = PF_NULL; DLOOP_SEGMENT_LOAD_LOCAL_VALUES; if (first == *lastp) { /* nothing to do */ DLOOP_dbg_printf("dloop_segment_manipulate: warning: first == last (%d)\n", (int) first); return; } /* first we ensure that stream_off and first are in the same spot */ if (first != stream_off) {#ifdef DLOOP_DEBUG_MANIPULATE DLOOP_dbg_printf("first=%d; stream_off=%ld; resetting.\n", first, stream_off);#endif if (first < stream_off) { DLOOP_SEGMENT_RESET_VALUES; stream_off = 0; } if (first != stream_off) { DLOOP_Offset tmp_last = first; /* use manipulate function with a NULL piecefn to advance * stream offset */ PREPEND_PREFIX(Segment_manipulate)(segp, stream_off, &tmp_last, NULL, /* contig fn */ NULL, /* vector fn */ NULL, /* blkidx fn */ NULL, /* index fn */ sizefn, NULL); /* --BEGIN ERROR HANDLING-- */ /* verify that we're in the right location */ if (tmp_last != first) DLOOP_Assert(0); /* --END ERROR HANDLING-- */ } DLOOP_SEGMENT_LOAD_LOCAL_VALUES;#ifdef DLOOP_DEBUG_MANIPULATE DLOOP_dbg_printf("done repositioning stream_off; first=%d, stream_off=%ld, last=%d\n", first, stream_off, last);#endif } for (;;) {#ifdef DLOOP_DEBUG_MANIPULATE#if 0 DLOOP_dbg_printf("looptop; cur_sp=%d, cur_elmp=%x\n", cur_sp, (unsigned) cur_elmp);#endif#endif if (cur_elmp->loop_p->kind & DLOOP_FINAL_MASK) { int piecefn_indicated_exit = -1; DLOOP_Offset myblocks, local_el_size, stream_el_size; DLOOP_Type el_type; /* structs are never finals (leaves) */ DLOOP_Assert((cur_elmp->loop_p->kind & DLOOP_KIND_MASK) != DLOOP_KIND_STRUCT); /* pop immediately on zero count */ if (cur_elmp->curcount == 0) DLOOP_SEGMENT_POP_AND_MAYBE_EXIT; /* size on this system of the int, double, etc. that is * the elementary type. */ local_el_size = cur_elmp->loop_p->el_size; el_type = cur_elmp->loop_p->el_type; stream_el_size = (sizefn) ? sizefn(el_type) : local_el_size; /* calculate number of elem. types to work on and function to use. * default is to use the contig piecefn (if there is one). */ myblocks = cur_elmp->curblock; piecefn_type = (contigfn ? PF_CONTIG : PF_NULL); /* check for opportunities to use other piecefns */ switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) { case DLOOP_KIND_CONTIG: break; case DLOOP_KIND_BLOCKINDEXED: /* only use blkidx piecefn if at start of blkidx type */ if (blkidxfn && cur_elmp->orig_block == cur_elmp->curblock && cur_elmp->orig_count == cur_elmp->curcount) { /* TODO: RELAX CONSTRAINTS */ myblocks = cur_elmp->curblock * cur_elmp->curcount; piecefn_type = PF_BLOCKINDEXED; } break; case DLOOP_KIND_INDEXED: /* only use index piecefn if at start of the index type. * count test checks that we're on first block. * block test checks that we haven't made progress on first block. */ if (indexfn && cur_elmp->orig_count == cur_elmp->curcount && cur_elmp->curblock == DLOOP_STACKELM_INDEXED_BLOCKSIZE(cur_elmp, 0)) { /* TODO: RELAX CONSTRAINT ON COUNT? */ myblocks = cur_elmp->loop_p->loop_params.i_t.total_blocks; piecefn_type = PF_INDEXED; } break; case DLOOP_KIND_VECTOR: /* only use the vector piecefn if at the start of a * contiguous block. */ if (vectorfn && cur_elmp->orig_block == cur_elmp->curblock) { myblocks = cur_elmp->curblock * cur_elmp->curcount; piecefn_type = PF_VECTOR; } break; default: /* --BEGIN ERROR HANDLING-- */ DLOOP_Assert(0); break; /* --END ERROR HANDLING-- */ }#ifdef DLOOP_DEBUG_MANIPULATE
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -