⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dataloop_create_blockindexed.c

📁 fortran并行计算包
💻 C
字号:
/* -*- Mode: C; c-basic-offset:4 ; -*- *//* *  (C) 2001 by Argonne National Laboratory. *      See COPYRIGHT in top-level directory. */#include <stdio.h>#include "./dataloop.h"static DLOOP_Count DLOOP_Type_blockindexed_count_contig(DLOOP_Count count,							DLOOP_Count blklen,							void *disp_array,							int dispinbytes,							DLOOP_Offset old_extent);static void DLOOP_Type_blockindexed_array_copy(DLOOP_Count count,					       void *disp_array,					       DLOOP_Offset *out_disp_array,					       int dispinbytes,					       DLOOP_Offset old_extent);/*@   Dataloop_create_blockindexed - create blockindexed dataloop   Arguments:+  int count.  void *displacement_array (array of either MPI_Aints or ints).  int displacement_in_bytes (boolean).  MPI_Datatype old_type.  DLOOP_Dataloop **output_dataloop_ptr.  int output_dataloop_size.  int output_dataloop_depth-  int flag.N Errors.N Returns 0 on success, -1 on failure.@*/int PREPEND_PREFIX(Dataloop_create_blockindexed)(int icount,						 int iblklen,						 void *disp_array,						 int dispinbytes,						 DLOOP_Type oldtype,						 DLOOP_Dataloop **dlp_p,						 int *dlsz_p,						 int *dldepth_p,						 int flag){    int err, is_builtin, is_vectorizable = 1;    int i, new_loop_sz, old_loop_depth;    DLOOP_Count contig_count, count, blklen;    DLOOP_Offset old_extent, eff_disp0, eff_disp1, last_stride;    DLOOP_Dataloop *new_dlp;    count  = (DLOOP_Count) icount; /* avoid subsequent casting */    blklen = (DLOOP_Count) iblklen;    /* if count or blklen are zero, handle with contig code, call it a int */    if (count == 0 || blklen == 0)    {	err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,							 MPI_INT,							 dlp_p,							 dlsz_p,							 dldepth_p,							 flag);	return err;    }    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;    if (is_builtin)    {	DLOOP_Handle_get_size_macro(oldtype, old_extent);	old_loop_depth = 0;    }    else    {	DLOOP_Handle_get_extent_macro(oldtype, old_extent);	DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);    }    contig_count = DLOOP_Type_blockindexed_count_contig(count,							blklen,							disp_array,							dispinbytes,							old_extent);    /* optimization:     *     * if contig_count == 1 and block starts at displacement 0,     * store it as a contiguous rather than a blockindexed dataloop.     */    if ((contig_count == 1) &&	((!dispinbytes && ((int *) disp_array)[0] == 0) ||	 (dispinbytes && ((MPI_Aint *) disp_array)[0] == 0)))    {	err = PREPEND_PREFIX(Dataloop_create_contiguous)(icount * iblklen,							 oldtype,							 dlp_p,							 dlsz_p,							 dldepth_p,							 flag);	return err;    }    /* optimization:     *     * if contig_count == 1 store it as a blockindexed with one     * element rather than as a lot of individual blocks.     */    if (contig_count == 1)    {	/* adjust count and blklen and drop through */	blklen *= count;	count = 1;	iblklen *= icount;	icount = 1;    }    /* optimization:     *     * if displacements start at zero and result in a fixed stride,     * store it as a vector rather than a blockindexed dataloop.     */    eff_disp0 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[0]) :	(((DLOOP_Offset) ((int *) disp_array)[0]) * old_extent);    if (count > 1 && eff_disp0 == (DLOOP_Offset) 0)    {	eff_disp1 = (dispinbytes) ?	    ((DLOOP_Offset) ((MPI_Aint *) disp_array)[1]) :	    (((DLOOP_Offset) ((int *) disp_array)[1]) * old_extent);	last_stride = eff_disp1 - eff_disp0;	for (i=2; i < count; i++) {	    eff_disp0 = eff_disp1;	    eff_disp1 = (dispinbytes) ?		((DLOOP_Offset) ((MPI_Aint *) disp_array)[i]) :		(((DLOOP_Offset) ((int *) disp_array)[i]) * old_extent);	    if (eff_disp1 - eff_disp0 != last_stride) {		is_vectorizable = 0;		break;	    }	}	if (is_vectorizable)	{	    err = PREPEND_PREFIX(Dataloop_create_vector)(count,							 blklen,							 last_stride,							 1, /* strideinbytes */							 oldtype,							 dlp_p,							 dlsz_p,							 dldepth_p,							 flag);	    return err;	}    }    /* TODO: optimization:     *     * if displacements result in a fixed stride, but first displacement     * is not zero, store it as a blockindexed (blklen == 1) of a vector.     */    /* TODO: optimization:     *     * if a blockindexed of a contig, absorb the contig into the blocklen     * parameter and keep the same overall depth     */    /* otherwise storing as a blockindexed dataloop */    /* Q: HOW CAN WE TELL IF IT IS WORTH IT TO STORE AS AN     * INDEXED WITH FEWER CONTIG BLOCKS (IF CONTIG_COUNT IS SMALL)?     */    if (is_builtin)    {	PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_BLOCKINDEXED,				       count,				       &new_dlp,				       &new_loop_sz);	/* --BEGIN ERROR HANDLING-- */	if (!new_dlp) return -1;	/* --END ERROR HANDLING-- */	new_dlp->kind = DLOOP_KIND_BLOCKINDEXED | DLOOP_FINAL_MASK;	if (flag == DLOOP_DATALOOP_ALL_BYTES)	{	    blklen            *= old_extent;	    new_dlp->el_size   = 1;	    new_dlp->el_extent = 1;	    new_dlp->el_type   = MPI_BYTE;	}	else	{	    new_dlp->el_size   = old_extent;	    new_dlp->el_extent = old_extent;	    new_dlp->el_type   = oldtype;	}    }    else    {	DLOOP_Dataloop *old_loop_ptr = NULL;	int old_loop_sz = 0;	DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);	DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);	PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_BLOCKINDEXED,						count,						old_loop_ptr,						old_loop_sz,						&new_dlp,						&new_loop_sz);	/* --BEGIN ERROR HANDLING-- */	if (!new_dlp) return -1;	/* --END ERROR HANDLING-- */	new_dlp->kind = DLOOP_KIND_BLOCKINDEXED;	DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);	DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);	DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);    }    new_dlp->loop_params.bi_t.count     = count;    new_dlp->loop_params.bi_t.blocksize = blklen;    /* copy in displacement parameters     *     * regardless of dispinbytes, we store displacements in bytes in loop.     */    DLOOP_Type_blockindexed_array_copy(count,				       disp_array,				       new_dlp->loop_params.bi_t.offset_array,				       dispinbytes,				       old_extent);    *dlp_p     = new_dlp;    *dlsz_p    = new_loop_sz;    *dldepth_p = old_loop_depth + 1;    return 0;}/* DLOOP_Type_blockindexed_array_copy * * Unlike the indexed version, this one does not compact adjacent * blocks, because that would really mess up the blockindexed type! */static void DLOOP_Type_blockindexed_array_copy(DLOOP_Count count,					       void *in_disp_array,					       DLOOP_Offset *out_disp_array,					       int dispinbytes,					       DLOOP_Offset old_extent){    int i;    if (!dispinbytes)    {	for (i=0; i < count; i++)	{	    out_disp_array[i] =		((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent;	}    }    else    {	for (i=0; i < count; i++)	{	    out_disp_array[i] =		((DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i]);	}    }    return;}static DLOOP_Count DLOOP_Type_blockindexed_count_contig(DLOOP_Count count,							DLOOP_Count blklen,							void *disp_array,							int dispinbytes,							DLOOP_Offset old_extent){    int i, contig_count = 1;    if (!dispinbytes)    {	/* this is from the MPI type, is of type int */	DLOOP_Offset cur_tdisp = (DLOOP_Offset) ((int *) disp_array)[0];	for (i=1; i < count; i++)	{	    DLOOP_Offset next_tdisp = (DLOOP_Offset) ((int *) disp_array)[i];	    if (cur_tdisp + blklen != next_tdisp)	    {		contig_count++;	    }	    cur_tdisp = next_tdisp;	}    }    else    {	/* this is from the MPI type, is of type MPI_Aint */	DLOOP_Offset cur_bdisp = (DLOOP_Offset) ((MPI_Aint *) disp_array)[0];	for (i=1; i < count; i++)	{	    DLOOP_Offset next_bdisp =		(DLOOP_Offset) ((MPI_Aint *) disp_array)[i];	    if (cur_bdisp + (DLOOP_Offset) blklen * old_extent != next_bdisp)	    {		contig_count++;	    }	    cur_bdisp = next_bdisp;	}    }    return contig_count;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -