⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mpidi_cart_map_fold.c

📁 fortran并行计算包
💻 C
📖 第 1 页 / 共 2 页
字号:
/*  (C)Copyright IBM Corp.  2007, 2008  *//** * \file src/comm/topo/mpidi_cart_map_fold.c * \brief ??? */#include "mpid_topo.h"/* finished = perm_next( ndims, perm_array[ndims] )       gets the next permutation. It returns 1 when there is no next permutation.       For ndims = 3, the permutation sequence is            0,1,2 --> 0,2,1 --> 1,0,2 --> 1,2,0 --> 2,0,1 --> 2,0,1 --> finished.*/static int perm_next( int size, int inperm[] ){    int i, j, place, place1, temp;    place1 = -1;    place = -1;    for (i=size-1; i>0; i--)    {        if (inperm[i] < inperm[i-1] && place1 == -1) place1 = i;        if (inperm[i] > inperm[i-1]) { place = i; break; }    }    /* last in the permutation */    if (place==-1) return 1;    for (i=size-1; i>place; i--)    if (inperm[i] < inperm[i-1] && inperm[i] > inperm[place-1])    {        place1 = i; break;    }    /* long swap */    if (place1 != -1 && inperm[place-1] < inperm[place1]) {        temp = inperm[place1];        for (i=place1; i>=place; i--) inperm[i] = inperm[i-1];        inperm[place-1] = temp;    }    /* or short swap */    else {        temp            = inperm[place-1];        inperm[place-1] = inperm[place];        inperm[place]   = temp;    }    /* bubble sort the tail */    for (i=place; i<size; i++) {        int action = 0;        for (j=place; j<size-1; j++) {            if (inperm[j] > inperm[j+1]) {                temp        = inperm[j];                inperm[j]   = inperm[j+1];                inperm[j+1] = temp;                action = 1;            }        }        if (!action) break;    }    return 0;}/* fail = find_fold( dims1[ndims1], dims2[ndims2], fold[3][3] )       searchs a folding schedule, the folding schedule is stored in matrix fold[3][3]       e.g. fold[i][j] = 3 indicates to unfold dimension i onto dimension j.       fold[i][i] has no meaning.       For 3D case as here, there will be at most 2 non-zero, non-diagonal entries.       Diagonal entries are useless here.       Further more, when the 2 non-zero entries are in the same row, the virtual cartesian is       unfolded from the row_id dimension onto the other dimensions in physical cartesian.       when the 2 entries are in the same coloum, the virtual cartesian is actually       folded from the physical cartesian. */static int find_fold( int nd1, int d1[], int nd2, int d2[], int fold[][3] ){    int neg_nfold=0, pos_nfold=0;    int neg_fold=1,  pos_fold=1;    int i, j;#if 0    static int count=0;#endif    /* initialize matrix */    for (i=0; i<3; i++) for (j=0; j<3; j++) fold[i][j] = 0;    /* count requesting folds and folds can gives away. */    for (i=0; i<nd1; i++) {        /* free folds */        if (d1[i] < d2[i])        {            fold[i][i] = d2[i]/d1[i];			/* floor () */            pos_fold *= fold[i][i];            pos_nfold ++;        }        else        /* needed folds */        if (d1[i] > d2[i])        {            fold[i][i] = -(d1[i]+d2[i]-1)/d2[i];	/* roof  () */            neg_fold *= (-fold[i][i]);            neg_nfold ++;        }    }    /* always: physical ndims >= virtual ndims */    for (i=nd1; i<nd2; i++) if (d2[i] > 1) { fold[i][i] = d2[i]; pos_fold *= fold[i][i]; pos_nfold ++; }    /* requesting folds > available folds --> can not fold. */    if (neg_fold > pos_fold) return 1;    /*       Merge the negative folds and positive folds. For 3D case, there are following possible cases:        0 dimension requests folds;        1 dimension requests folds : must have 1 or 2 dimensions have free folds.        2 dimensions requests folds: must have 1 dimension has free folds.       Previous test excludes cases that free folds are fewer than needing folds.     */    /* no fold is needed */    if (!neg_nfold) {        for (i=0; i<nd2; i++) fold[i][i] = 0;        return 0;    }    else    /* only one dimension NEEDS folds from other 1/2 dimensions */    if (neg_nfold == 1) {        for (i=0; i<nd2; i++)        if (fold[i][i] < 0) 		/* this is needy dimension */        {            int ask = -fold[i][i];	/* now how many folds do you want */            for (j=0; j<nd2; j++) 	/* search through dimension to satisfy the need */            {                if (j==i) continue;                if (fold[j][j] > 0 && ask > 1) 	/* j dimension has some folds */                {                    /* j dimension can fully satisfy the left need */                    if (fold[j][j] >= ask) {                        fold[j][i] = ask;                        ask = 0;                    }                    /* j dimension can partially satisfy the left need */                    else                    {                        fold[j][i] = fold[j][j];                        ask = (ask + fold[j][j] -1) / fold[j][j];	/* roof () */                    }                }            }            /* end of the try, still left some needs --> fail */            if (ask > 1) return 1;        }    }    else    /* only one dimension can GIVE folds to other 1/2 dimensions */    if (pos_nfold == 1) {        for (i=0; i<nd2; i++)        if (fold[i][i] > 0) 		/* this is the donor dimension */        {            int has = fold[i][i];	/* how many folds can it give away */            for (j=0; j<nd2; j++) 	/* search other dimensions to try to give */            {                if (j==i) continue;                if (fold[j][j] < 0 && has > 1) 	/* j needs folds and i has some left */                {                    /* left free folds can satisfy j's request */                    if (-fold[j][j] <= has) {                        fold[i][j] = -fold[j][j];                        has = has / (-fold[j][j]);                    }                    /* donor broken */                    else {                        has = 0; break;                    }                }            }            /* end of the try, left deficit --> fail */            if (has < 1) return 1;        }    }#if 0    if (!count) {    printf( "\t\tfold 1 = " );    for (i=0; i<3; i++) { for (j=0; j<3; j++) printf( "%4d", fold[i][j] ); printf( "; " ); }    printf( "\n" );    }    count ++;#endif    return 0;}/* Core of the whole folding story.        unfold dimension "dim_from" onto "dim_onto" in 3D setting. The coordinates on        dim_from (Z) and dim_onto (X) are both updated. The coordinate of the other        dimension (Y) does not change.*/static void unfold_3d( int pdims[3], int coord[3], int dim_from, int dim_onto, int folds ){    int layers   = pdims[dim_from] / folds;    int fold_num = coord[dim_from] / layers;    int newz     = coord[dim_from] % layers;    if (fold_num % 2)   /* reverse direction */    {        coord[dim_onto] = fold_num * pdims[dim_onto] + (pdims[dim_onto]-1 - coord[dim_onto]);        coord[dim_from] = layers-1 - newz;    }    else                /* same direction */    {        coord[dim_onto] = fold_num * pdims[dim_onto] + coord[dim_onto];        coord[dim_from] = newz;    }    pdims[dim_from] /= folds;    pdims[dim_onto] *= folds;}/* perform_fold( vir_coord[], phy_coord[], fold[3][3] )        does the folding following the schedule given by fold[3][3]. */static void perform_fold( int nd1, int d1[], int c1[], int nd2, int d2[], int c2[], int perm[], int fold[][3] ){    int i, j, nf, fold_list[9][3], t, dd2[3];    /* fold[][] has 2 useful entries out of 9. Then it is a sparse matrix, right? */    nf = 0;    for (i=0; i<3; i++)    for (j=0; j<3; j++)    if (j!=i && fold[i][j] > 1)    {        fold_list[nf][0] = fold[i][j];        fold_list[nf][1] = i;        fold_list[nf][2] = j;        nf ++;    }    /* 3x3 case, nf is 0, 1, 2 */    if (nf == 2)    {        /* When the 2 non-zero entries are in the same row, the virtual cartesian is           unfolded from the row_id dimension onto the other dimensions in physical cartesian.           Then UNFOLD the dimension with more folds first to reduce dialation.        */        if (fold_list[0][1] == fold_list[1][1]) {            if (fold_list[0][0] < fold_list[1][0]) {                for (i=0; i<3; i++) {                    t               = fold_list[0][i];                    fold_list[0][i] = fold_list[1][i];                    fold_list[1][i] = t;                }            }        }        /* When the 2 entries are in the same coloum, the virtual cartesian is actually           folded from the physical cartesian.           Then FOLD the dimension with less folds first to reduce dialation.        */        else {            if (fold_list[0][0] > fold_list[1][0]) {                for (i=0; i<3; i++) {                    t               = fold_list[0][i];                    fold_list[0][i] = fold_list[1][i];                    fold_list[1][i] = t;                }            }        }        for (i=0; i<3; i++) c1[i]  = c2[perm[i]];        for (i=0; i<3; i++) dd2[i] = d2[perm[i]];

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -