📄 mpidi_cart_map_fold.c
字号:
/* (C)Copyright IBM Corp. 2007, 2008 *//** * \file src/comm/topo/mpidi_cart_map_fold.c * \brief ??? */#include "mpid_topo.h"/* finished = perm_next( ndims, perm_array[ndims] ) gets the next permutation. It returns 1 when there is no next permutation. For ndims = 3, the permutation sequence is 0,1,2 --> 0,2,1 --> 1,0,2 --> 1,2,0 --> 2,0,1 --> 2,0,1 --> finished.*/static int perm_next( int size, int inperm[] ){ int i, j, place, place1, temp; place1 = -1; place = -1; for (i=size-1; i>0; i--) { if (inperm[i] < inperm[i-1] && place1 == -1) place1 = i; if (inperm[i] > inperm[i-1]) { place = i; break; } } /* last in the permutation */ if (place==-1) return 1; for (i=size-1; i>place; i--) if (inperm[i] < inperm[i-1] && inperm[i] > inperm[place-1]) { place1 = i; break; } /* long swap */ if (place1 != -1 && inperm[place-1] < inperm[place1]) { temp = inperm[place1]; for (i=place1; i>=place; i--) inperm[i] = inperm[i-1]; inperm[place-1] = temp; } /* or short swap */ else { temp = inperm[place-1]; inperm[place-1] = inperm[place]; inperm[place] = temp; } /* bubble sort the tail */ for (i=place; i<size; i++) { int action = 0; for (j=place; j<size-1; j++) { if (inperm[j] > inperm[j+1]) { temp = inperm[j]; inperm[j] = inperm[j+1]; inperm[j+1] = temp; action = 1; } } if (!action) break; } return 0;}/* fail = find_fold( dims1[ndims1], dims2[ndims2], fold[3][3] ) searchs a folding schedule, the folding schedule is stored in matrix fold[3][3] e.g. fold[i][j] = 3 indicates to unfold dimension i onto dimension j. fold[i][i] has no meaning. For 3D case as here, there will be at most 2 non-zero, non-diagonal entries. Diagonal entries are useless here. Further more, when the 2 non-zero entries are in the same row, the virtual cartesian is unfolded from the row_id dimension onto the other dimensions in physical cartesian. when the 2 entries are in the same coloum, the virtual cartesian is actually folded from the physical cartesian. */static int find_fold( int nd1, int d1[], int nd2, int d2[], int fold[][3] ){ int neg_nfold=0, pos_nfold=0; int neg_fold=1, pos_fold=1; int i, j;#if 0 static int count=0;#endif /* initialize matrix */ for (i=0; i<3; i++) for (j=0; j<3; j++) fold[i][j] = 0; /* count requesting folds and folds can gives away. */ for (i=0; i<nd1; i++) { /* free folds */ if (d1[i] < d2[i]) { fold[i][i] = d2[i]/d1[i]; /* floor () */ pos_fold *= fold[i][i]; pos_nfold ++; } else /* needed folds */ if (d1[i] > d2[i]) { fold[i][i] = -(d1[i]+d2[i]-1)/d2[i]; /* roof () */ neg_fold *= (-fold[i][i]); neg_nfold ++; } } /* always: physical ndims >= virtual ndims */ for (i=nd1; i<nd2; i++) if (d2[i] > 1) { fold[i][i] = d2[i]; pos_fold *= fold[i][i]; pos_nfold ++; } /* requesting folds > available folds --> can not fold. */ if (neg_fold > pos_fold) return 1; /* Merge the negative folds and positive folds. For 3D case, there are following possible cases: 0 dimension requests folds; 1 dimension requests folds : must have 1 or 2 dimensions have free folds. 2 dimensions requests folds: must have 1 dimension has free folds. Previous test excludes cases that free folds are fewer than needing folds. */ /* no fold is needed */ if (!neg_nfold) { for (i=0; i<nd2; i++) fold[i][i] = 0; return 0; } else /* only one dimension NEEDS folds from other 1/2 dimensions */ if (neg_nfold == 1) { for (i=0; i<nd2; i++) if (fold[i][i] < 0) /* this is needy dimension */ { int ask = -fold[i][i]; /* now how many folds do you want */ for (j=0; j<nd2; j++) /* search through dimension to satisfy the need */ { if (j==i) continue; if (fold[j][j] > 0 && ask > 1) /* j dimension has some folds */ { /* j dimension can fully satisfy the left need */ if (fold[j][j] >= ask) { fold[j][i] = ask; ask = 0; } /* j dimension can partially satisfy the left need */ else { fold[j][i] = fold[j][j]; ask = (ask + fold[j][j] -1) / fold[j][j]; /* roof () */ } } } /* end of the try, still left some needs --> fail */ if (ask > 1) return 1; } } else /* only one dimension can GIVE folds to other 1/2 dimensions */ if (pos_nfold == 1) { for (i=0; i<nd2; i++) if (fold[i][i] > 0) /* this is the donor dimension */ { int has = fold[i][i]; /* how many folds can it give away */ for (j=0; j<nd2; j++) /* search other dimensions to try to give */ { if (j==i) continue; if (fold[j][j] < 0 && has > 1) /* j needs folds and i has some left */ { /* left free folds can satisfy j's request */ if (-fold[j][j] <= has) { fold[i][j] = -fold[j][j]; has = has / (-fold[j][j]); } /* donor broken */ else { has = 0; break; } } } /* end of the try, left deficit --> fail */ if (has < 1) return 1; } }#if 0 if (!count) { printf( "\t\tfold 1 = " ); for (i=0; i<3; i++) { for (j=0; j<3; j++) printf( "%4d", fold[i][j] ); printf( "; " ); } printf( "\n" ); } count ++;#endif return 0;}/* Core of the whole folding story. unfold dimension "dim_from" onto "dim_onto" in 3D setting. The coordinates on dim_from (Z) and dim_onto (X) are both updated. The coordinate of the other dimension (Y) does not change.*/static void unfold_3d( int pdims[3], int coord[3], int dim_from, int dim_onto, int folds ){ int layers = pdims[dim_from] / folds; int fold_num = coord[dim_from] / layers; int newz = coord[dim_from] % layers; if (fold_num % 2) /* reverse direction */ { coord[dim_onto] = fold_num * pdims[dim_onto] + (pdims[dim_onto]-1 - coord[dim_onto]); coord[dim_from] = layers-1 - newz; } else /* same direction */ { coord[dim_onto] = fold_num * pdims[dim_onto] + coord[dim_onto]; coord[dim_from] = newz; } pdims[dim_from] /= folds; pdims[dim_onto] *= folds;}/* perform_fold( vir_coord[], phy_coord[], fold[3][3] ) does the folding following the schedule given by fold[3][3]. */static void perform_fold( int nd1, int d1[], int c1[], int nd2, int d2[], int c2[], int perm[], int fold[][3] ){ int i, j, nf, fold_list[9][3], t, dd2[3]; /* fold[][] has 2 useful entries out of 9. Then it is a sparse matrix, right? */ nf = 0; for (i=0; i<3; i++) for (j=0; j<3; j++) if (j!=i && fold[i][j] > 1) { fold_list[nf][0] = fold[i][j]; fold_list[nf][1] = i; fold_list[nf][2] = j; nf ++; } /* 3x3 case, nf is 0, 1, 2 */ if (nf == 2) { /* When the 2 non-zero entries are in the same row, the virtual cartesian is unfolded from the row_id dimension onto the other dimensions in physical cartesian. Then UNFOLD the dimension with more folds first to reduce dialation. */ if (fold_list[0][1] == fold_list[1][1]) { if (fold_list[0][0] < fold_list[1][0]) { for (i=0; i<3; i++) { t = fold_list[0][i]; fold_list[0][i] = fold_list[1][i]; fold_list[1][i] = t; } } } /* When the 2 entries are in the same coloum, the virtual cartesian is actually folded from the physical cartesian. Then FOLD the dimension with less folds first to reduce dialation. */ else { if (fold_list[0][0] > fold_list[1][0]) { for (i=0; i<3; i++) { t = fold_list[0][i]; fold_list[0][i] = fold_list[1][i]; fold_list[1][i] = t; } } } for (i=0; i<3; i++) c1[i] = c2[perm[i]]; for (i=0; i<3; i++) dd2[i] = d2[perm[i]];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -