📄 mpidi_cart_map_fold.c
字号:
unfold_3d( dd2, c1, fold_list[0][1], fold_list[0][2], fold_list[0][0] ); unfold_3d( dd2, c1, fold_list[1][1], fold_list[1][2], fold_list[1][0] ); } /* Z to X and Y stays the same, how nice, no dilation */ if (nf == 1) { for (i=0; i<3; i++) c1[i] = c2[perm[i]]; for (i=0; i<3; i++) dd2[i] = d2[perm[i]]; unfold_3d( dd2, c1, fold_list[0][1], fold_list[0][2], fold_list[0][0] ); } /* no fold, only permute the coordinate */ if (nf == 0) { for (i=0; i<3; i++) c1[i] = c2[perm[i]]; } return;}/* Main control of the folding mapping. 1. This routine only folds the 3 true dimensions. The T dimension (if in virtual node mode) is handled specifically in the caller of this routine. 2. finished = perm_next( ndims, perm_array[ndims] ) gets the next permutation. It returns 1 when there is no next permutation. For ndims = 3, the permutation sequence is 0,1,2 --> 0,2,1 --> 1,0,2 --> 1,2,0 --> 2,0,1 --> 2,1,0 --> finished. 3. fail = find_fold( dims1[ndims1], dims2[ndims2], fold[3][3] ) searches for a folding schedule. The folding schedule is stored in matrix fold[3][3], e.g. fold[i][j] = 3 indicates to unfold dimension i onto dimension j. fold[i][i] has no meaning. For the 3D case as here, there will be at most 2 non-zero, non-diagonal entries. Diagonal entries are useless here. Furthermore, when the 2 non-zero entries are in the same row, the virtual cartesian is unfolded from the row_id dimension onto the other dimensions in the physical cartesian; when the 2 entries are in the same column, the virtual cartesian is actually folded from the physical cartesian. 4. perform_fold( vir_coord[], phy_coord[], fold[3][3] ) does the folding following the schedule given by fold[3][3].
*/

/*
 * perm_dims_match: search the permutations of the second cartesian's
 * dimensions for one that admits a folding schedule and, if one is found,
 * apply it.
 *
 *   nd1, d1[], c1[]  number of dimensions, dimension sizes and coordinate
 *                    array of the first cartesian; passed through to
 *                    find_fold() / perform_fold().
 *   nd2, d2[], c2[]  the same for the second cartesian.  The first three
 *                    entries of d2[] are permuted before each find_fold()
 *                    attempt.
 *
 * Returns 0 once find_fold() reports success (perform_fold() has then been
 * called with the successful permutation and schedule), or 1 when
 * perm_next() reports that no permutation is left.
 */
static int perm_dims_match( int nd1, int d1[], int c1[],
                            int nd2, int d2[], int c2[] )
{
    int order[3]       = {0, 1, 2};
    int schedule[3][3] = {{0,0,0}, {0,0,0}, {0,0,0}};
    int permuted[3];
    int k;

    for (;;) {
        /* Present the second cartesian's dimensions in the current order. */
        for (k = 0; k < 3; k++)
            permuted[k] = d2[order[k]];

        if (!find_fold( nd1, d1, nd2, permuted, schedule )) {
            /* Schedule found: fold using the unpermuted d2 plus the order. */
            perform_fold( nd1, d1, c1, nd2, d2, c2, order, schedule );
            return 0;
        }

        /* Advance to the next ordering; give up when there is none. */
        if (perm_next( nd2, order ))
            return 1;
    }
}

/*
 * MPIDI_Cart_map_coord_to_rank: convert the coordinate cc[0..nd-1] inside a
 * cartesian of sizes dims[0..nd-1] into a linear rank, stored in *newrank.
 *
 * The rank is computed in C order: the right-most dimension is the fastest
 * changing one (dims[3] is to the right of dims[0]).  The cart utility
 * routines of MPICH2 follow this order; the BG/L XYZT mapping follows the
 * reverse (Fortran) order.
 *
 * *newrank is set to MPI_UNDEFINED when some cc[i] >= dims[i] (the
 * coordinate lies outside the virtual cartesian box) or when the computed
 * rank is >= size.
 */
void MPIDI_Cart_map_coord_to_rank( int size, int nd, int dims[], int cc[], int *newrank )
{
    int stride = 1;
    int axis   = nd;

    *newrank = 0;
    while (axis-- > 0) {
        if (cc[axis] >= dims[axis]) {
            /* outside vir_cart box */
            *newrank = MPI_UNDEFINED;
            return;
        }
        *newrank += cc[axis] * stride;
        stride   *= dims[axis];
    }

    if (*newrank >= size)
        *newrank = MPI_UNDEFINED;
}

/* Try to map arbitrary 2D-4D requests onto a 3D/4D mesh (rectangular communicator).

   The basic idea is like folding a sheet of paper, in both dimensions, into a 3D mesh.
   Some edge loss does occur when folding in both dimensions, and therefore the
   mapping dilation can be greater than 1.

   The core operator is defined in routine "unfold_3d", which unfolds dim_X onto dim_Z
   with dim_Y unchanged. When starting from physical coordinates / dimensions, the
   operator is transitive, i.e., one can do
       unfold_3d( X, Z, dims[], coord[] )
       unfold_3d( X, Y, dims[], coord[] )
   and dims[] and coord[] both change to the new cartesian.

   Currently, the only limitation is for 4D requests: there has to be one dimension
   of size 2 to match the T dimension. This is because I do not fully understand
   folding on a 4D cartesian.
*/

/*
 * MPIDI_Cart_map_fold: compute the rank, in the virtual cartesian, of the
 * calling process given its coordinate in the physical cartesian.
 *
 *   vir_cart  requested (virtual) cartesian: ndims, dims[], size are read.
 *   phy_cart  physical cartesian: ndims, dims[], coord[], start[] are read.
 *   newrank   receives the result of MPIDI_Cart_map_coord_to_rank() for the
 *             folded coordinate; only written when a fold was found.
 *
 * Returns 0 when a folding was found and *newrank was written, 1 otherwise.
 */
int MPIDI_Cart_map_fold( MPIDI_VirtualCart *vir_cart,
                         MPIDI_PhysicalCart *phy_cart,
                         int *newrank )
{
    int notdone, i, j;
    int c1[3], d1[3], c2[3], d2[3];
    /* One slot per virtual dimension.  A virtual cartesian may have up to 4
       dimensions and cc[] is indexed through vir_perm[] (values 0..3), so it
       needs 4 entries. */
    int cc[4] = {0, 0, 0, 0};
    int vir_perm[4] = {0,1,2,3};
    int phy_perm[4] = {0,1,2,3};

    /* Sort dimensions in decreasing order, hoping to reduce the number of
       foldings.  Only the 3 true physical dimensions are sorted. */
    MPIDI_Cart_dims_sort( vir_cart->ndims, vir_cart->dims, vir_perm );
    MPIDI_Cart_dims_sort( 3, phy_cart->dims, phy_perm );

    notdone = 1;

    if (phy_cart->ndims==4 && vir_cart->ndims<4) {
        /* covers case:
         *   1. 4 = phy_cart->ndims > vir_cart->ndims > 1
         * solution:
         *   1. try each vir_cart->dims[]
         *   2. vir_cart->dims[i] = roof (vir_cart->dims[i] / 2);
         *   3. try fold
         *   4. coord[i] = coord[i] * 2 + cpu_id
         */
        for (i=vir_cart->ndims-1; i>=0; i--) {
            /* Halve (rounding up) dimension i; keep the others as they are. */
            d1[i] = (vir_cart->dims[vir_perm[i]]+1)/2;
            for (j=0; j<vir_cart->ndims; j++)
                if (j!=i) d1[j] = vir_cart->dims[vir_perm[j]];

            for (j=0; j<3; j++) {
                c2[j] = phy_cart->coord[phy_perm[j]] - phy_cart->start[phy_perm[j]];
                d2[j] = phy_cart->dims [phy_perm[j]];
                c1[j] = 0;
            }

            if (perm_dims_match( vir_cart->ndims, d1, c1, 3, d2, c2 ))
                continue;

            for (j=0; j<3; j++)
                if (j!=i) cc[vir_perm[j]] = c1[j];
            /* The halved dimension is interleaved with the T coordinate. */
            cc[vir_perm[i]] = c1[i] * 2 + (phy_cart->coord[3] - phy_cart->start[3]);
            notdone = 0;
            break;
        }
    }
    else {
        /* covers cases:
         *   1. phy_cart->ndims == vir_cart->ndims == 4
         *      solution: remove the T dimension from both phy and vir
         *      cartesian.  Then this case becomes case 2.
         *   2. 3 = phy_cart->ndims >= vir_cart->ndims > 1
         *      solution: just try fold.
         */
        int vir_ndims = vir_cart->ndims;
        int has_t     = 0;

        if (vir_ndims == 4) {
            /* A 4D request needs a physical T dimension to absorb its
               fourth dimension. */
            if (phy_cart->ndims != 4) return 1;
            /* The smallest virtual dimension must have size 2 to match T. */
            if (vir_cart->dims[vir_perm[3]] != 2) return 1;
            vir_ndims = 3;
            has_t     = 1;
        }

        for (j=0; j<vir_ndims; j++)
            d1[j] = vir_cart->dims[vir_perm[j]];

        for (j=0; j<3; j++) {
            c2[j] = phy_cart->coord[phy_perm[j]] - phy_cart->start[phy_perm[j]];
            d2[j] = phy_cart->dims [phy_perm[j]];
            c1[j] = 0;
        }

        /* Only the 3 true physical dimensions are held in d2[]/c2[], so the
           physical dimension count handed to the folding code is 3 even
           when the physical cartesian itself is 4D. */
        if (!perm_dims_match( vir_ndims, d1, c1, 3, d2, c2 )) {
            for (j=0; j<3; j++)
                cc[vir_perm[j]] = c1[j];
            /* The removed virtual dimension takes the T coordinate. */
            if (has_t)
                cc[vir_perm[3]] = phy_cart->coord[3] - phy_cart->start[3];
            notdone = 0;
        }
    }

    if (notdone) return notdone;

    /* C_order means the right-most dimension is the fastest changing dimension.
       Of course, dims[3] is on the right of dims[0]. The cart utilities routines
       of MPICH2 follows this order; BG/L XYZT mapping following the reverse order
       (Fortran order).
     */
    MPIDI_Cart_map_coord_to_rank( vir_cart->size, vir_cart->ndims, vir_cart->dims, cc, newrank );

    /*
    printf( "\t<%2d,%2d,%2d,%2d> to %4d (notdone = %d)\n",
            phy_cart->coord[0], phy_cart->coord[1], phy_cart->coord[2], phy_cart->coord[3],
            *newrank, notdone );
    */

    return notdone;
}

/* Disabled stand-alone driver: prints the perm_dims_match() mapping for every
   coordinate of a small physical cartesian. */
/*
int main( int argc, char *argv[] )
{
  int perm[5] = {0,1,2,3,4}, next=0, cnt = 0, i, size=4;
  int fold[3][3];
  int ret, c2[3], c1[3];
  // int n1=450, nd1=3, d1[3] = {15,15,2};
  // int n2=512, nd2=3, d2[3] = {8,8,8};
  // int n1=343, nd1=3, d1[3] = {7,7,7};
  // int n2=512, nd2=3, d2[3] = {16,16,2};
  // int n1=343, nd1=3, d1[3] = {7,7,7};
  // int n2=512, nd2=3, d2[3] = {64,4,2};
  // int n1=465, nd1=2, d1[3] = {31,15};
  // int n2=512, nd2=3, d2[3] = {64,4,2};
  int n1=49, nd1=2, d1[3] = {3,5,1};
  int n2=64, nd2=3, d2[3] = {2,2,6};

  for (c2[0]=0; c2[0]<d2[0]; c2[0]++)
  for (c2[1]=0; c2[1]<d2[1]; c2[1]++) {
    for (c2[2]=0; c2[2]<d2[2]; c2[2]++) {
      ret = perm_dims_match( nd1, d1, c1, nd2, d2, c2 );
      // printf( "ret = %d\n", ret );
      printf( "<%2d/%2d,%2d/%2d,%2d/%2d> to <%2d/%2d,%2d/%2d,%2d/%2d>\n",
              c2[0], d2[0], c2[1], d2[1], c2[2], d2[2],
              c1[0], d1[0], c1[1], d1[1], c1[2], d1[2] );
      // cnt ++;
      // if (cnt > 10) return 0;
    }
    printf( "\n" );
  }
  return 0;
}
*/

/* Disabled stand-alone driver: runs find_fold() on one hard-coded case. */
/*
int main( int argc, char *argv[] )
{
  int perm[5] = {0,1,2,3,4}, next=0, cnt = 0, i, size=4;
  int fold[3][3];
  int ret;
  // int n1=450, nd1=3, d1[3] = {15,15,2};
  // int n2=512, nd2=3, d2[3] = {8,8,8};
  // int n1=343, nd1=3, d1[3] = {7,7,7};
  // int n2=512, nd2=3, d2[3] = {16,16,2};
  // int n1=343, nd1=3, d1[3] = {7,7,7};
  // int n2=512, nd2=3, d2[3] = {64,4,2};
  // int n1=465, nd1=2, d1[3] = {31,15};
  // int n2=512, nd2=3, d2[3] = {64,4,2};
  int n1=465, nd1=2, d1[3] = {31,15};
  int n2=512, nd2=3, d2[3] = {64,8,1};

  ret = find_fold( nd1, d1, nd2, d2, fold );
  printf( "ret = %d\n", ret );
  return 0;
}
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -