📄 ad_bgl_aggrs.c
字号:
ADIO_Offset n_gpfs_blk = fd_gpfs_range / blksize; ADIO_Offset nb_cn_small = n_gpfs_blk/naggs; ADIO_Offset naggs_large = n_gpfs_blk - naggs * (n_gpfs_blk/naggs); ADIO_Offset naggs_small = naggs - naggs_large; for (i=0; i<naggs; i++) if (i < naggs_small) fd_size[i] = nb_cn_small * blksize; else fd_size[i] = (nb_cn_small+1) * blksize;/* FPRINTF(stderr,"%s(%d): " "gpfs_ub %llu, " "gpfs_lb %llu, " "gpfs_ub_rdoff %llu, " "gpfs_lb_rdoff %llu, " "fd_gpfs_range %llu, " "n_gpfs_blk %llu, " "nb_cn_small %llu, " "naggs_large %llu, " "naggs_small %llu, " "\n", myname,__LINE__, gpfs_ub , gpfs_lb , gpfs_ub_rdoff, gpfs_lb_rdoff, fd_gpfs_range, n_gpfs_blk , nb_cn_small , naggs_large , naggs_small );*/ fd_size[0] -= gpfs_lb_rdoff; fd_size[naggs-1] -= gpfs_ub_rdoff; /* compute the file domain for each aggr */ ADIO_Offset offset = min_st_offset; for (aggr=0; aggr<naggs; aggr++) { fd_start[aggr] = offset; fd_end [aggr] = offset + fd_size[aggr] - 1; offset += fd_size[aggr]; } *fd_size_ptr = fd_size[0]; *min_st_offset_ptr = min_st_offset; ADIOI_Free (fd_size);}/* * deprecated *void ADIOI_BGL_GPFS_Calc_file_domain0(ADIO_Offset *st_offsets, ADIO_Offset *end_offsets, int nprocs, int nprocs_for_coll, ADIO_Offset *min_st_offset_ptr, ADIO_Offset **fd_start_ptr, ADIO_Offset **fd_end_ptr, ADIO_Offset *fd_size_ptr){ ADIO_Offset min_st_offset, max_end_offset, *fd_start, *fd_end, *fd_size; int i;static int GPFS_BSIZE=1048576; * find the range of all the requests * min_st_offset = st_offsets [0]; max_end_offset = end_offsets[0]; for (i=1; i<nprocs; i++) { min_st_offset = ADIOI_MIN(min_st_offset, st_offsets[i]); max_end_offset = ADIOI_MAX(max_end_offset, end_offsets[i]); } * determine the "file domain (FD)" of each process, i.e., the portion of the file that will be "owned" by each process * * GPFS specific, pseudo starting/end point has to round to GPFS_BSIZE * ADIO_Offset gpfs_ub = (max_end_offset +GPFS_BSIZE-1) / GPFS_BSIZE * GPFS_BSIZE - 1; ADIO_Offset gpfs_lb = min_st_offset / GPFS_BSIZE * GPFS_BSIZE; ADIO_Offset gpfs_ub_rdoff = (max_end_offset +GPFS_BSIZE-1) / GPFS_BSIZE * GPFS_BSIZE - 1 - max_end_offset; ADIO_Offset gpfs_lb_rdoff = min_st_offset - min_st_offset / GPFS_BSIZE * GPFS_BSIZE; ADIO_Offset fd_gpfs_range = gpfs_ub - gpfs_lb + 1; * all computation of partition is based on the rounded pseudo-range * ADIO_Offset fds_ub = (fd_gpfs_range +nprocs_for_coll-1) / nprocs_for_coll; ADIO_Offset fds_lb = fd_gpfs_range / nprocs_for_coll; int naggs = nprocs_for_coll; int npsets = aggrsInPset[0]; * special meaning for element 0 * fd_size = (ADIO_Offset *) ADIOI_Malloc(naggs * sizeof(ADIO_Offset)); *fd_start_ptr = (ADIO_Offset *) ADIOI_Malloc(naggs * sizeof(ADIO_Offset)); *fd_end_ptr = (ADIO_Offset *) ADIOI_Malloc(naggs * sizeof(ADIO_Offset)); fd_start = *fd_start_ptr; fd_end = *fd_end_ptr; * some pre-computation to determine rough ratio of when to up-fit, when to low-fit * * 1. get the estimated data per pset * * 2. determine a factor between up and down * int avg_aggrsInPset = (naggs +npsets-1)/npsets; ADIO_Offset avg_bytes_perPset = fd_gpfs_range / npsets; ADIO_Offset resid = avg_bytes_perPset % GPFS_BSIZE; ADIO_Offset downr = GPFS_BSIZE - resid; int small = (resid < downr); int ratio = downr == 0 ? npsets + 2 : (resid +downr-1)/downr; if (small) ratio = resid == 0 ? npsets + 2 : (downr +resid-1)/resid; * go through aggrsInfo of all PSETs * ADIO_Offset fd_range = fd_gpfs_range; int aggr = 0, pset; for (pset=0; pset<npsets; pset++) { ADIO_Offset fds_try = fds_lb; int my_naggs = aggrsInPset[pset+1]; ADIO_Offset fds_pset; * Last pset will deal with the residuals * if (pset == npsets-1) fds_pset = fd_range; else { int cond1 = ((pset+1) % ratio == 0); int cond2 = ((pset+1) % ratio != 0); if (small) { int temp = cond1; cond1 = cond2; cond2 = temp; } if (cond1) { fds_pset = fds_try * my_naggs; if (fds_pset % GPFS_BSIZE) // align to GPFS_BSIZE fds_pset = ((fds_pset +GPFS_BSIZE-1)/GPFS_BSIZE) * GPFS_BSIZE; } if (cond2) { fds_try = fds_ub; fds_pset = fds_try * my_naggs; if (fds_pset % GPFS_BSIZE) // align to GPFS_BSIZE fds_pset = (fds_pset / GPFS_BSIZE) * GPFS_BSIZE; } } * for aggrs in each PSET, divide evenly the data range *#define CN_ALIGN 1#if !CN_ALIGN fd_range -= fds_pset; if ( pset == 0 ) fds_pset -= gpfs_lb_rdoff; if ( pset == npsets-1 ) fds_pset -= gpfs_ub_rdoff; int p; for (p=0; p<my_naggs; p++) { fd_size[aggr] = (fds_pset +my_naggs-1) / my_naggs; if (p== my_naggs-1) fd_size[aggr] -= (fd_size[aggr]*my_naggs - fds_pset); aggr++; }#else ADIO_Offset avg_bytes_perP = fds_pset / my_naggs; ADIO_Offset resid2 = avg_bytes_perP % GPFS_BSIZE; ADIO_Offset downr2 = GPFS_BSIZE - resid2; int small2 = (resid2 < downr2); int ratio2 = downr2 == 0 ? my_naggs + 2 : (resid2 +downr2-1)/downr2; if (small2) ratio2 = resid2 == 0 ? my_naggs + 2 : (downr2 +resid2-1)/resid2; ADIO_Offset accu = 0; int p; for (p=0; p<my_naggs; p++) { int cond1 = ((p+1) % ratio2 == 0); int cond2 = ((p+1) % ratio2 != 0); if (small2) { int temp = cond1; cond1 = cond2; cond2 = temp; } fd_size[aggr] = avg_bytes_perP; if (cond2) fd_size[aggr] = ((fd_size[aggr] +GPFS_BSIZE-1)/GPFS_BSIZE) * GPFS_BSIZE; if (cond1) fd_size[aggr] = ((fd_size[aggr] )/GPFS_BSIZE) * GPFS_BSIZE; if (p== my_naggs-1) fd_size[aggr] = (fds_pset - accu); accu += fd_size[aggr]; fd_range -= fd_size[aggr]; aggr++; }#endif } * after scheduling, the first and the last region has to remove the round-off effect *#if CN_ALIGN fd_size[0] -= gpfs_lb_rdoff; fd_size[naggs-1] -= gpfs_ub_rdoff;#endif * compute the file domain for each aggr * ADIO_Offset offset = min_st_offset; for (aggr=0; aggr<naggs; aggr++) { fd_start[aggr] = offset; fd_end [aggr] = offset + fd_size[aggr] - 1; offset += fd_size[aggr]; } * printf( "\t%6d : %12qd:%12qd, %12qd:%12qd:%12qd, %12qd:%12qd:%12qd\n", naggs, min_st_offset, max_end_offset, fd_start[0], fd_end [0], fd_size [0], fd_start[naggs-1], fd_end [naggs-1], fd_size [naggs-1] ); * *fd_size_ptr = fd_size[0]; *min_st_offset_ptr = min_st_offset; ADIOI_Free (fd_size);}*//* * When a process is an IO aggregator, this will return its index in the aggrs list. * Otherwise, this will return -1 */int ADIOI_BGL_Aggrs_index( ADIO_File fd, int myrank ){ int i; for (i=0; i<fd->hints->cb_nodes; i++) if (fd->hints->ranklist[i] == myrank) return i; return -1;}/* * This is more general aggregator search function which does not base on the assumption * that each aggregator hosts the file domain with the same size */int ADIOI_BGL_Calc_aggregator(ADIO_File fd, ADIO_Offset off, ADIO_Offset min_off, ADIO_Offset *len, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end){ int rank_index, rank; ADIO_Offset avail_bytes; AD_BGL_assert ( (off <= fd_end[fd->hints->cb_nodes-1] && off >= min_off && fd_start[0] >= min_off ) ); /* binary search --> rank_index is returned */ int ub = fd->hints->cb_nodes; int lb = 0; rank_index = fd->hints->cb_nodes / 2; while ( off < fd_start[rank_index] || off > fd_end[rank_index] ) { if ( off > fd_end [rank_index] ) { lb = rank_index; rank_index = (rank_index + ub) / 2; } else if ( off < fd_start[rank_index] ) { ub = rank_index; rank_index = (rank_index + lb) / 2; } } // printf ("ADIOI_BGL_Calc_aggregator: rank_index = %d\n", rank_index ); /* * remember here that even in Rajeev's original code it was the case that * different aggregators could end up with different amounts of data to * aggregate. here we use fd_end[] to make sure that we know how much * data this aggregator is working with. * * the +1 is to take into account the end vs. length issue. */ avail_bytes = fd_end[rank_index] + 1 - off; if (avail_bytes < *len && avail_bytes > 0) { /* this file domain only has part of the requested contig. region */ *len = avail_bytes; } /* map our index to a rank */ /* NOTE: FOR NOW WE DON'T HAVE A MAPPING...JUST DO 0..NPROCS_FOR_COLL */ rank = fd->hints->ranklist[rank_index]; return rank;}/* * ADIOI_BGL_Calc_my_req() overrides ADIOI_Calc_my_req for the default implementation * is specific for static file domain partitioning. * * ADIOI_Calc_my_req() calculate what portions of the access requests * of this process are located in the file domains of various processes * (including this one) */void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset *fd_start, ADIO_Offset *fd_end, ADIO_Offset fd_size, int nprocs, int *count_my_req_procs_ptr, int **count_my_req_per_proc_ptr, ADIOI_Access **my_req_ptr, int **buf_idx_ptr){ int *count_my_req_per_proc, count_my_req_procs, *buf_idx; int i, l, proc; ADIO_Offset fd_len, rem_len, curr_idx, off; ADIOI_Access *my_req; *count_my_req_per_proc_ptr = (int *) ADIOI_Calloc(nprocs,sizeof(int)); count_my_req_per_proc = *count_my_req_per_proc_ptr;/* count_my_req_per_proc[i] gives the no. of contig. requests of this process in process i's file domain. calloc initializes to zero. I'm allocating memory of size nprocs, so that I can do an MPI_Alltoall later on.*/ buf_idx = (int *) ADIOI_Malloc(nprocs*sizeof(int));/* buf_idx is relevant only if buftype_is_contig. buf_idx[i] gives the index into user_buf where data received
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -