📄 ad_bgl_aggrs.c
字号:
from proc. i should be placed. This allows receives to be done without extra buffer. This can't be done if buftype is not contig. */ /* initialize buf_idx to -1 */ for (i=0; i < nprocs; i++) buf_idx[i] = -1; /* one pass just to calculate how much space to allocate for my_req; * contig_access_count was calculated way back in ADIOI_Calc_my_off_len() */ for (i=0; i < contig_access_count; i++) { /* When there is no data being processed, bypass this loop */ if (len_list[i] == 0) continue; off = offset_list[i]; fd_len = len_list[i]; /* note: we set fd_len to be the total size of the access. then * ADIOI_Calc_aggregator() will modify the value to return the * amount that was available from the file domain that holds the * first part of the access. */ proc = ADIOI_BGL_Calc_aggregator(fd, off, min_st_offset, &fd_len, fd_size, fd_start, fd_end); count_my_req_per_proc[proc]++; /* figure out how much data is remaining in the access (i.e. wasn't * part of the file domain that had the starting byte); we'll take * care of this data (if there is any) in the while loop below. */ rem_len = len_list[i] - fd_len; while (rem_len > 0) { off += fd_len; /* point to first remaining byte */ fd_len = rem_len; /* save remaining size, pass to calc */ proc = ADIOI_BGL_Calc_aggregator(fd, off, min_st_offset, &fd_len, fd_size, fd_start, fd_end); count_my_req_per_proc[proc]++; rem_len -= fd_len; /* reduce remaining length by amount from fd */ } }/* now allocate space for my_req, offset, and len */ *my_req_ptr = (ADIOI_Access *) ADIOI_Malloc(nprocs*sizeof(ADIOI_Access)); my_req = *my_req_ptr; count_my_req_procs = 0; for (i=0; i < nprocs; i++) { if (count_my_req_per_proc[i]) { my_req[i].offsets = (ADIO_Offset *) ADIOI_Malloc(count_my_req_per_proc[i] * sizeof(ADIO_Offset)); my_req[i].lens = (int *) ADIOI_Malloc(count_my_req_per_proc[i] * sizeof(int)); count_my_req_procs++; } my_req[i].count = 0; /* will be incremented where needed later */ }/* now fill in my_req */ curr_idx = 0; for (i=0; i<contig_access_count; i++) { /* When there is no data being processed, bypass this loop */ if (len_list[i] == 0) continue; off = offset_list[i]; fd_len = len_list[i]; proc = ADIOI_BGL_Calc_aggregator(fd, off, min_st_offset, &fd_len, fd_size, fd_start, fd_end); /* for each separate contiguous access from this process */ if (buf_idx[proc] == -1) buf_idx[proc] = (int) curr_idx; l = my_req[proc].count; curr_idx += (int) fd_len; /* NOTE: Why is curr_idx an int? Fix? */ rem_len = len_list[i] - fd_len; /* store the proc, offset, and len information in an array * of structures, my_req. Each structure contains the * offsets and lengths located in that process's FD, * and the associated count. */ my_req[proc].offsets[l] = off; my_req[proc].lens[l] = (int) fd_len; my_req[proc].count++; while (rem_len > 0) { off += fd_len; fd_len = rem_len; proc = ADIOI_BGL_Calc_aggregator(fd, off, min_st_offset, &fd_len, fd_size, fd_start, fd_end); if (buf_idx[proc] == -1) buf_idx[proc] = (int) curr_idx; l = my_req[proc].count; curr_idx += fd_len; rem_len -= fd_len; my_req[proc].offsets[l] = off; my_req[proc].lens[l] = (int) fd_len; my_req[proc].count++; } }#ifdef AGG_DEBUG for (i=0; i<nprocs; i++) { if (count_my_req_per_proc[i] > 0) { FPRINTF(stdout, "data needed from %d (count = %d):\n", i, my_req[i].count); for (l=0; l < my_req[i].count; l++) { FPRINTF(stdout, " off[%d] = %Ld, len[%d] = %d\n", l, my_req[i].offsets[l], l, my_req[i].lens[l]); } } }#if 0 for (i=0; i<nprocs; i++) { FPRINTF(stdout, "buf_idx[%d] = 0x%x\n", i, buf_idx[i]); }#endif#endif *count_my_req_procs_ptr = count_my_req_procs; *buf_idx_ptr = buf_idx;}/* * ADIOI_Calc_others_req * * param[in] count_my_req_procs Number of processes whose file domain my * request touches. * param[in] count_my_req_per_proc count_my_req_per_proc[i] gives the no. of * contig. requests of this process in * process i's file domain. * param[in] my_req A structure defining my request * param[in] nprocs Number of nodes in the block * param[in] myrank Rank of this node * param[out] count_others_req_proc_ptr Number of processes whose requests lie in * my process's file domain (including my * process itself) * param[out] others_req_ptr Array of other process' requests that lie * in my process's file domain */void ADIOI_BGL_Calc_others_req(ADIO_File fd, int count_my_req_procs, int *count_my_req_per_proc, ADIOI_Access *my_req, int nprocs, int myrank, int *count_others_req_procs_ptr, ADIOI_Access **others_req_ptr) {/* determine what requests of other processes lie in this process's file domain *//* count_others_req_procs = number of processes whose requests lie in this process's file domain (including this process itself) count_others_req_per_proc[i] indicates how many separate contiguous requests of proc. i lie in this process's file domain. */ int *count_others_req_per_proc, count_others_req_procs; int i; ADIOI_Access *others_req; /* Parameters for MPI_Alltoallv */ int *scounts, *sdispls, *rcounts, *rdispls; /* Parameters for MPI_Alltoallv. These are the buffers, which * are later computed to be the lowest address of all buffers * to be sent/received for offsets and lengths. Initialize to * the highest possible address which is the current minimum. */ void *sendBufForOffsets=(void*)0xFFFFFFFF, *sendBufForLens =(void*)0xFFFFFFFF, *recvBufForOffsets=(void*)0xFFFFFFFF, *recvBufForLens =(void*)0xFFFFFFFF; /* first find out how much to send/recv and from/to whom */ /* Send 1 int to each process. count_my_req_per_proc[i] is the number of * requests that my process will do to the file domain owned by process[i]. * Receive 1 int from each process. count_others_req_per_proc[i] is the number of * requests that process[i] will do to the file domain owned by my process. */ count_others_req_per_proc = (int *) ADIOI_Malloc(nprocs*sizeof(int));/* cora2a1=timebase(); */ MPI_Alltoall(count_my_req_per_proc, 1, MPI_INT, count_others_req_per_proc, 1, MPI_INT, fd->comm);/* total_cora2a+=timebase()-cora2a1; */ /* Allocate storage for an array of other nodes' accesses of our * node's file domain. Also allocate storage for the alltoallv * parameters. */ *others_req_ptr = (ADIOI_Access *) ADIOI_Malloc(nprocs*sizeof(ADIOI_Access)); others_req = *others_req_ptr; scounts = ADIOI_Malloc(nprocs*sizeof(int)); sdispls = ADIOI_Malloc(nprocs*sizeof(int)); rcounts = ADIOI_Malloc(nprocs*sizeof(int)); rdispls = ADIOI_Malloc(nprocs*sizeof(int)); /* If process[i] has any requests in my file domain, * initialize an ADIOI_Access structure that will describe each request * from process[i]. The offsets, lengths, and buffer pointers still need * to be obtained to complete the setting of this structure. */ count_others_req_procs = 0; for (i=0; i<nprocs; i++) { if (count_others_req_per_proc[i]) { others_req[i].count = count_others_req_per_proc[i]; others_req[i].offsets = (ADIO_Offset *) ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(ADIO_Offset)); others_req[i].lens = (int *) ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(int)); if ( (unsigned)others_req[i].offsets < (unsigned)recvBufForOffsets ) recvBufForOffsets = others_req[i].offsets; if ( (unsigned)others_req[i].lens < (unsigned)recvBufForLens ) recvBufForLens = others_req[i].lens; others_req[i].mem_ptrs = (MPI_Aint *) ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(MPI_Aint)); count_others_req_procs++; } else { others_req[i].count = 0; others_req[i].offsets = NULL; others_req[i].lens = NULL; } } /* Now send the calculated offsets and lengths to respective processes */ /************************/ /* Exchange the offsets */ /************************/ /* Determine the lowest sendBufForOffsets/Lens */ for (i=0; i<nprocs; i++) { if ( (my_req[i].count) && ((unsigned)my_req[i].offsets <= (unsigned)sendBufForOffsets) ) sendBufForOffsets = my_req[i].offsets; if ( (my_req[i].count) && ((unsigned)my_req[i].lens <= (unsigned)sendBufForLens) ) sendBufForLens = my_req[i].lens; } /* Calculate the displacements from the sendBufForOffsets/Lens */ for (i=0; i<nprocs; i++) { // Send these offsets to process i. scounts[i] = count_my_req_per_proc[i]; if ( scounts[i] == 0 ) sdispls[i] = 0; else sdispls[i] = ( (unsigned)my_req[i].offsets - (unsigned)sendBufForOffsets ) / sizeof(ADIO_Offset); // Receive these offsets from process i. rcounts[i] = count_others_req_per_proc[i]; if ( rcounts[i] == 0 ) rdispls[i] = 0; else rdispls[i] = ( (unsigned)others_req[i].offsets - (unsigned)recvBufForOffsets ) / sizeof(ADIO_Offset); } /* Exchange the offsets */ MPI_Alltoallv(sendBufForOffsets, scounts, sdispls, ADIO_OFFSET, recvBufForOffsets, rcounts, rdispls, ADIO_OFFSET, fd->comm); /************************/ /* Exchange the lengths */ /************************/ for (i=0; i<nprocs; i++) { // Send these lengths to process i. scounts[i] = count_my_req_per_proc[i]; if ( scounts[i] == 0 ) sdispls[i] = 0; else sdispls[i] = ( (unsigned)my_req[i].lens - (unsigned)sendBufForLens ) / sizeof(int); // Receive these offsets from process i. rcounts[i] = count_others_req_per_proc[i]; if ( rcounts[i] == 0 ) rdispls[i] = 0; else rdispls[i] = ( (unsigned)others_req[i].lens - (unsigned)recvBufForLens ) / sizeof(int); } /* Exchange the lengths */ MPI_Alltoallv(sendBufForLens, scounts, sdispls, MPI_INT, recvBufForLens, rcounts, rdispls, MPI_INT, fd->comm); /* Clean up */ ADIOI_Free(count_others_req_per_proc); ADIOI_Free (scounts); ADIOI_Free (sdispls); ADIOI_Free (rcounts); ADIOI_Free (rdispls); *count_others_req_procs_ptr = count_others_req_procs;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -