📄 adapt.c
字号:
/* NOTE(review): the next three physical lines are the tail of a function (presumably main) whose beginning lies before this excerpt; they are reproduced unchanged. */
args012.sbuff = memtmp; /* args012.rbuff = memtmp1; */ } } Sync012(&args012); t0 = MPI_Wtime(); for (j = 0; j < nrepeat012; j++) { MPI_Recv(args012.rbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD, &status); MPI_Send(args012.sbuff, args012.bufflen, MPI_BYTE, args012.nbor2, MSG_TAG_012, MPI_COMM_WORLD); MPI_Recv(args012.rbuff, args012.bufflen, MPI_BYTE, args012.nbor2, MSG_TAG_012, MPI_COMM_WORLD, &status); MPI_Send(args012.sbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD); if (bNoCache) { args012.sbuff += args012.bufflen; /* args012.rbuff += args012.bufflen; */ } } t = (MPI_Wtime() - t0)/(2 * nrepeat012); } MPI_Bcast(&bwdata012[n].t, 1, MPI_DOUBLE, g_left_rank, MPI_COMM_WORLD); break; case RIGHT_PROCESS: bwdata012[n].t = LONGTIME; t1 = 0; for (i = 0; i < TRIALS; i++) { if (bNoCache) { if (bufalign != 0) { args012.sbuff = memtmp + ((bufalign - (POINTER_TO_INT(args012.sbuff) % bufalign) + bufoffset) % bufalign); /* args012.rbuff = memtmp1 + ((bufalign - ((MPI_Aint)args012.rbuff % bufalign) + bufoffset) % bufalign); */ } else { args012.sbuff = memtmp; /* args012.rbuff = memtmp1; */ } } Sync012(&args012); t0 = MPI_Wtime(); for (j = 0; j < nrepeat012; j++) { MPI_Recv(args012.rbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD, &status); MPI_Send(args012.sbuff, args012.bufflen, MPI_BYTE, args012.nbor, MSG_TAG_012, MPI_COMM_WORLD); if (bNoCache) { args012.sbuff += args012.bufflen; /* args012.rbuff += args012.bufflen; */ } } t = (MPI_Wtime() - t0)/(2 * nrepeat012); } MPI_Bcast(&bwdata012[n].t, 1, MPI_DOUBLE, g_left_rank, MPI_COMM_WORLD); break; } tlast012 = bwdata012[n].t; bwdata012[n].bits = args012.bufflen * CHARSIZE; bwdata012[n].bps = bwdata012[n].bits / (bwdata012[n].t * 1024 * 1024); bwdata012[n].repeat = nrepeat012; if (g_nIproc == 0) { if (bSavePert) { if (bUseMegaBytes) fprintf(out, "%f\t%0.9f\n", bwdata012[n].bps / 8, bwdata012[n].t); else fprintf(out, "%f\t%0.9f\n", bwdata012[n].bps, 
bwdata012[n].t); fflush(out); } } free(memtmp); free(memtmp1); if (g_nIproc == 0 && printopt) { if (bUseMegaBytes) printf(" %6.2f MBps in %0.9f sec\n", bwdata012[n].bps / 8, tlast012); else printf(" %6.2f Mbps in %0.9f sec\n", bwdata012[n].bps, tlast012); fflush(stdout); }#endif } /* End of perturbation loop */ if (!bSavePert)/* && g_nIproc == 0)*/ { /* if we didn't save all of the perturbation loops, find the max and save it */ int index01 = 1, index12 = 1, index012 = 1; double dmax01 = bwdata01[n-1].bps; double dmax12 = bwdata12[n-1].bps;#ifdef CREATE_SINGLE_CURVE double dmax012 = bwdata012[n-1].bps;#endif for (; ipert > 1; ipert--) { if (bwdata01[n-ipert].bps > dmax01) { index01 = ipert; dmax01 = bwdata01[n-ipert].bps; } if (bwdata12[n-ipert].bps > dmax12) { index12 = ipert; dmax12 = bwdata12[n-ipert].bps; }#ifdef CREATE_SINGLE_CURVE if (bwdata012[n-ipert].bps > dmax012) { index012 = ipert; dmax012 = bwdata012[n-ipert].bps; }#endif } /* get the left stuff out */ MPI_Bcast(&index01, 1, MPI_INT, g_left_rank, MPI_COMM_WORLD); MPI_Bcast(&bwdata01[n-index01].bits, 1, MPI_INT, g_left_rank, MPI_COMM_WORLD); MPI_Bcast(&bwdata01[n-index01].bps, 1, MPI_DOUBLE, g_left_rank, MPI_COMM_WORLD); MPI_Bcast(&bwdata01[n-index01].t, 1, MPI_DOUBLE, g_left_rank, MPI_COMM_WORLD); /* get the right stuff out */ MPI_Bcast(&index12, 1, MPI_INT, g_middle_rank, MPI_COMM_WORLD); MPI_Bcast(&bwdata12[n-index12].bps, 1, MPI_DOUBLE, g_middle_rank, MPI_COMM_WORLD); MPI_Bcast(&bwdata12[n-index12].t, 1, MPI_DOUBLE, g_middle_rank, MPI_COMM_WORLD); if (g_nIproc == 0) { if (bUseMegaBytes) { fprintf(out, "%d\t%f\t%0.9f\t", bwdata01[n-index01].bits / 8, bwdata01[n-index01].bps / 8, bwdata01[n-index01].t); fprintf(out, "%f\t%0.9f\t", bwdata12[n-index12].bps / 8, bwdata12[n-index12].t);#ifdef CREATE_SINGLE_CURVE fprintf(out, "%f\t%0.9f\n", bwdata012[n-index012].bps / 8, bwdata012[n-index012].t);#endif } else { fprintf(out, "%d\t%f\t%0.9f\t", bwdata01[n-index01].bits / 8, bwdata01[n-index01].bps, 
bwdata01[n-index01].t); fprintf(out, "%f\t%0.9f\t", bwdata12[n-index12].bps, bwdata12[n-index12].t);#ifdef CREATE_SINGLE_CURVE fprintf(out, "%f\t%0.9f\n", bwdata012[n-index012].bps, bwdata012[n-index012].t);#endif }#ifdef CREATE_DIFFERENCE_CURVES for (itrial = 0; itrial < ntrials && dtrials[itrial] != LONGTIME; itrial++) { fprintf(out, "%0.9f\t", dtrials[itrial]); } fprintf(out, "\n");#endif fflush(out); } } } /* End of main loop */ if (g_nIproc == 0) fclose(out); /* THE_END: */ MPI_Finalize(); free(bwdata01); free(bwdata12); free(bwdata012); return 0;}

/*
 * Setup - assign this process its role and neighbour ranks.
 *
 * middle_rank selects which of ranks 0, 1, 2 acts as the middle process;
 * the left and right ranks follow from a fixed table:
 *
 *     middle_rank   left   right
 *          0          2      1
 *          1          0      2
 *          2          1      0
 *
 * p01  describes the left <-> middle link,
 * p12  describes the middle <-> right link,
 * p012 describes the left <-> middle <-> right chain.
 *
 * A neighbour of -1 means "not involved in that link".  The tr flag is TRUE
 * on the side that sends first in Sync()/TestLatency(): the left process on
 * p01 and the middle process on p12.
 *
 * Side effects: stores g_nIproc in the iproc field of all three structs,
 * prints "<rank>: <processor name>" to stdout, and sets g_proc_loc,
 * g_left_rank, g_middle_rank and g_right_rank.  If middle_rank is not 0, 1
 * or 2, only iproc is written.  If g_nIproc is not one of the three
 * participating ranks, the role and neighbour fields are left untouched.
 *
 * Always returns 1.
 */
int Setup(int middle_rank, ArgStruct *p01, ArgStruct *p12, ArgStruct *p012)
{
    /* MPI requires the name buffer to hold at least MPI_MAX_PROCESSOR_NAME
     * characters; a fixed 255-byte buffer is too small on implementations
     * where that constant is 256. */
    char s[MPI_MAX_PROCESSOR_NAME] = "";
    int len = 0;
    int left, right;

    p01->iproc = p12->iproc = p012->iproc = g_nIproc;

    MPI_Get_processor_name(s, &len);
    printf("%d: %s\n", p01->iproc, s);
    fflush(stdout);

    switch (middle_rank) {
    case 0:
        left = 2;
        right = 1;
        break;
    case 1:
        left = 0;
        right = 2;
        break;
    case 2:
        left = 1;
        right = 0;
        break;
    default:
        /* Unknown layout: leave roles and rank globals as they were. */
        return 1;
    }

    if (g_nIproc == left) {
        g_proc_loc = LEFT_PROCESS;
        p01->nbor = middle_rank;
        p01->tr = TRUE;
        p12->nbor = -1;
        p12->tr = FALSE;
        p012->nbor = middle_rank;
        p012->nbor2 = -1;
    } else if (g_nIproc == middle_rank) {
        g_proc_loc = MIDDLE_PROCESS;
        p01->nbor = left;
        p01->tr = FALSE;
        p12->nbor = right;
        p12->tr = TRUE;
        p012->nbor = left;
        p012->nbor2 = right;
    } else if (g_nIproc == right) {
        g_proc_loc = RIGHT_PROCESS;
        p01->nbor = -1;
        p01->tr = FALSE;
        p12->nbor = middle_rank;
        p12->tr = FALSE;
        p012->nbor = middle_rank;
        p012->nbor2 = -1;
    }

    g_left_rank = left;
    g_middle_rank = middle_rank;
    g_right_rank = right;

    return 1;
}

/*
 * Sync - three-step zero-byte handshake with p->nbor (message tag 1).
 *
 * The side with p->tr set does send/recv/send; the other side does
 * recv/send/recv, so the two sequences pair up message for message.
 */
void Sync(ArgStruct *p)
{
    MPI_Status status;

    if (p->tr) {
        MPI_Send(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD);
        MPI_Recv(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD, &status);
        MPI_Send(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD);
    } else {
        MPI_Recv(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD, &status);
        MPI_Send(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD);
        MPI_Recv(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD, &status);
    }
}

/*
 * Sync012 - zero-byte handshake along the left <-> middle <-> right chain
 * (message tag 1).  The sequence executed depends on g_proc_loc:
 *
 *   left:   send, recv, send              (all with p->nbor)
 *   middle: recv nbor,  send nbor2,
 *           recv nbor2, send nbor,
 *           recv nbor,  send nbor2
 *   right:  recv, send, recv              (all with p->nbor)
 *
 * Any other g_proc_loc value does nothing.
 */
void Sync012(ArgStruct *p)
{
    MPI_Status status;

    switch (g_proc_loc) {
    case LEFT_PROCESS:
        MPI_Send(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD);
        MPI_Recv(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD, &status);
        MPI_Send(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD);
        break;
    case MIDDLE_PROCESS:
        MPI_Recv(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD, &status);
        MPI_Send(NULL, 0, MPI_BYTE, p->nbor2, 1, MPI_COMM_WORLD);
        MPI_Recv(NULL, 0, MPI_BYTE, p->nbor2, 1, MPI_COMM_WORLD, &status);
        MPI_Send(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD);
        MPI_Recv(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD, &status);
        MPI_Send(NULL, 0, MPI_BYTE, p->nbor2, 1, MPI_COMM_WORLD);
        break;
    case RIGHT_PROCESS:
        MPI_Recv(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD, &status);
        MPI_Send(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD);
        MPI_Recv(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD, &status);
        break;
    }
}

/*
 * DetermineLatencyReps - choose a repetition count for the two-process
 * latency test on link p.
 *
 * Runs Sync() in batches (1, 1, 2, 4, ... calls) and accumulates the
 * elapsed time until the loop condition on RUNTM / MAX_LAT_TIME / 1000 reps
 * is no longer met.  After each batch the p->tr side sends its accumulated
 * duration to the neighbour (tag 2), so both sides evaluate the loop
 * condition on the same value and leave the loop together.
 *
 * Returns the doubled repetition counter reached when the loop ends.
 */
int DetermineLatencyReps(ArgStruct *p)
{
    MPI_Status status;
    double t0, duration = 0;
    int reps = 1, prev_reps = 0;
    int i;

    /* prime the send/receive pipes */
    Sync(p);
    Sync(p);
    Sync(p);

    /* test how long it takes to send n messages
     * where n = 1, 2, 4, 8, 16, 32, ... */

    /* The repeated timer reads are kept from the original; only the value
     * taken inside the loop is used.  NOTE(review): presumably these warm
     * up MPI_Wtime - confirm before removing. */
    t0 = MPI_Wtime();
    t0 = MPI_Wtime();
    t0 = MPI_Wtime();

    while ((duration < RUNTM) ||
           (duration < MAX_LAT_TIME && reps < 1000)) {
        t0 = MPI_Wtime();
        for (i = 0; i < reps - prev_reps; i++) {
            Sync(p);
        }
        duration += MPI_Wtime() - t0;

        prev_reps = reps;
        reps = reps * 2;

        /* use duration from the root only */
        if (p->tr)
            MPI_Send(&duration, 1, MPI_DOUBLE, p->nbor, 2, MPI_COMM_WORLD);
        else
            MPI_Recv(&duration, 1, MPI_DOUBLE, p->nbor, 2, MPI_COMM_WORLD,
                     &status);
    }

    return reps;
}

/*
 * DetermineLatencyReps012 - three-process counterpart of
 * DetermineLatencyReps().
 *
 * Uses Sync012() for the timed batches and broadcasts the accumulated
 * duration from g_left_rank over MPI_COMM_WORLD after every batch, so every
 * rank evaluates the loop condition on the left process's value.
 *
 * Returns the doubled repetition counter reached when the loop ends.
 */
int DetermineLatencyReps012(ArgStruct *p)
{
    double t0, duration = 0;
    int reps = 1, prev_reps = 0;
    int i;

    /* prime the send/receive pipes */
    Sync012(p);
    Sync012(p);
    Sync012(p);

    /* test how long it takes to send n messages
     * where n = 1, 2, 4, 8, 16, 32, ... */

    /* Repeated timer reads kept from the original (see NOTE(review) in
     * DetermineLatencyReps). */
    t0 = MPI_Wtime();
    t0 = MPI_Wtime();
    t0 = MPI_Wtime();

    while ((duration < RUNTM) ||
           (duration < MAX_LAT_TIME && reps < 1000)) {
        t0 = MPI_Wtime();
        for (i = 0; i < reps - prev_reps; i++) {
            Sync012(p);
        }
        duration += MPI_Wtime() - t0;

        prev_reps = reps;
        reps = reps * 2;

        /* use duration from the root only */
        MPI_Bcast(&duration, 1, MPI_DOUBLE, g_left_rank, MPI_COMM_WORLD);
    }

    return reps;
}

/*
 * TestLatency - measure the zero-byte message latency on link p.
 *
 * Stores the repetition count from DetermineLatencyReps() in
 * p->latency_reps; the p->tr side prints the count.  Then runs TRIALS
 * trials; each trial times p->latency_reps zero-byte ping-pong exchanges
 * (tag 1) and divides the elapsed time by 2 * latency_reps.
 *
 * Returns the smallest per-trial value, starting from LONGTIME.
 */
double TestLatency(ArgStruct *p)
{
    double latency, t0, min_latency = LONGTIME;
    int i, j;
    MPI_Status status;
    char str[100];

    p->latency_reps = DetermineLatencyReps(p);

    if (p->tr) {
        /* snprintf bounds the write to str. */
        if (g_proc_loc == LEFT_PROCESS) {
            snprintf(str, sizeof str, "%d <-> %d ", p->iproc, p->nbor);
        } else {
            snprintf(str, sizeof str, " %d <-> %d", p->iproc, p->nbor);
        }
        printf("To determine %s latency, using %d reps.\n", str,
               p->latency_reps);
        fflush(stdout);
    }

    for (j = 0; j < TRIALS; j++) {
        Sync(p);

        /* Repeated timer reads kept from the original; only the last value
         * is used. */
        t0 = MPI_Wtime();
        t0 = MPI_Wtime();
        t0 = MPI_Wtime();
        t0 = MPI_Wtime();

        for (i = 0; i < p->latency_reps; i++) {
            if (p->tr) {
                MPI_Send(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD);
                MPI_Recv(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD,
                         &status);
            } else {
                MPI_Recv(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD,
                         &status);
                MPI_Send(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD);
            }
        }

        latency = (MPI_Wtime() - t0) / (2 * p->latency_reps);
        min_latency = MIN(min_latency, latency);
    }

    return min_latency;
}

/*
 * TestLatency012 - measure zero-byte latency along the three-process chain.
 *
 * Stores the repetition count from DetermineLatencyReps012() in
 * g_latency012_reps; the middle process prints the count.  Each of TRIALS
 * trials times g_latency012_reps rounds in which the left process does
 * send/recv, the middle process relays left -> right and right -> left, and
 * the right process does recv/send.  The elapsed time is divided by
 * 2 * g_latency012_reps.
 *
 * Returns the smallest per-trial value, starting from LONGTIME.
 */
double TestLatency012(ArgStruct *p)
{
    double latency, t0, min_latency = LONGTIME;
    int i, j;
    MPI_Status status;

    g_latency012_reps = DetermineLatencyReps012(p);

    if (g_proc_loc == MIDDLE_PROCESS) {
        printf("To determine %d <-- %d --> %d latency, using %d reps\n",
               p->nbor, p->iproc, p->nbor2, g_latency012_reps);
        fflush(stdout);
    }

    for (j = 0; j < TRIALS; j++) {
        Sync012(p);

        /* Repeated timer reads kept from the original; only the last value
         * is used. */
        t0 = MPI_Wtime();
        t0 = MPI_Wtime();
        t0 = MPI_Wtime();
        t0 = MPI_Wtime();

        for (i = 0; i < g_latency012_reps; i++) {
            switch (g_proc_loc) {
            case LEFT_PROCESS:
                MPI_Send(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD);
                MPI_Recv(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD,
                         &status);
                break;
            case MIDDLE_PROCESS:
                MPI_Recv(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD,
                         &status);
                MPI_Send(NULL, 0, MPI_BYTE, p->nbor2, 1, MPI_COMM_WORLD);
                MPI_Recv(NULL, 0, MPI_BYTE, p->nbor2, 1, MPI_COMM_WORLD,
                         &status);
                MPI_Send(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD);
                break;
            case RIGHT_PROCESS:
                MPI_Recv(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD,
                         &status);
                MPI_Send(NULL, 0, MPI_BYTE, p->nbor, 1, MPI_COMM_WORLD);
                break;
            }
        }

        latency = (MPI_Wtime() - t0) / (2 * g_latency012_reps);
        min_latency = MIN(min_latency, latency);
    }

    return min_latency;
}

/* SendTime - send one double (*t) to p->nbor with message tag 2. */
void SendTime(ArgStruct *p, double *t)
{
    MPI_Send(t, 1, MPI_DOUBLE, p->nbor, 2, MPI_COMM_WORLD);
}

/* RecvTime - receive one double into *t from p->nbor with message tag 2. */
void RecvTime(ArgStruct *p, double *t)
{
    MPI_Status status;

    MPI_Recv(t, 1, MPI_DOUBLE, p->nbor, 2, MPI_COMM_WORLD, &status);
}

/* SendReps - send one int (*rpt) to p->nbor with message tag 2. */
void SendReps(ArgStruct *p, int *rpt)
{
    MPI_Send(rpt, 1, MPI_INT, p->nbor, 2, MPI_COMM_WORLD);
}

/* RecvReps - receive one int into *rpt from p->nbor with message tag 2. */
void RecvReps(ArgStruct *p, int *rpt)
{
    MPI_Status status;

    MPI_Recv(rpt, 1, MPI_INT, p->nbor, 2, MPI_COMM_WORLD, &status);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -