/***************************************************************************/
/* mmv.c - parallel MPI message passing SMVP kernel                        */
/*                                                                         */
/* Spark98 Kernels                                                         */
/* Copyright (C) David O'Hallaron 1998                                     */
/*                                                                         */
/* You are free to use this software without restriction. If you find that*/
/* the suite is helpful to you, it would be very helpful if you sent me a  */
/* note at droh@cs.cmu.edu letting me know how you are using it.           */
/***************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* strlen() and strcpy() are used in init() */
#include <sys/time.h>
#include <sys/stat.h>
#include <mpi.h>

/*
 * program wide constants
 */
#define DEFAULT_ITERS 20 /* default number of SMVP operations */
#define STRLEN 128       /* default string length */
#define DOF 3            /* degrees of freedom in underlying simulation */
#define SEND_CMD 10      /* used by i/o master to ask for data from slaves */
#define INVALID -9999    /* marks uninitialized data */

/*
 * global variables
 */
struct gi {
    /* misc */
    char progname[STRLEN];     /* program name */
    FILE *packfile;            /* file descriptor for pack file */

    /* command line options */
    int quiet;                 /* run quietly unless there are errors (-Q) */
    int iters;                 /* number of times to perform SMVP (-i<d>) */
    int output;                /* print the result vector? (-o) */
    char packfilename[STRLEN]; /* input packfile name */

    /* problem parameters */
    int globalnodes;           /* number of global nodes */
    int globalelems;           /* number of global elements */
    int mesh_dim;              /* mesh dimension (3) */
    int corners;               /* nodes per element (4) */
    int subdomains;            /* number of partition sets */
    int processors;            /* not used */
    int nodes;                 /* total number of local nodes on this PE */
    int mine;                  /* nodes this subdomain owns */
    int priv;                  /* unshared nodes: priv <= mine <= nodes */
    int *nodeall;              /* total nodes on each PE */
    int maxnodes;              /* max nodes on any PE */
    int elems;                 /* number of local elements */
    int matrixlen;             /* number of local nonzero block entries */
    int friends;               /* number of PEs I communicate with */
    int commlen;               /* total communicated nodes on this PE */
    int maxcommnodes;          /* max nodes communicated by any PE */

    /*
     * global data structures
     */
    double (*coord)[DOF];      /* geometric node coordinates */
    int (*vertex)[4];          /* nodes associated with each element */
    int *globalnode;           /* local-to-global node mapping function */
    int *globalelem;           /* local-to-global element mapping function */

    /* sparse matrix in compressed sparse row format */
    int *matrixcol;            /* K[i] is in column matrixcol[i] */
    int *matrixindex;          /* row i starts at K[matrixindex[i]] */

    /* communication schedule */
    int *comm;                 /* local node number to communicate */
    int *commindex;            /* subdomain i starts at comm[commindex[i]] */

    /* status and request vectors for communication phase */
    MPI_Status *statusvec;
    MPI_Request *requestvec;

    /* send and receive buffers for communication phase */
    double *sendbuf;
    double *recvbuf;

    /*
     * the following are determined by MPI calls.
     * there are two groups: rgroup (runtime group) and cgroup
     * (compute subgroup). cgroup is a subset of rgroup. We need two
     * groups because the number of subdomains might be different than
     * the number of PE's, e.g., T3D programs must run on a power of
     * two PE's.
     */

    /* runtime group */
    MPI_Group rgroup;          /* runtime group 0, ..., rsize-1 */
    int rsize;                 /* total number of processors in runtime group */

    /* compute subgroup */
    MPI_Group cgroup;          /* compute subgroup 0, ..., csize-1 */
    MPI_Comm ccomm;            /* compute subgroup communicator */
    int csize;                 /* size of compute subgroup */

    /* my rank and the rank of the io processor */
    int rank;                  /* my rank */
    int ioproc;                /* rank of the i/o proc */
} gi, *gip = &gi;
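/*
 * NOTE (editor sketch): the code that actually builds ccomm lives in the
 * part of init() not included in this excerpt. Below is a minimal sketch
 * of how a compute subgroup communicator can be carved out of the runtime
 * group with standard MPI calls; the helper name make_compute_comm and its
 * exact logic are assumptions for illustration, not the original source.
 */
#ifdef EDITOR_SKETCH
static void make_compute_comm(struct gi *gip)
{
    /* include ranks 0 .. subdomains-1 (stride 1) in the compute subgroup */
    int crange[1][3];
    crange[0][0] = 0;                   /* first rank */
    crange[0][1] = gip->subdomains - 1; /* last rank */
    crange[0][2] = 1;                   /* stride */

    MPI_Group_range_incl(gip->rgroup, 1, crange, &gip->cgroup);

    /* collective over MPI_COMM_WORLD; ranks outside the subgroup get
       MPI_COMM_NULL back and take no part in the computation */
    MPI_Comm_create(MPI_COMM_WORLD, gip->cgroup, &gip->ccomm);

    if (gip->ccomm != MPI_COMM_NULL)
        MPI_Comm_size(gip->ccomm, &gip->csize);
}
#endif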
/* timing variables */
double starttime, startctime;
double secs, csecs;

/* w1 = K1 * v1 */
double (*K1)[DOF][DOF]; /* sparse matrix */
double (*v1)[DOF];      /* dense vector */
double (*w1)[DOF];      /* dense vector */

/* w2 = K2 * v2 */
double (*K2)[DOF][DOF]; /* sparse matrix */
double (*v2)[DOF];      /* dense vector */
double (*w2)[DOF];      /* dense vector */

/* end globals */

/*
 * function prototypes
 */

/* initialization and exit routines */
void init(int argc, char **argv, struct gi *gip);
void parsecommandline(int argc, char **argv, struct gi *gip);
void readpackfile(struct gi *gip);
void bail(struct gi *gip);
void finalize(struct gi *gip);

/* routines that read and parse the packfile */
void load_pack(struct gi *gip);
void load_global_master(struct gi *gip);
void load_global_slave(struct gi *gip);
void load_nodes_master(struct gi *gip);
void load_nodes_slave(struct gi *gip);
void load_elems_master(struct gi *gip);
void load_elems_slave(struct gi *gip);
void load_matrix_master(struct gi *gip);
void load_matrix_slave(struct gi *gip);
void load_comm_master(struct gi *gip);
void load_comm_slave(struct gi *gip);

/* routines that assemble the sparse matrix and initial vector */
void assemble_matrix(double (*K)[DOF][DOF], struct gi *gip);
void assemble_vector(double (*v)[DOF], struct gi *gip);

/* sparse matrix vector multiply routines */
void smvpthread(struct gi *gip);
void zero_vector(double (*v)[DOF], int firstrow, int numrows);
void local_smvp(int nodes, double (*A)[DOF][DOF], int *Acol, int *Aindex,
                double (*v)[DOF], double (*w)[DOF], int firstrow, int numrows);
void full_assemble(double (*v)[DOF], struct gi *gip);

/* misc routines */
void info(struct gi *gip);
void usage(struct gi *gip);
void prglobal(struct gi *gip);
void prnodes(struct gi *gip);
void prelems(struct gi *gip);
void prmatrix(struct gi *gip);
void prcomm(struct gi *gip);
void local_printmatrix3(double (*A)[DOF][DOF], struct gi *gip);
void local_printvec3(double (*v)[DOF], int n, struct gi *gip);
void printnodevector(double (*v)[DOF], int n, struct gi *gip);
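/*
 * NOTE (editor sketch): on the layout of the globals above. A declaration
 * such as double (*K1)[DOF][DOF] is a pointer to an array of DOFxDOF
 * blocks, so after the malloc in main() below, K1[i][r][c] selects row r,
 * column c of the i-th nonzero 3x3 block, and the whole matrix is one
 * contiguous allocation of matrixlen blocks. Likewise v1[i][d] is the
 * d-th degree of freedom of local node i. The element sizes work out as:
 *
 *   sizeof(K1[0]) == DOF*DOF*sizeof(double)  (one 3x3 block, 72 bytes)
 *   sizeof(v1[0]) == DOF*sizeof(double)      (one 3-vector,  24 bytes)
 */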
/*
 * main program
 */
int main(int argc, char **argv)
{
    double mflops, global_mflops;
    double max_secs, max_csecs;
    int maxnonzeros, minnonzeros;

    init(argc, argv, gip);

    /*
     * allocate contiguous storage for the matrix and vectors
     */
    if (!(K1 = (void *)malloc((gip->matrixlen+1)*DOF*DOF*sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc K1(%d)\n",
                gip->progname, gip->matrixlen);
        bail(gip);
    }
    if (!(K2 = (void *)malloc((gip->matrixlen+1)*DOF*DOF*sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc K2(%d)\n",
                gip->progname, gip->matrixlen);
        bail(gip);
    }
    if (!(v1 = (void *)malloc(gip->nodes * DOF * sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc v1(%d)\n",
                gip->progname, gip->nodes);
        bail(gip);
    }
    if (!(v2 = (void *)malloc(gip->nodes * DOF * sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc v2(%d)\n",
                gip->progname, gip->nodes);
        bail(gip);
    }
    if (!(w1 = (void *)malloc(gip->nodes * DOF * sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc w1(%d)\n",
                gip->progname, gip->nodes);
        bail(gip);
    }
    if (!(w2 = (void *)malloc(gip->nodes * DOF * sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc w2(%d)\n",
                gip->progname, gip->nodes);
        bail(gip);
    }

    /*
     * generate the sparse matrix coefficients
     */
    MPI_Barrier(gip->ccomm);
    if (gip->rank == 0 && !gip->quiet)
        fprintf(stderr, "%s: Computing sparse matrix coefficients.",
                gip->progname);
    assemble_matrix(K1, gip);
    assemble_matrix(K2, gip);
    assemble_vector(v1, gip);
    assemble_vector(v2, gip);
    if (!gip->quiet && gip->rank == 0) {
        fprintf(stderr, " Done.\n");
        fflush(stderr);
    }

    /*
     * Do the actual SMVP operation
     */
    if (gip->rank == 0 && !gip->quiet) {
        fprintf(stderr, "%s: Performing %d SMVP pairs (n=%d) on %d PEs.",
                gip->progname, gip->iters, gip->globalnodes, gip->subdomains);
    }
    smvpthread(gip);
    if (!gip->quiet && gip->rank == 0) {
        fprintf(stderr, " Done.\n");
        fflush(stderr);
    }
    if (secs == 0.0) {
        fprintf(stderr,
                "error: no measured elapsed time. Use more iterations (e.g., -i%d)\n",
                gip->iters*10);
        bail(gip);
    }

    /*
     * Summarize performance
     */
    MPI_Barrier(gip->ccomm);
    mflops = (double)((2.0*gip->matrixlen - gip->nodes) /* nonzero blocks */
                      * DOF*DOF  /* DOF^2 numbers/block */
                      * 2.0)     /* 2 flops/number */
             / 1000000.0;

    /* get the total number of mflops */
    MPI_Reduce(&mflops, &global_mflops, 1, MPI_DOUBLE, MPI_SUM, 0, gip->ccomm);
    MPI_Bcast(&global_mflops, 1, MPI_DOUBLE, 0, gip->ccomm);

    /* get the longest execution time */
    MPI_Reduce(&secs, &max_secs, 1, MPI_DOUBLE, MPI_MAX, 0, gip->ccomm);
    MPI_Bcast(&max_secs, 1, MPI_DOUBLE, 0, gip->ccomm);
    MPI_Reduce(&csecs, &max_csecs, 1, MPI_DOUBLE, MPI_MAX, 0, gip->ccomm);
    MPI_Bcast(&max_csecs, 1, MPI_DOUBLE, 0, gip->ccomm);

    /* get the maximum and minimum loads */
    MPI_Reduce(&gip->matrixlen, &maxnonzeros, 1, MPI_INT, MPI_MAX, 0, gip->ccomm);
    MPI_Bcast(&maxnonzeros, 1, MPI_INT, 0, gip->ccomm);
    MPI_Reduce(&gip->matrixlen, &minnonzeros, 1, MPI_INT, MPI_MIN, 0, gip->ccomm);
    MPI_Bcast(&minnonzeros, 1, MPI_INT, 0, gip->ccomm);

    if (gip->rank == 0) {
        fprintf(stderr,
                "%s: %s %.6lf Mf %.6lf s [%.6lf/%.6lf/%.0lf%%] %.1lf Mf/s [%d/%d/%.0lf%%]\n",
                gip->progname, gip->packfilename, global_mflops,
                max_secs, max_secs - max_csecs, max_csecs,
                ((max_secs - max_csecs)/max_secs)*100.0,
                global_mflops/max_secs,
                minnonzeros, maxnonzeros,
                ((double)minnonzeros/(double)maxnonzeros)*100.0);
    }

    /*
     * print results if asked for
     */
    if (gip->output) {
        printnodevector(w1, gip->globalnodes, gip);
    }

    /*
     * Clean up
     */
    if (gip->rank == 0 && !gip->quiet) {
        fprintf(stderr, "%s: Done.\n", gip->progname);
    }
    finalize(gip);
    return 0;
}
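/*
 * NOTE (editor sketch): a worked instance of the flop count in main()
 * above, with hypothetical per-PE sizes (not from any real packfile).
 * Since only the upper triangle is stored, a local matrix with
 * matrixlen = 1000 stored blocks over nodes = 250 rows represents
 *
 *   2*1000 - 250   = 1750  nonzero blocks in the full symmetric matrix
 *   1750 * DOF*DOF = 15750 nonzero scalar coefficients
 *   15750 * 2      = 31500 flops (one multiply + one add each)
 *
 * i.e. mflops = 0.0315 per SMVP on that PE; MPI_Reduce then sums these
 * per-PE counts into global_mflops.
 */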
void smvpthread(struct gi *gip)
{
    int i;

    MPI_Barrier(gip->ccomm);
    csecs = 0.0;
    starttime = MPI_Wtime();

    for (i = 0; i < gip->iters; i++) {
        /* w1 = K1 * v1 */
        zero_vector(w1, 0, gip->nodes);
        local_smvp(gip->nodes, K1, gip->matrixcol, gip->matrixindex,
                   v1, w1, 0, gip->nodes);
        startctime = MPI_Wtime();
        full_assemble(w1, gip);
        csecs += (MPI_Wtime() - startctime);

        /* w2 = K2 * v2 */
        zero_vector(w2, 0, gip->nodes);
        local_smvp(gip->nodes, K2, gip->matrixcol, gip->matrixindex,
                   v2, w2, 0, gip->nodes);
        startctime = MPI_Wtime();
        full_assemble(w2, gip);
        csecs += (MPI_Wtime() - startctime);
    }

    /* report the average time per SMVP (two SMVPs per iteration) */
    secs = (MPI_Wtime() - starttime)/(gip->iters*2.0);
    csecs = csecs/(gip->iters*2.0);
}

/*
 * assemble_matrix - assemble the local sparse matrix
 */
void assemble_matrix(double (*K)[DOF][DOF], struct gi *gip)
{
    int i, j, k, l, m;
    int temp1, elem;

    for (i = 0; i < gip->matrixlen; i++)
        for (j = 0; j < DOF; j++)
            for (k = 0; k < DOF; k++)
                K[i][j][k] = 0.0;

    /*
     * add the contribution of each element to K
     */
    for (elem = 0; elem < gip->elems; elem++) {
        for (j = 0; j < gip->corners; j++) {
            for (k = 0; k < gip->corners; k++) {
                if (gip->vertex[elem][j] < gip->vertex[elem][k]) {
                    /* walk row vertex[elem][j] until column vertex[elem][k] */
                    temp1 = gip->matrixindex[gip->vertex[elem][j]];
                    while (gip->matrixcol[temp1] != gip->vertex[elem][k]) {
                        temp1++;
                        if (temp1 >= gip->matrixindex[gip->vertex[elem][j] + 1]) {
                            fprintf(stderr, "%s: K indexing error in assemble %d %d\n",
                                    gip->progname, temp1, gip->vertex[elem][k]);
                            bail(gip);
                        }
                    }
                    for (l = 0; l < DOF; l++)
                        for (m = 0; m < DOF; m++)
                            K[temp1][l][m]++;
                }
            }
        }
    }
#ifdef DEBUGASSEMBLE
    local_printmatrix3(K, gip);
#endif
}

/*
 * assemble_vector - assemble the local v vector
 */
void assemble_vector(double (*v)[DOF], struct gi *gip)
{
    int i, j;

    for (i = 0; i < gip->nodes; i++)
        for (j = 0; j < DOF; j++)
            v[i][j] = 1.0;

#ifdef DEBUGASSEMBLE
    local_printvec3(v, gip->nodes, gip);
#endif
}

/*
 * zero_vector - clear a portion of a vector
 */
void zero_vector(double (*v)[DOF], int firstrow, int numrows)
{
    int i;

    for (i = firstrow; i < (firstrow + numrows); i++) {
        v[i][0] = 0.0;
        v[i][1] = 0.0;
        v[i][2] = 0.0;
    }
}

/*
 * local_smvp - local sparse matrix vector product w = K*v
 *
 * v and w are vectors of n elements of size DOF.
 * The smvp is computed starting at row firstrow (zero based)
 * and for numrows rows.
 *
 * K is a block symmetric sparse matrix which is stored in
 * compressed sparse row format as three vectors. Only the
 * upper triangle is stored:
 * - A is a coefficient vector of elements of size DOFxDOF.
 * - Acol[i] is the column number of the coefficient in A[i].
 * - Row i consists of elements A[Aindex[i]]...A[Aindex[i+1]-1].
 */
void local_smvp(int nodes, double (*A)[DOF][DOF], int *Acol, int *Aindex,
                double (*v)[DOF], double (*w)[DOF], int firstrow, int numrows)
{
    int i;
    int Anext, Alast, col;
    double sum0, sum1, sum2;

    for (i = firstrow; i < (firstrow + numrows); i++) {
        /* the first entry in each row is the diagonal block */
        Anext = Aindex[i];
        Alast = Aindex[i + 1];

        sum0 = A[Anext][0][0]*v[i][0] + A[Anext][0][1]*v[i][1] + A[Anext][0][2]*v[i][2];
        sum1 = A[Anext][1][0]*v[i][0] + A[Anext][1][1]*v[i][1] + A[Anext][1][2]*v[i][2];
        sum2 = A[Anext][2][0]*v[i][0] + A[Anext][2][1]*v[i][1] + A[Anext][2][2]*v[i][2];

        Anext++;
        while (Anext < Alast) {
            col = Acol[Anext];

            /* upper-triangle block: row i, column col */
            sum0 += A[Anext][0][0]*v[col][0] + A[Anext][0][1]*v[col][1] + A[Anext][0][2]*v[col][2];
            sum1 += A[Anext][1][0]*v[col][0] + A[Anext][1][1]*v[col][1] + A[Anext][1][2]*v[col][2];
            sum2 += A[Anext][2][0]*v[col][0] + A[Anext][2][1]*v[col][1] + A[Anext][2][2]*v[col][2];

            /* its transpose stands in for the unstored block at (col, i) */
            w[col][0] += A[Anext][0][0]*v[i][0] + A[Anext][1][0]*v[i][1] + A[Anext][2][0]*v[i][2];
            w[col][1] += A[Anext][0][1]*v[i][0] + A[Anext][1][1]*v[i][1] + A[Anext][2][1]*v[i][2];
            w[col][2] += A[Anext][0][2]*v[i][0] + A[Anext][1][2]*v[i][1] + A[Anext][2][2]*v[i][2];

            Anext++;
        }

        w[i][0] += sum0;
        w[i][1] += sum1;
        w[i][2] += sum2;
    }
}
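/*
 * NOTE (editor sketch): a tiny worked instance of the storage scheme that
 * local_smvp expects (values invented for illustration). For a 2-node
 * symmetric block matrix
 *
 *        | D0  U  |
 *    K = |        |     (D0, D1 diagonal blocks, U the single
 *        | U^T D1 |      upper-triangle off-diagonal block)
 *
 * only D0, U, and D1 are stored:
 *
 *    A      = { D0, U, D1 }   three DOFxDOF blocks
 *    Acol   = { 0, 1, 1 }     column of each stored block
 *    Aindex = { 0, 2, 3 }     row 0 is A[0..1], row 1 is A[2..2]
 *
 * The while-loop above applies U to row 0 and, in the same pass, applies
 * U^T to row 1, so the unstored lower triangle never needs to exist.
 */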
/*
 * full_assemble - assemble a distributed vector
 */
void full_assemble(double (*v)[DOF], struct gi *gip)
{
    int i, j, friend, cnt;

    MPI_Barrier(gip->ccomm);

    /* post one receive for each subdomain we share nodes with */
    friend = 0;
    for (i = 0; i < gip->subdomains; i++) {
        if (gip->commindex[i] < gip->commindex[i+1]) {
            MPI_Irecv(&gip->recvbuf[gip->commindex[i]*3],
                      (gip->commindex[i+1]*3) - (gip->commindex[i]*3),
                      MPI_DOUBLE, i, MPI_ANY_TAG, gip->ccomm,
                      &gip->requestvec[friend]);
            friend++;
        }
    }

    /* pack and send our shared-node values to each friend */
    for (i = 0; i < gip->subdomains; i++) {
        if (gip->commindex[i] < gip->commindex[i+1]) {
            cnt = 0;
            for (j = gip->commindex[i]; j < gip->commindex[i+1]; j++) {
                gip->sendbuf[cnt++] = v[gip->comm[j]][0];
                gip->sendbuf[cnt++] = v[gip->comm[j]][1];
                gip->sendbuf[cnt++] = v[gip->comm[j]][2];
            }
            MPI_Send(gip->sendbuf, cnt, MPI_DOUBLE, i, 0, gip->ccomm);
        }
    }

    /* wait for all receives, then add the remote contributions */
    MPI_Waitall(gip->friends, gip->requestvec, gip->statusvec);
    cnt = 0;
    for (i = 0; i < gip->commlen; i++) {
        v[gip->comm[i]][0] += gip->recvbuf[cnt++];
        v[gip->comm[i]][1] += gip->recvbuf[cnt++];
        v[gip->comm[i]][2] += gip->recvbuf[cnt++];
    }
    MPI_Barrier(gip->ccomm);
}

void init(int argc, char **argv, struct gi *gip)
{
    char *sp;
    int ibuf[6];
    int crange[1][3];
    int rrank, crank;
    struct stat st;

    /*
     * Initialize MPI.
     */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &gip->rank);
    MPI_Comm_size(MPI_COMM_WORLD, &(gip->rsize));
    MPI_Comm_group(MPI_COMM_WORLD, &(gip->rgroup));

    /*
     * There is no need to see the entire path name, the base will do.
     */
    for (sp = argv[0]+strlen(argv[0]); (sp != argv[0]) && *sp != '/'; sp--)
        ;
    if (*sp == '/')
        strcpy(gip->progname, sp+1);
    else
        strcpy(gip->progname, sp);

    parsecommandline(argc, argv, gip);

    /*
     * Do some consistency checks on process 0 and let everyone
     * else know how many subdomains to expect.
     */
    if (gip->rank == 0) {
        if (stat(gip->packfilename, &st) < 0) {
            fprintf(stderr, "%s: Can't find %s\n",
                    gip->progname, gip->packfilename);
            bail(gip);
        }
        if (!(gip->packfile = fopen(gip->packfilename, "r"))) {
            fprintf(stderr, "%s: Can't open %s\n",
                    gip->progname, gip->packfilename);
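/*
 * [The excerpt ends here; the remainder of init() and the packfile-loading,
 *  printing, and exit routines declared above are not included.]
 *
 * NOTE (editor sketch): a plausible build-and-run sequence, assuming a
 * standard MPI toolchain. The packfile name sf2.1.8.pack and the PE count
 * are invented for illustration; the flags follow the option comments in
 * struct gi (-Q quiet, -i<d> iterations, -o print result vector):
 *
 *    mpicc -O2 -o mmv mmv.c
 *    mpirun -np 8 ./mmv -i40 sf2.1.8.pack
 */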