/***************************************************************************/
/* mmv.c - parallel MPI message passing SMVP kernel                        */
/*                                                                         */
/* Spark98 Kernels                                                         */
/* Copyright (C) David O'Hallaron 1998                                     */
/*                                                                         */
/* You are free to use this software without restriction. If you find that*/
/* the suite is helpful to you, it would be very helpful if you sent me a  */
/* note at droh@cs.cmu.edu letting me know how you are using it.           */
/***************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* strlen() and strcpy() are used in init() */
#include <sys/time.h>
#include <sys/stat.h>
#include <mpi.h>

/*
 * program wide constants
 */
#define DEFAULT_ITERS 20 /* default number of SMVP operations */
#define STRLEN 128       /* default string length */
#define DOF 3            /* degrees of freedom in underlying simulation */
#define SEND_CMD 10      /* used by i/o master to ask for data from slaves */
#define INVALID -9999    /* marks uninitialized data */

/*
 * global variables
 */
struct gi {
    /* misc */
    char progname[STRLEN];     /* program name */
    FILE *packfile;            /* file descriptor for pack file */

    /* command line options */
    int quiet;                 /* run quietly unless there are errors (-Q) */
    int iters;                 /* number of times to perform SMVP (-i<d>) */
    int output;                /* print the result vector? (-o) */
    char packfilename[STRLEN]; /* input packfile name */

    /* problem parameters */
    int globalnodes;           /* number of global nodes */
    int globalelems;           /* number of global elements */
    int mesh_dim;              /* mesh dimension (3) */
    int corners;               /* nodes per element (4) */
    int subdomains;            /* number of partition sets */
    int processors;            /* not used */
    int nodes;                 /* total number of local nodes on this PE */
    int mine;                  /* nodes this subdomain owns */
    int priv;                  /* unshared nodes: priv <= mine <= nodes */
    int *nodeall;              /* total nodes on each PE */
    int maxnodes;              /* max nodes on any PE */
    int elems;                 /* number of local elements */
    int matrixlen;             /* number of local nonzero block entries */
    int friends;               /* number of PEs I communicate with */
    int commlen;               /* total communicated nodes on this PE */
    int maxcommnodes;          /* max nodes communicated by any PE */

    /*
     * global data structures
     */
    double (*coord)[DOF];      /* geometric node coordinates */
    int (*vertex)[4];          /* nodes associated with each element */
    int *globalnode;           /* local-to-global node mapping function */
    int *globalelem;           /* local-to-global element mapping function */

    /* sparse matrix in compressed sparse row format */
    int *matrixcol;            /* K[i] is in column matrixcol[i] */
    int *matrixindex;          /* row i starts at K[matrixindex[i]] */

    /* communication schedule */
    int *comm;                 /* local node number to communicate */
    int *commindex;            /* subdomain i starts at comm[commindex[i]] */

    /* status and request vectors for communication phase */
    MPI_Status *statusvec;
    MPI_Request *requestvec;

    /* send and receive buffers for communication phase */
    double *sendbuf;
    double *recvbuf;

    /*
     * the following are determined by MPI calls.
     * there are two groups: rgroup (runtime group) and cgroup
     * (compute subgroup). cgroup is a subset of rgroup. We need two
     * groups because the number of subdomains might be different than
     * the number of PE's, e.g., T3D programs must run on a power of
     * two PE's.
     */

    /* runtime group */
    MPI_Group rgroup;          /* runtime group 0, ..., rsize-1 */
    int rsize;                 /* total number of processors in runtime group */

    /* compute subgroup */
    MPI_Group cgroup;          /* compute subgroup 0, ..., csize-1 */
    MPI_Comm ccomm;            /* compute subgroup communicator */
    int csize;                 /* size of compute subgroup */

    /* my rank and the rank of the io processor */
    int rank;                  /* my rank */
    int ioproc;                /* rank of the i/o proc */
} gi, *gip = &gi;
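/*
 * NOTE (editor sketch): the code that actually builds ccomm lives in the
 * part of init() not included in this excerpt. Below is a minimal sketch
 * of how a compute subgroup communicator can be carved out of the runtime
 * group with standard MPI calls; the helper name make_compute_comm and its
 * exact logic are assumptions for illustration, not the original source.
 */
#ifdef EDITOR_SKETCH
static void make_compute_comm(struct gi *gip)
{
    /* include ranks 0 .. subdomains-1 (stride 1) in the compute subgroup */
    int crange[1][3];
    crange[0][0] = 0;                   /* first rank */
    crange[0][1] = gip->subdomains - 1; /* last rank */
    crange[0][2] = 1;                   /* stride */

    MPI_Group_range_incl(gip->rgroup, 1, crange, &gip->cgroup);

    /* collective over MPI_COMM_WORLD; ranks outside the subgroup get
       MPI_COMM_NULL back and take no part in the computation */
    MPI_Comm_create(MPI_COMM_WORLD, gip->cgroup, &gip->ccomm);

    if (gip->ccomm != MPI_COMM_NULL)
        MPI_Comm_size(gip->ccomm, &gip->csize);
}
#endif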
/* timing variables */
double starttime, startctime;
double secs, csecs;

/* w1 = K1 * v1 */
double (*K1)[DOF][DOF]; /* sparse matrix */
double (*v1)[DOF];      /* dense vector */
double (*w1)[DOF];      /* dense vector */

/* w2 = K2 * v2 */
double (*K2)[DOF][DOF]; /* sparse matrix */
double (*v2)[DOF];      /* dense vector */
double (*w2)[DOF];      /* dense vector */

/* end globals */

/*
 * function prototypes
 */

/* initialization and exit routines */
void init(int argc, char **argv, struct gi *gip);
void parsecommandline(int argc, char **argv, struct gi *gip);
void readpackfile(struct gi *gip);
void bail(struct gi *gip);
void finalize(struct gi *gip);

/* routines that read and parse the packfile */
void load_pack(struct gi *gip);
void load_global_master(struct gi *gip);
void load_global_slave(struct gi *gip);
void load_nodes_master(struct gi *gip);
void load_nodes_slave(struct gi *gip);
void load_elems_master(struct gi *gip);
void load_elems_slave(struct gi *gip);
void load_matrix_master(struct gi *gip);
void load_matrix_slave(struct gi *gip);
void load_comm_master(struct gi *gip);
void load_comm_slave(struct gi *gip);

/* routines that assemble the sparse matrix and initial vector */
void assemble_matrix(double (*K)[DOF][DOF], struct gi *gip);
void assemble_vector(double (*v)[DOF], struct gi *gip);

/* sparse matrix vector multiply routines */
void smvpthread(struct gi *gip);
void zero_vector(double (*v)[DOF], int firstrow, int numrows);
void local_smvp(int nodes, double (*A)[DOF][DOF], int *Acol, int *Aindex,
                double (*v)[DOF], double (*w)[DOF], int firstrow, int numrows);
void full_assemble(double (*v)[DOF], struct gi *gip);

/* misc routines */
void info(struct gi *gip);
void usage(struct gi *gip);
void prglobal(struct gi *gip);
void prnodes(struct gi *gip);
void prelems(struct gi *gip);
void prmatrix(struct gi *gip);
void prcomm(struct gi *gip);
void local_printmatrix3(double (*A)[DOF][DOF], struct gi *gip);
void local_printvec3(double (*v)[DOF], int n, struct gi *gip);
void printnodevector(double (*v)[DOF], int n, struct gi *gip);
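/*
 * NOTE (editor sketch): on the layout of the globals above. A declaration
 * such as double (*K1)[DOF][DOF] is a pointer to an array of DOFxDOF
 * blocks, so after the malloc in main() below, K1[i][r][c] selects row r,
 * column c of the i-th nonzero 3x3 block, and the whole matrix is one
 * contiguous allocation of matrixlen blocks. Likewise v1[i][d] is the
 * d-th degree of freedom of local node i. The element sizes work out as:
 *
 *   sizeof(K1[0]) == DOF*DOF*sizeof(double)  (one 3x3 block, 72 bytes)
 *   sizeof(v1[0]) == DOF*sizeof(double)      (one 3-vector,  24 bytes)
 */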
/*
 * main program
 */
int main(int argc, char **argv)
{
    double mflops, global_mflops;
    double max_secs, max_csecs;
    int maxnonzeros, minnonzeros;

    init(argc, argv, gip);

    /*
     * allocate contiguous storage for the matrix and vectors
     */
    if (!(K1 = (void *)malloc((gip->matrixlen+1)*DOF*DOF*sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc K1(%d)\n",
                gip->progname, gip->matrixlen);
        bail(gip);
    }
    if (!(K2 = (void *)malloc((gip->matrixlen+1)*DOF*DOF*sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc K2(%d)\n",
                gip->progname, gip->matrixlen);
        bail(gip);
    }
    if (!(v1 = (void *)malloc(gip->nodes * DOF * sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc v1(%d)\n",
                gip->progname, gip->nodes);
        bail(gip);
    }
    if (!(v2 = (void *)malloc(gip->nodes * DOF * sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc v2(%d)\n",
                gip->progname, gip->nodes);
        bail(gip);
    }
    if (!(w1 = (void *)malloc(gip->nodes * DOF * sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc w1(%d)\n",
                gip->progname, gip->nodes);
        bail(gip);
    }
    if (!(w2 = (void *)malloc(gip->nodes * DOF * sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc w2(%d)\n",
                gip->progname, gip->nodes);
        bail(gip);
    }

    /*
     * generate the sparse matrix coefficients
     */
    MPI_Barrier(gip->ccomm);
    if (gip->rank == 0 && !gip->quiet)
        fprintf(stderr, "%s: Computing sparse matrix coefficients.",
                gip->progname);
    assemble_matrix(K1, gip);
    assemble_matrix(K2, gip);
    assemble_vector(v1, gip);
    assemble_vector(v2, gip);
    if (!gip->quiet && gip->rank == 0) {
        fprintf(stderr, " Done.\n");
        fflush(stderr);
    }

    /*
     * Do the actual SMVP operation
     */
    if (gip->rank == 0 && !gip->quiet) {
        fprintf(stderr, "%s: Performing %d SMVP pairs (n=%d) on %d PEs.",
                gip->progname, gip->iters, gip->globalnodes, gip->subdomains);
    }
    smvpthread(gip);
    if (!gip->quiet && gip->rank == 0) {
        fprintf(stderr, " Done.\n");
        fflush(stderr);
    }
    if (secs == 0.0) {
        fprintf(stderr,
                "error: no measured elapsed time. Use more iterations (e.g., -i%d)\n",
                gip->iters*10);
        bail(gip);
    }

    /*
     * Summarize performance
     */
    MPI_Barrier(gip->ccomm);
    mflops = (double)((2.0*gip->matrixlen - gip->nodes) /* nonzero blocks */
                      * DOF*DOF  /* DOF^2 numbers/block */
                      * 2.0)     /* 2 flops/number */
             / 1000000.0;

    /* get the total number of mflops */
    MPI_Reduce(&mflops, &global_mflops, 1, MPI_DOUBLE, MPI_SUM, 0, gip->ccomm);
    MPI_Bcast(&global_mflops, 1, MPI_DOUBLE, 0, gip->ccomm);

    /* get the longest execution time */
    MPI_Reduce(&secs, &max_secs, 1, MPI_DOUBLE, MPI_MAX, 0, gip->ccomm);
    MPI_Bcast(&max_secs, 1, MPI_DOUBLE, 0, gip->ccomm);
    MPI_Reduce(&csecs, &max_csecs, 1, MPI_DOUBLE, MPI_MAX, 0, gip->ccomm);
    MPI_Bcast(&max_csecs, 1, MPI_DOUBLE, 0, gip->ccomm);

    /* get the maximum and minimum loads */
    MPI_Reduce(&gip->matrixlen, &maxnonzeros, 1, MPI_INT, MPI_MAX, 0, gip->ccomm);
    MPI_Bcast(&maxnonzeros, 1, MPI_INT, 0, gip->ccomm);
    MPI_Reduce(&gip->matrixlen, &minnonzeros, 1, MPI_INT, MPI_MIN, 0, gip->ccomm);
    MPI_Bcast(&minnonzeros, 1, MPI_INT, 0, gip->ccomm);

    if (gip->rank == 0) {
        fprintf(stderr,
                "%s: %s %.6lf Mf %.6lf s [%.6lf/%.6lf/%.0lf%%] %.1lf Mf/s [%d/%d/%.0lf%%]\n",
                gip->progname, gip->packfilename, global_mflops,
                max_secs, max_secs - max_csecs, max_csecs,
                ((max_secs - max_csecs)/max_secs)*100.0,
                global_mflops/max_secs,
                minnonzeros, maxnonzeros,
                ((double)minnonzeros/(double)maxnonzeros)*100.0);
    }

    /*
     * print results if asked for
     */
    if (gip->output) {
        printnodevector(w1, gip->globalnodes, gip);
    }

    /*
     * Clean up
     */
    if (gip->rank == 0 && !gip->quiet) {
        fprintf(stderr, "%s: Done.\n", gip->progname);
    }
    finalize(gip);
    return 0;
}
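/*
 * NOTE (editor sketch): a worked instance of the flop count in main()
 * above, with hypothetical per-PE sizes (not from any real packfile).
 * Since only the upper triangle is stored, a local matrix with
 * matrixlen = 1000 stored blocks over nodes = 250 rows represents
 *
 *   2*1000 - 250   = 1750  nonzero blocks in the full symmetric matrix
 *   1750 * DOF*DOF = 15750 nonzero scalar coefficients
 *   15750 * 2      = 31500 flops (one multiply + one add each)
 *
 * i.e. mflops = 0.0315 per SMVP on that PE; MPI_Reduce then sums these
 * per-PE counts into global_mflops.
 */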
void smvpthread(struct gi *gip)
{
    int i;

    MPI_Barrier(gip->ccomm);
    csecs = 0.0;
    starttime = MPI_Wtime();

    for (i = 0; i < gip->iters; i++) {
        /* w1 = K1 * v1 */
        zero_vector(w1, 0, gip->nodes);
        local_smvp(gip->nodes, K1, gip->matrixcol, gip->matrixindex,
                   v1, w1, 0, gip->nodes);
        startctime = MPI_Wtime();
        full_assemble(w1, gip);
        csecs += (MPI_Wtime() - startctime);

        /* w2 = K2 * v2 */
        zero_vector(w2, 0, gip->nodes);
        local_smvp(gip->nodes, K2, gip->matrixcol, gip->matrixindex,
                   v2, w2, 0, gip->nodes);
        startctime = MPI_Wtime();
        full_assemble(w2, gip);
        csecs += (MPI_Wtime() - startctime);
    }

    /* report the average time per SMVP (two SMVPs per iteration) */
    secs = (MPI_Wtime() - starttime)/(gip->iters*2.0);
    csecs = csecs/(gip->iters*2.0);
}

/*
 * assemble_matrix - assemble the local sparse matrix
 */
void assemble_matrix(double (*K)[DOF][DOF], struct gi *gip)
{
    int i, j, k, l, m;
    int temp1, elem;

    for (i = 0; i < gip->matrixlen; i++)
        for (j = 0; j < DOF; j++)
            for (k = 0; k < DOF; k++)
                K[i][j][k] = 0.0;

    /*
     * add the contribution of each element to K
     */
    for (elem = 0; elem < gip->elems; elem++) {
        for (j = 0; j < gip->corners; j++) {
            for (k = 0; k < gip->corners; k++) {
                if (gip->vertex[elem][j] < gip->vertex[elem][k]) {
                    /* walk row vertex[elem][j] until column vertex[elem][k] */
                    temp1 = gip->matrixindex[gip->vertex[elem][j]];
                    while (gip->matrixcol[temp1] != gip->vertex[elem][k]) {
                        temp1++;
                        if (temp1 >= gip->matrixindex[gip->vertex[elem][j] + 1]) {
                            fprintf(stderr, "%s: K indexing error in assemble %d %d\n",
                                    gip->progname, temp1, gip->vertex[elem][k]);
                            bail(gip);
                        }
                    }
                    for (l = 0; l < DOF; l++)
                        for (m = 0; m < DOF; m++)
                            K[temp1][l][m]++;
                }
            }
        }
    }
#ifdef DEBUGASSEMBLE
    local_printmatrix3(K, gip);
#endif
}

/*
 * assemble_vector - assemble the local v vector
 */
void assemble_vector(double (*v)[DOF], struct gi *gip)
{
    int i, j;

    for (i = 0; i < gip->nodes; i++)
        for (j = 0; j < DOF; j++)
            v[i][j] = 1.0;

#ifdef DEBUGASSEMBLE
    local_printvec3(v, gip->nodes, gip);
#endif
}

/*
 * zero_vector - clear a portion of a vector
 */
void zero_vector(double (*v)[DOF], int firstrow, int numrows)
{
    int i;

    for (i = firstrow; i < (firstrow + numrows); i++) {
        v[i][0] = 0.0;
        v[i][1] = 0.0;
        v[i][2] = 0.0;
    }
}

/*
 * local_smvp - local sparse matrix vector product w = K*v
 *
 * v and w are vectors of n elements of size DOF.
 * The smvp is computed starting at row firstrow (zero based)
 * and for numrows rows.
 *
 * K is a block symmetric sparse matrix which is stored in
 * compressed sparse row format as three vectors. Only the
 * upper triangle is stored:
 * - A is a coefficient vector of elements of size DOFxDOF.
 * - Acol[i] is the column number of the coefficient in A[i].
 * - Row i consists of elements A[Aindex[i]]...A[Aindex[i+1]-1].
 */
void local_smvp(int nodes, double (*A)[DOF][DOF], int *Acol, int *Aindex,
                double (*v)[DOF], double (*w)[DOF], int firstrow, int numrows)
{
    int i;
    int Anext, Alast, col;
    double sum0, sum1, sum2;

    for (i = firstrow; i < (firstrow + numrows); i++) {
        /* the first entry in each row is the diagonal block */
        Anext = Aindex[i];
        Alast = Aindex[i + 1];

        sum0 = A[Anext][0][0]*v[i][0] + A[Anext][0][1]*v[i][1] + A[Anext][0][2]*v[i][2];
        sum1 = A[Anext][1][0]*v[i][0] + A[Anext][1][1]*v[i][1] + A[Anext][1][2]*v[i][2];
        sum2 = A[Anext][2][0]*v[i][0] + A[Anext][2][1]*v[i][1] + A[Anext][2][2]*v[i][2];

        Anext++;
        while (Anext < Alast) {
            col = Acol[Anext];

            /* upper-triangle block: row i, column col */
            sum0 += A[Anext][0][0]*v[col][0] + A[Anext][0][1]*v[col][1] + A[Anext][0][2]*v[col][2];
            sum1 += A[Anext][1][0]*v[col][0] + A[Anext][1][1]*v[col][1] + A[Anext][1][2]*v[col][2];
            sum2 += A[Anext][2][0]*v[col][0] + A[Anext][2][1]*v[col][1] + A[Anext][2][2]*v[col][2];

            /* its transpose stands in for the unstored block at (col, i) */
            w[col][0] += A[Anext][0][0]*v[i][0] + A[Anext][1][0]*v[i][1] + A[Anext][2][0]*v[i][2];
            w[col][1] += A[Anext][0][1]*v[i][0] + A[Anext][1][1]*v[i][1] + A[Anext][2][1]*v[i][2];
            w[col][2] += A[Anext][0][2]*v[i][0] + A[Anext][1][2]*v[i][1] + A[Anext][2][2]*v[i][2];

            Anext++;
        }

        w[i][0] += sum0;
        w[i][1] += sum1;
        w[i][2] += sum2;
    }
}
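/*
 * NOTE (editor sketch): a tiny worked instance of the storage scheme that
 * local_smvp expects (values invented for illustration). For a 2-node
 * symmetric block matrix
 *
 *        | D0  U  |
 *    K = |        |     (D0, D1 diagonal blocks, U the single
 *        | U^T D1 |      upper-triangle off-diagonal block)
 *
 * only D0, U, and D1 are stored:
 *
 *    A      = { D0, U, D1 }   three DOFxDOF blocks
 *    Acol   = { 0, 1, 1 }     column of each stored block
 *    Aindex = { 0, 2, 3 }     row 0 is A[0..1], row 1 is A[2..2]
 *
 * The while-loop above applies U to row 0 and, in the same pass, applies
 * U^T to row 1, so the unstored lower triangle never needs to exist.
 */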
/*
 * full_assemble - assemble a distributed vector
 */
void full_assemble(double (*v)[DOF], struct gi *gip)
{
    int i, j, friend, cnt;

    MPI_Barrier(gip->ccomm);

    /* post one receive for each subdomain we share nodes with */
    friend = 0;
    for (i = 0; i < gip->subdomains; i++) {
        if (gip->commindex[i] < gip->commindex[i+1]) {
            MPI_Irecv(&gip->recvbuf[gip->commindex[i]*3],
                      (gip->commindex[i+1]*3) - (gip->commindex[i]*3),
                      MPI_DOUBLE, i, MPI_ANY_TAG, gip->ccomm,
                      &gip->requestvec[friend]);
            friend++;
        }
    }

    /* pack and send our shared-node values to each friend */
    for (i = 0; i < gip->subdomains; i++) {
        if (gip->commindex[i] < gip->commindex[i+1]) {
            cnt = 0;
            for (j = gip->commindex[i]; j < gip->commindex[i+1]; j++) {
                gip->sendbuf[cnt++] = v[gip->comm[j]][0];
                gip->sendbuf[cnt++] = v[gip->comm[j]][1];
                gip->sendbuf[cnt++] = v[gip->comm[j]][2];
            }
            MPI_Send(gip->sendbuf, cnt, MPI_DOUBLE, i, 0, gip->ccomm);
        }
    }

    /* wait for all receives, then add the remote contributions */
    MPI_Waitall(gip->friends, gip->requestvec, gip->statusvec);
    cnt = 0;
    for (i = 0; i < gip->commlen; i++) {
        v[gip->comm[i]][0] += gip->recvbuf[cnt++];
        v[gip->comm[i]][1] += gip->recvbuf[cnt++];
        v[gip->comm[i]][2] += gip->recvbuf[cnt++];
    }
    MPI_Barrier(gip->ccomm);
}

void init(int argc, char **argv, struct gi *gip)
{
    char *sp;
    int ibuf[6];
    int crange[1][3];
    int rrank, crank;
    struct stat st;

    /*
     * Initialize MPI.
     */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &gip->rank);
    MPI_Comm_size(MPI_COMM_WORLD, &(gip->rsize));
    MPI_Comm_group(MPI_COMM_WORLD, &(gip->rgroup));

    /*
     * There is no need to see the entire path name, the base will do.
     */
    for (sp = argv[0]+strlen(argv[0]); (sp != argv[0]) && *sp != '/'; sp--)
        ;
    if (*sp == '/')
        strcpy(gip->progname, sp+1);
    else
        strcpy(gip->progname, sp);

    parsecommandline(argc, argv, gip);

    /*
     * Do some consistency checks on process 0 and let everyone
     * else know how many subdomains to expect.
     */
    if (gip->rank == 0) {
        if (stat(gip->packfilename, &st) < 0) {
            fprintf(stderr, "%s: Can't find %s\n",
                    gip->progname, gip->packfilename);
            bail(gip);
        }
        if (!(gip->packfile = fopen(gip->packfilename, "r"))) {
            fprintf(stderr, "%s: Can't open %s\n",
                    gip->progname, gip->packfilename);
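/*
 * [The excerpt ends here; the remainder of init() and the packfile-loading,
 *  printing, and exit routines declared above are not included.]
 *
 * NOTE (editor sketch): a plausible build-and-run sequence, assuming a
 * standard MPI toolchain. The packfile name sf2.1.8.pack and the PE count
 * are invented for illustration; the flags follow the option comments in
 * struct gi (-Q quiet, -i<d> iterations, -o print result vector):
 *
 *    mpicc -O2 -o mmv mmv.c
 *    mpirun -np 8 ./mmv -i40 sf2.1.8.pack
 */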