📄 hmv.c
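The header comment below requires either SGI or PTHREAD to be defined at build time. As a hedged illustration only (the kernel's own Makefile is not part of this listing, and the exact compiler and flags are an assumption), the two variants might be built as:

    cc -O2 -DPTHREAD hmv.c -o hmv -lpthread    # Pthreads version (assumed flags)
    cc -O2 -DSGI hmv.c -o hmv                  # IRIX sproc/ulocks version (assumed flags)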
/***************************************************************************/
/* hmv.c - hybrid SMVP kernel: a shared memory program in a time- and     */
/* space-efficient message passing style                                   */
/*                                                                         */
/* Spark98 Kernels                                                         */
/* Copyright (c) David O'Hallaron 1998                                     */
/*                                                                         */
/* Compilation options (you must define one of these two variables):      */
/*                                                                         */
/*   Variable   Binary   Description                                      */
/*   SGI        hmv      uses SGI thread primitives                       */
/*   PTHREAD    hmv      uses Pthread thread primitives                   */
/*                                                                         */
/* You are free to use this software without restriction. If you find that*/
/* the suite is helpful to you, it would be very helpful if you sent me a */
/* note at droh@cs.cmu.edu letting me know how you are using it.          */
/***************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#if defined(PTHREAD)
#include <pthread.h>
#elif defined(SGI)
#include <sys/types.h>
#include <sys/prctl.h>
#include <ulocks.h>
#endif

/*
 * program wide constants
 */
#define DEFAULT_ITERS 20   /* default number of SMVP operations */
#define STRLEN 128         /* default string length */
#define DOF 3              /* degrees of freedom in underlying simulation */
#define SEND_CMD 10        /* used by i/o master to ask for data from slaves */
#define INVALID -9999      /* marks uninitialized data */

/*
 * global variables
 */
struct gi {

  /* misc */
  char progname[STRLEN];      /* program name */
  FILE *packfile;             /* file descriptor for pack file */

  /* command line options */
  int quiet;                  /* run quietly unless there are errors (-Q) */
  int iters;                  /* number of times to perform SMVP (-i<d>) */
  int output;                 /* print the result vector? (-o) */
  char packfilename[STRLEN];  /* input packfile name */

  /* problem parameters */
  int globalnodes;            /* number of global nodes */
  int globalelems;            /* number of global elements */
  int mesh_dim;               /* mesh dimension (3) */
  int corners;                /* nodes per element (4) */
  int subdomains;             /* number of partition sets (and threads) */
  int processors;             /* not used */
  int *nodes;                 /* number of local nodes on each PE */
  int *mine;                  /* number of nodes that each PE owns */
  int *priv;                  /* number of unshared nodes per PE: priv <= mine <= nodes */
  int *elems;                 /* number of elements on each PE */
  int *matrixlen;             /* number of nonzeros on each PE */
  int *friends;               /* number of PEs each PE communicates with */
  int *commlen;               /* number of communicated nodes for each PE */
  int maxcommlen;             /* max number of nodes shared by a PE */

  /*
   * global data structures
   */
  double (**coord)[DOF];      /* geometric node coordinates */
  int (**vertex)[4];          /* nodes associated with each element */
  int **globalnode;           /* local-to-global node mapping function */
  int **globalelem;           /* local-to-global element mapping function */

  /* sparse matrix in compressed sparse row format */
  int **matrixcol;            /* K[j] on PE i is in column matrixcol[i][j] */
  int **matrixindex;          /* row j on PE i starts at K[matrixindex[i][j]] */

  /* communication schedule */
  int **comm;                 /* jth local node to send from PE i is comm[i][j] */
  int **commindex;            /* subdomain j on PE i starts at comm[i][commindex[j]] */

  /* tmp buffer for communication phase */
  double (**recvbuf)[DOF];
} gi, *gip = &gi;

/* timing variables */
double secs, csecs;

/* thread ids */
int *ids;

#if defined(PTHREAD)
/*
 * Pthread-specific thread variables
 */
pthread_mutex_t barriermutexhandle;
pthread_mutex_t vectormutexhandle;
pthread_cond_t barriercondhandle;
int barriercnt = 0;

#elif defined(SGI)
/*
 * SGI-specific thread variables
 */
char arenaname[STRLEN] = "/dev/zero";
usptr_t *arenahandle;
barrier_t *barrierhandle;
ulock_t *lockhandle;
#endif

/*
 * SMVP data structures
 */

/* w1 = K1 * v1 */
double (**K1)[DOF][DOF];   /* sparse matrix */
double (**v1)[DOF];        /* dense vector */
double (**w1)[DOF];        /* dense vector */

/* w2 = K2 * v2 */
double (**K2)[DOF][DOF];   /* sparse matrix */
double (**v2)[DOF];        /* dense vector */
double (**w2)[DOF];        /* dense vector */

/* end globals */

/*
 * function prototypes
 */

/* initialization and exit routines */
void init(int argc, char **argv, struct gi *gip);
void parsecommandline(int argc, char **argv, struct gi *gip);
void readpackfile(struct gi *gip);
void bail(struct gi *gip);
void finalize(struct gi *gip);

/* routines that read and parse the packfile */
void load_pack(struct gi *gip);
void load_global(struct gi *gip);
void load_nodes(struct gi *gip);
void load_elems(struct gi *gip);
void load_matrix(struct gi *gip);
void load_comm(struct gi *gip);

/* routines that assemble the sparse matrix and initial vector */
void assemble_matrix(double (**K)[DOF][DOF], struct gi *gip);
void assemble_vector(double (**v)[DOF], struct gi *gip);

/* sparse matrix vector multiply routines */
void *smvpthread(void *);
void zero_vector(double (*v)[DOF], int firstrow, int numrows);
void local_smvp(int nodes, double (*A)[DOF][DOF], int *Acol, int *Aindex,
                double (*v)[DOF], double (*w)[DOF], int firstrow, int numrows);
void full_assemble(double (*v)[DOF], int id, struct gi *gip);

/* system dependent timer routines */
void init_etime(void);
double get_etime(void);

/* system dependent thread routines */
void spark_init_threads(void);
void spark_start_threads(int n);
void spark_barrier(void);
void spark_setlock(void);
void spark_unsetlock(void);

/* output routines */
void printnodevector(double (**v)[DOF], int n, struct gi *gip);
void par_printmatrix3(double (*A)[DOF][DOF], int id, struct gi *gip);
void par_printvec3(double (**v)[DOF], int n, int id, struct gi *gip);
void seq_printvec3(double (**v)[DOF], int id, struct gi *gip);
void seq_prglobal(struct gi *gip);
void seq_prnodes(struct gi *gip);
void seq_prelems(struct gi *gip);
void seq_prmatrix(struct gi *gip);
void seq_prcomm(struct gi *gip);
void seq_prcommvals(struct gi *gip);

/* misc routines */
void info(struct gi *gip);
void usage(struct gi *gip);

/*
 * main program
 */
void main(int argc, char **argv) {
  int i, j;
  double mflops, global_mflops;
  int minnonzeros, maxnonzeros;

  init(argc, argv, gip);

  /*
   * allocate storage for the local matrices and vectors
   */
  if (!(K1 = (void *)malloc(gip->subdomains * sizeof(double *)))) {
    fprintf(stderr, "%s: couldn't malloc K1\n", gip->progname);
    bail(gip);
  }
  if (!(K2 = (void *)malloc(gip->subdomains * sizeof(double *)))) {
    fprintf(stderr, "%s: couldn't malloc K2\n", gip->progname);
    bail(gip);
  }
  if (!(v1 = (void *)malloc(gip->subdomains * sizeof(double *)))) {
    fprintf(stderr, "%s: couldn't malloc v1\n", gip->progname);
    bail(gip);
  }
  if (!(v2 = (void *)malloc(gip->subdomains * sizeof(double *)))) {
    fprintf(stderr, "%s: couldn't malloc v2\n", gip->progname);
    bail(gip);
  }
  if (!(w1 = (void *)malloc(gip->subdomains * sizeof(double *)))) {
    fprintf(stderr, "%s: couldn't malloc w1\n", gip->progname);
    bail(gip);
  }
  if (!(w2 = (void *)malloc(gip->subdomains * sizeof(double *)))) {
    fprintf(stderr, "%s: couldn't malloc w2\n", gip->progname);
    bail(gip);
  }

  for (i = 0; i < gip->subdomains; i++) {
    if (!(K1[i] = (void *)malloc((gip->matrixlen[i]+1) * DOF * DOF * sizeof(double)))) {
      fprintf(stderr, "%s: couldn't malloc K1[%d]\n", gip->progname, i);
      bail(gip);
    }
    if (!(K2[i] = (void *)malloc((gip->matrixlen[i]+1) * DOF * DOF * sizeof(double)))) {
      fprintf(stderr, "%s: couldn't malloc K2[i]\n", gip->progname);
      bail(gip);
    }
    if (!(v1[i] = (void *)malloc(gip->nodes[i] * DOF * sizeof(double)))) {
      fprintf(stderr, "%s: couldn't malloc v1\n", gip->progname);
      bail(gip);
    }
    if (!(v2[i] = (void *)malloc(gip->nodes[i] * DOF * sizeof(double)))) {
      fprintf(stderr, "%s: couldn't malloc v2\n", gip->progname);
      bail(gip);
    }
    if (!(w1[i] = (void *)malloc(gip->nodes[i] * DOF * sizeof(double)))) {
      fprintf(stderr, "%s: couldn't malloc w1\n", gip->progname);
      bail(gip);
    }
    if (!(w2[i] = (void *)malloc(gip->nodes[i] * DOF * sizeof(double)))) {
      fprintf(stderr, "%s: couldn't malloc w2\n", gip->progname);
      bail(gip);
    }
  }

  /*
   * generate the sparse matrix coefficients
   */
  if (!gip->quiet)
    fprintf(stderr, "%s: Computing sparse matrix coefficients.", gip->progname);
  assemble_matrix(K1, gip);
  assemble_matrix(K2, gip);
  assemble_vector(v1, gip);
  assemble_vector(v2, gip);
  if (!gip->quiet) {
    fprintf(stderr, " Done.\n");
    fflush(stderr);
  }

  /*
   * start the workers
   */
  spark_start_threads(gip->subdomains - 1);

  /*
   * start the master
   */
  if (!gip->quiet) {
    fprintf(stderr, "%s: Performing %d SMVP pairs with %d threads.",
            gip->progname, gip->iters, gip->subdomains);
  }
  ids[0] = 0;
  smvpthread(&ids[0]);
  if (!gip->quiet) {
    fprintf(stderr, " Done.\n");
    fflush(stderr);
  }

  if (secs == 0.0) {
    fprintf(stderr, "error: no measured elapsed time. Use more iterations (e.g., -i%d)\n",
            gip->iters * 10);
    bail(gip);
  }

  /*
   * Summarize performance
   */
  mflops = 0.0;
  for (i = 0; i < gip->subdomains; i++) {
    mflops += (double)((2.0*gip->matrixlen[i] - gip->nodes[i]) /* nonzero blocks */
                       * DOF*DOF                               /* DOF^2 numbers/block */
                       * 2.0)                                  /* 2 flops/number */
              / 1000000.0;
  }

  /* find the minimum and maximum load on each subdomain */
  minnonzeros = 1 << 20;
  maxnonzeros = -1;
  for (i = 0; i < gip->subdomains; i++) {
    if (gip->matrixlen[i] < minnonzeros)
      minnonzeros = gip->matrixlen[i];
    if (gip->matrixlen[i] > maxnonzeros)
      maxnonzeros = gip->matrixlen[i];
  }

  /* report: Mflops, time/SMVP, [compute secs/comm secs/compute %],
     Mflops/s, and [min/max/%] nonzeros per subdomain (load balance) */
  fprintf(stderr, "%s: %s %.6f Mf %.6f s [%.6f/%.6f/%.0f%%] %.1f Mf/s [%d/%d/%.0f%%]\n",
          gip->progname, gip->packfilename, mflops, secs,
          secs - csecs, csecs, ((secs - csecs)/secs)*100.0,
          mflops/secs, minnonzeros, maxnonzeros,
          (double)((double)minnonzeros/(double)maxnonzeros)*100.0);

  /*
   * print results if asked for
   */
  if (gip->output) {
    printnodevector(w1, gip->globalnodes, gip);
  }

  /*
   * Clean up
   */
  if (!gip->quiet) {
    fprintf(stderr, "%s: Done.\n", gip->progname);
  }
  finalize(gip);
}

/*
 * smvpthread - thread that performs a sequence of SMVP pairs
 */
void *smvpthread(void *a) {
  int i, j;
  double mycsecs, mystarttime, mystartctime;
  int *argp = (int *)a;
  int id = *argp;

  spark_barrier();

  mycsecs = 0.0;
  mystarttime = get_etime();

  for (i = 0; i < gip->iters; i++) {

    /* w1 = K1 * v1 */
    zero_vector(w1[id], 0, gip->nodes[id]);
    local_smvp(gip->nodes[id], K1[id], gip->matrixcol[id],
               gip->matrixindex[id], v1[id], w1[id], 0, gip->nodes[id]);
    mystartctime = get_etime();
    full_assemble(w1[id], id, gip);
    mycsecs += (get_etime() - mystartctime);

    /* w2 = K2 * v2 */
    zero_vector(w2[id], 0, gip->nodes[id]);
    local_smvp(gip->nodes[id], K2[id], gip->matrixcol[id],
               gip->matrixindex[id], v2[id], w2[id], 0, gip->nodes[id]);
    mystartctime = get_etime();
    full_assemble(w2[id], id, gip);
    mycsecs += (get_etime() - mystartctime);
  }

  if (id == 0) {
    secs = (get_etime() - mystarttime) / (gip->iters * 2.0);
    csecs = mycsecs / (gip->iters * 2.0);
  }
  return NULL;
}

/*
 * assemble_matrix - assemble the local sparse matrix
 */
void assemble_matrix(double (**K)[DOF][DOF], struct gi *gip) {
  int i, j, k, l, m, s;
  int temp1, elem;

  for (s = 0; s < gip->subdomains; s++) {
    for (i = 0; i < gip->matrixlen[s]; i++) {
      for (j = 0; j < DOF; j++) {
        for (k = 0; k < DOF; k++) {
          K[s][i][j][k] = 0.0;
        }
      }
    }
  }

  /*
   * add the contribution of each element to K
   */
  for (s = 0; s < gip->subdomains; s++) {
    for (elem = 0; elem < gip->elems[s]; elem++) {
      for (j = 0; j < gip->corners; j++) {
        for (k = 0; k < gip->corners; k++) {
          if (gip->vertex[s][elem][j] < gip->vertex[s][elem][k]) {
            temp1 = gip->matrixindex[s][gip->vertex[s][elem][j]];
            while (gip->matrixcol[s][temp1] != gip->vertex[s][elem][k]) {
              temp1++;
              if (temp1 >= gip->matrixindex[s][gip->vertex[s][elem][k] + 1]) {
                fprintf(stderr, "%s: K indexing error in assemble %d %d\n",
                        gip->progname, temp1, gip->vertex[s][elem][k]);
                bail(gip);
              }
            }
            for (l = 0; l < 3; l++) {
              for (m = 0; m < 3; m++) {
                K[s][temp1][l][m] += 1.0;
              }
            }
          }
        }
      }
    }
  }

#ifdef DEBUGASSEMBLE
  seq_printmatrix3(K[i], gip);
#endif
}

/*
 * assemble_vector - assemble the local v vector
 */
void assemble_vector(double (**v)[DOF], struct gi *gip) {
  int i, j, s;

  for (s = 0; s < gip->subdomains; s++) {
    for (i = 0; i < gip->nodes[s]; i++) {
      for (j = 0; j < DOF; j++) {
        v[s][i][j] = 1.0;
      }
    }
  }

#ifdef DEBUGASSEMBLE
  seq_printvec3(v, gip);
#endif
}

/*
 * zero_vector - clear a portion of a vector
 */
void zero_vector(double (*v)[DOF], int firstrow, int numrows) {
  int i;

  for (i = firstrow; i < (firstrow + numrows); i++) {
    v[i][0] = 0.0;
    v[i][1] = 0.0;
    v[i][2] = 0.0;
  }
}

/*
 * local_smvp - local sparse matrix vector product w = K*v
 */
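The listing on this page breaks off at the body of local_smvp. For orientation only, here is a minimal sketch of a block compressed-sparse-row product matching the prototype above. It assumes each row stores its diagonal block first, followed by the row's upper-triangle blocks (consistent with assemble_matrix and with the 2*matrixlen - nodes block count in main); it is a hedged reconstruction, not the original Spark98 body.

/*
 * local_smvp (sketch, not original) - w += K*v for a symmetric block-CSR matrix.
 * Assumption: entry Aindex[i] of row i holds the diagonal block K[i][i]; the
 * remaining entries of the row hold the blocks K[i][col] with col > i, so each
 * off-diagonal block also contributes its transpose to row col.
 */
void local_smvp(int nodes, double (*A)[DOF][DOF], int *Acol, int *Aindex,
                double (*v)[DOF], double (*w)[DOF], int firstrow, int numrows) {
  int i, j, k, idx, col;
  double sum[DOF];

  for (i = firstrow; i < firstrow + numrows; i++) {

    /* diagonal block: sum = K[i][i] * v[i] */
    idx = Aindex[i];
    for (j = 0; j < DOF; j++) {
      sum[j] = 0.0;
      for (k = 0; k < DOF; k++)
        sum[j] += A[idx][j][k] * v[i][k];
    }

    /* upper-triangle blocks of row i */
    for (idx = Aindex[i] + 1; idx < Aindex[i+1]; idx++) {
      col = Acol[idx];
      for (j = 0; j < DOF; j++) {
        for (k = 0; k < DOF; k++) {
          sum[j]    += A[idx][j][k] * v[col][k];  /* w[i]   += K[i][col]   * v[col] */
          w[col][j] += A[idx][k][j] * v[i][k];    /* w[col] += K[i][col]^T * v[i]   */
        }
      }
    }

    w[i][0] += sum[0];
    w[i][1] += sum[1];
    w[i][2] += sum[2];
  }
}

Under this assumption each stored off-diagonal block is used twice (once for row i, once transposed for row col), which is exactly what the (2*matrixlen - nodes) nonzero-block count in main's Mflops calculation reflects.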