/* mv.c */
/***************************************************************************/
/* mv.c - sequential and parallel shared memory SMVP kernel                */
/*                                                                         */
/* Spark98 Kernels                                                         */
/* Copyright (C) David O'Hallaron 1998                                     */
/*                                                                         */
/* Compilation options: (you must define zero or one of these)             */
/*                                                                         */
/*   Variable         Binary   Description                                 */
/*   none             smv      baseline sequential SMVP                    */
/*   PTHREAD_LOCK     lmv      locks/pthreads                              */
/*   PTHREAD_REDUCE   rmv      reductions instead of locks/pthreads        */
/*   SGI_LOCK         lmv      locks/SGI threads                           */
/*   SGI_REDUCE       rmv      reductions instead of locks/SGI threads     */
/*                                                                         */
/* You are free to use this software without restriction. If you find that */
/* the suite is helpful to you, it would be very helpful if you sent me a  */
/* note at droh@cs.cmu.edu letting me know how you are using it.           */
/***************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

/* Map the user-visible build options onto a thread-package macro. */
#if (defined(PTHREAD_LOCK) || defined(PTHREAD_REDUCE))
#define PTHREAD
#elif (defined(SGI_LOCK) || defined(SGI_REDUCE))
#define SGI
#endif

#if defined(PTHREAD)
#include <pthread.h>
#elif defined(SGI)
#include <sys/types.h>
#include <sys/prctl.h>
#include <ulocks.h>
#endif

/* LOCK: parallel variant that protects the output vector with locks. */
#if (defined(SGI_LOCK) || defined(PTHREAD_LOCK))
#define LOCK
#else
#undef LOCK
#endif

/* REDUCE: parallel variant that uses per-thread partial vectors + reduction. */
#if (defined(SGI_REDUCE) || defined(PTHREAD_REDUCE))
#define REDUCE
#else
#undef REDUCE
#endif

/*
 * program wide constants
 */
#define DEFAULT_ITERS 20 /* default number of SMVP operations */
#define STRLEN 128       /* default string length */
#define DOF 3            /* degrees of freedom in underlying simulation */

struct gi {
    char *progname;              /* program name */

    /* command line options */
    int quiet;                   /* run quietly unless there are errors (-Q) */
    int iters;                   /* number of times to perform SMVP (-i<n>) */
    int output;                  /* print the result vector? (-o) */
    int threads;                 /* number of threads (-t<n>) */
    int locks;                   /* number of locks (-l<n>) */
    char packfilename[STRLEN];   /* input packfile name */

    /* problem parameters */
    int globalnodes;             /* global nodes (same as local nodes) */
    int globalelems;             /* global elements (same as local elements) */
    int mesh_dim;                /* mesh dimension (3) */
    int corners;                 /* nodes per element (4) */
    int subdomains;              /* number of partition sets */
    int processors;              /* not used */
    int nodes;                   /* number of local nodes */
    int elems;                   /* number of local elements */
    int matrixlen;               /* number of local nonzero block entries */
    int choleskylen;             /* not used */
    int priv;                    /* not used */
    int mine;                    /* not used */

    /* global data structures */
    double (*coord)[DOF];        /* geometric node coordinates */
    int (*vertex)[4];            /* nodes associated with each element */
    int *globalnode;             /* global to local node mapping function */
    int *matrixcol;              /* K[i] is in column matrixcol[i] */
    int *matrixindex;            /* row i starts at K[matrixindex[i]] */
};

/*
 * globals
 */

/* w1 = K1 * v1 */
double (*K1)[DOF][DOF];          /* sparse matrix coefficients */
double (*v1)[DOF];               /* dense vector */
double (*w1)[DOF];               /* dense vector */

/* w2 = K2 * v2 */
double (*K2)[DOF][DOF];          /* sparse matrix coefficients */
double (*v2)[DOF];               /* dense vector */
double (*w2)[DOF];               /* dense vector */

#if defined(REDUCE)
/* temporary vectors (one per thread) to hold */
/* partially assembled w vectors */
double (**tmpw)[DOF];
#endif

/* global information about the simulation */
struct gi gi, *gip = &gi;

/* timing variables */
double secs, csecs;

/* partition: firstrow[i]..firstrow[i+1]-1 are the rows owned by thread i */
int *firstrow;

/* thread ids */
int *ids;

/*
 * Pthread-specific thread variables
 */
#if defined(PTHREAD)
pthread_mutex_t barriermutexhandle;
pthread_mutex_t *vectormutexhandle;
pthread_cond_t barriercondhandle;
int barriercnt = 0;

/*
 * SGI-specific thread variables
 */
#elif defined(SGI)
char arenaname[STRLEN] = "/dev/zero";
usptr_t *arenahandle;
barrier_t *barrierhandle;
ulock_t *lockhandle;             /* array of locks */
#endif

/*
 * end globals
 */

/*
 * function prototypes
 */

/* code executed by each thread */
void *smvpthread(void *a);

/* initialization routines */
void init(int argc, char **argv, struct gi *gip);
void parsecommandline(int argc, char **argv, struct gi *gip);
void readpackfile(struct gi *gip);

/* routines that assemble the sparse matrix */
void assemble_matrix(double (*K)[DOF][DOF], struct gi *gip);
void assemble_vector(double (*v)[DOF], struct gi *gip);

/* sparse matrix vector multiply */
void zero_vector(double (*v)[DOF], int firstrow, int numrows);
void local_smvp(int nodes, double (*A)[DOF][DOF], int *Acol,
                int *Aindex, double (*v)[DOF], double (*w)[DOF],
                int firstrow, int numrows, int id, struct gi *gip);
void add_vectors(double (**tmpw)[DOF], double (*w)[DOF],
                 int firstrow, int numrows, struct gi *gip);

/* misc routines */
void info(struct gi *gip);
void usage(struct gi *gip);
void printfvec3(double (*v)[DOF], int n);
void printmatrix3(double (*A)[DOF][DOF], struct gi *gip);
void printnodevector(double (*v)[DOF], int n);

/* system-dependent timer routines */
void init_etime(void);
double get_etime(void);

/* system dependent thread routines */
#if (defined(LOCK) || defined(REDUCE))
void spark_init_threads(void);
void spark_start_threads(int n);
void spark_barrier(void);
void spark_setlock(int lockid);
void spark_unsetlock(int lockid);
#endif

/*
 * main - allocate the matrix/vector storage, generate the coefficients,
 * partition the rows across threads, run the timed SMVP iterations, and
 * report performance.
 *
 * BUG FIXES vs. original: declared as int main (void main is non-standard),
 * error paths exit with a nonzero status (the original used exit(0), which
 * reported success on failure), and the "inconsistent row number" message
 * now passes the argument its %s conversion requires (previously UB).
 */
int main(int argc, char **argv)
{
    int i;
    int oldrow, newrow, limit, nonzeros, minnonzeros, maxnonzeros;
    double mflops;

    init(argc, argv, gip);

    /*
     * allocate contiguous storage for the matrix and vectors
     */
    if (!(K1 = (void *)malloc((gip->matrixlen+1)*DOF*DOF*sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc K1(%d)\n",
                gip->progname, gip->matrixlen);
        exit(1);
    }
    if (!(K2 = (void *)malloc((gip->matrixlen+1)*DOF*DOF*sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc K2(%d)\n",
                gip->progname, gip->matrixlen);
        exit(1);
    }
    if (!(v1 = (void *)malloc(gip->nodes * DOF * sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc v1(%d)\n",
                gip->progname, gip->nodes);
        exit(1);
    }
    if (!(v2 = (void *)malloc(gip->nodes * DOF * sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc v2(%d)\n",
                gip->progname, gip->nodes);
        exit(1);
    }
    if (!(w1 = (void *)malloc(gip->nodes * DOF * sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc w1(%d)\n",
                gip->progname, gip->nodes);
        exit(1);
    }
    if (!(w2 = (void *)malloc(gip->nodes * DOF * sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc w2(%d)\n",
                gip->progname, gip->nodes);
        exit(1);
    }

#if defined(REDUCE)
    /* one private partial result vector per thread */
    if (!(tmpw = (void *)malloc(gip->threads*sizeof(double *)))) {
        fprintf(stderr, "%s: couldn't malloc tmpw(%d)\n",
                gip->progname, gip->threads);
        exit(1);
    }
    for (i = 0; i < gip->threads; i++) {
        if (!(tmpw[i] = (void *)malloc(gip->nodes*DOF*sizeof(double)))) {
            fprintf(stderr, "%s: couldn't malloc tmpw[i](%d)\n",
                    gip->progname, gip->nodes);
            exit(1);
        }
    }
#endif

    /*
     * generate the sparse matrix and vector coefficients
     */
    if (!gip->quiet)
        fprintf(stderr, "%s: Computing sparse matrix coefficients.", argv[0]);
    assemble_matrix(K1, gip);
    assemble_matrix(K2, gip);
    assemble_vector(v1, gip);
    assemble_vector(v2, gip);
    if (!gip->quiet) {
        fprintf(stderr, " Done.\n");
        fflush(stderr);
    }

    /*
     * partition the load across the threads: grow each thread's row range
     * until it holds roughly matrixlen/threads nonzero block entries
     */
    limit = gip->matrixlen / gip->threads;
    oldrow = 0;
    newrow = 0;
    firstrow[0] = 0;
    for (i = 1; i < gip->threads; i++) {
        if (newrow > gip->nodes) {
            /* fixed: original fprintf had no argument for the %s conversion */
            fprintf(stderr, "%s: inconsistent row number\n", gip->progname);
            exit(1);
        }
        else if (newrow < gip->nodes) {
            newrow = oldrow + 1;
            while (((gip->matrixindex[newrow]-gip->matrixindex[oldrow]) < limit) &&
                   (newrow < gip->nodes)) {
                newrow++;
            }
            firstrow[i] = newrow;
            oldrow = newrow;
        }
        else {
            firstrow[i] = newrow;
        }
    }
    firstrow[gip->threads] = gip->nodes;

    /*
     * start the workers (the master thread participates too, hence n-1)
     */
#if (defined(LOCK) || defined(REDUCE))
    spark_start_threads(gip->threads-1);
#endif

    /*
     * start the master
     */
    if (!gip->quiet) {
#if defined(LOCK)
        fprintf(stderr,
                "%s: Performing %d SMVP pairs (n=%d) with %d threads and %d locks.",
                gip->progname, gip->iters, gip->nodes, gip->threads, gip->locks);
#elif defined(REDUCE)
        fprintf(stderr,
                "%s: Performing %d SMVP pairs (n=%d) with %d threads.",
                gip->progname, gip->iters, gip->nodes, gip->threads);
#else
        fprintf(stderr,
                "%s: Performing %d SMVP pairs (n=%d).",
                gip->progname, gip->iters, gip->nodes);
#endif
    }

    ids[0] = 0;
    smvpthread(&ids[0]);

    if (!gip->quiet) {
        fprintf(stderr, " Done.\n");
        fflush(stderr);
    }

    if (secs == 0.0) {
        fprintf(stderr,
                "error: no measured elapsed time. Use more iterations (e.g., -i%d)\n",
                gip->iters*10);
        exit(1);
    }

    /*
     * summarize performance
     */
    mflops = (double)((2.0*gip->matrixlen - gip->nodes) * /* nonzero blocks */
                      DOF*DOF *                           /* DOF^2 numbers/block */
                      2.0)                                /* 2 flops/block */
             / 1000000.0;

    /* compute min and max load on each thread */
    minnonzeros = 1<<20;
    maxnonzeros = -1;
    for (i = 0; i < gip->threads; i++) {
        nonzeros = gip->matrixindex[firstrow[i+1]] -
                   gip->matrixindex[firstrow[i]];
        if (nonzeros < minnonzeros)
            minnonzeros = nonzeros;
        if (nonzeros > maxnonzeros)
            maxnonzeros = nonzeros;
    }

#if defined(LOCK)
    /* lock-based SMVP */
    fprintf(stderr,
            "%s: %s %d threads %d locks %.6f Mf %.6f s %.1f Mf/s [%d/%d/%.0f%%]\n",
            gip->progname, gip->packfilename, gip->threads, gip->locks,
            mflops, secs, mflops/secs, minnonzeros, maxnonzeros,
            (double)((double)minnonzeros/(double)maxnonzeros)*100.0);
#elif defined (REDUCE)
    /* reduction-based SMVP */
    fprintf(stderr,
            "%s: %s %d threads %.6f Mf %.6f s [%.6f/%.6f/%.0f%%] %.1f Mf/s [%d/%d/%.0f%%]\n",
            gip->progname, gip->packfilename, gip->threads,
            mflops, secs, secs-csecs, csecs, ((secs-csecs)/secs)*100.0,
            mflops/secs, minnonzeros, maxnonzeros,
            (double)((double)minnonzeros/(double)maxnonzeros)*100.0);
#else
    /* baseline sequential SMVP */
    fprintf(stderr, "%s: %s %.6f Mf %.6f s %.1f Mf/s\n",
            gip->progname, gip->packfilename, mflops, secs, mflops/secs);
#endif

    /* print results */
    if (gip->output) {
        printnodevector(w1, gip->nodes);
    }
    fflush(stdout);

    if (!gip->quiet) {
        fprintf(stderr, "%s: Done.\n", gip->progname);
    }

    return 0;
}

/* NOTE(review): the captured copy of this file ended here; the definitions
 * of smvpthread, init, and the other prototyped routines must be restored
 * from the full Spark98 distribution. */