/* mv.c */
/***************************************************************************/
/* mv.c - sequential and parallel shared memory SMVP kernel                */
/*                                                                         */
/* Spark98 Kernels                                                         */
/* Copyright (C) David O'Hallaron 1998                                     */
/*                                                                         */
/* Compilation options: (you must define zero or one of these)             */
/*                                                                         */
/*   Variable         Binary   Description                                 */
/*   none             smv      baseline sequential SMVP                    */
/*   PTHREAD_LOCK     lmv      locks/pthreads                              */
/*   PTHREAD_REDUCE   rmv      reductions instead of locks/pthreads        */
/*   SGI_LOCK         lmv      locks/SGI threads                           */
/*   SGI_REDUCE       rmv      reductions instead of locks/SGI threads     */
/*                                                                         */
/* You are free to use this software without restriction. If you find that */
/* the suite is helpful to you, it would be very helpful if you sent me a  */
/* note at droh@cs.cmu.edu letting me know how you are using it.           */
/***************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

/* Map the user-visible build options onto a thread-package macro. */
#if (defined(PTHREAD_LOCK) || defined(PTHREAD_REDUCE))
#define PTHREAD
#elif (defined(SGI_LOCK) || defined(SGI_REDUCE))
#define SGI
#endif

#if defined(PTHREAD)
#include <pthread.h>
#elif defined(SGI)
#include <sys/types.h>
#include <sys/prctl.h>
#include <ulocks.h>
#endif

/* LOCK: parallel variant that protects the output vector with locks. */
#if (defined(SGI_LOCK) || defined(PTHREAD_LOCK))
#define LOCK
#else
#undef LOCK
#endif

/* REDUCE: parallel variant that uses per-thread partial vectors + reduction. */
#if (defined(SGI_REDUCE) || defined(PTHREAD_REDUCE))
#define REDUCE
#else
#undef REDUCE
#endif

/*
 * program wide constants
 */
#define DEFAULT_ITERS 20 /* default number of SMVP operations */
#define STRLEN 128       /* default string length */
#define DOF 3            /* degrees of freedom in underlying simulation */

struct gi {
    char *progname;              /* program name */

    /* command line options */
    int quiet;                   /* run quietly unless there are errors (-Q) */
    int iters;                   /* number of times to perform SMVP (-i<n>) */
    int output;                  /* print the result vector? (-o) */
    int threads;                 /* number of threads (-t<n>) */
    int locks;                   /* number of locks (-l<n>) */
    char packfilename[STRLEN];   /* input packfile name */

    /* problem parameters */
    int globalnodes;             /* global nodes (same as local nodes) */
    int globalelems;             /* global elements (same as local elements) */
    int mesh_dim;                /* mesh dimension (3) */
    int corners;                 /* nodes per element (4) */
    int subdomains;              /* number of partition sets */
    int processors;              /* not used */
    int nodes;                   /* number of local nodes */
    int elems;                   /* number of local elements */
    int matrixlen;               /* number of local nonzero block entries */
    int choleskylen;             /* not used */
    int priv;                    /* not used */
    int mine;                    /* not used */

    /* global data structures */
    double (*coord)[DOF];        /* geometric node coordinates */
    int (*vertex)[4];            /* nodes associated with each element */
    int *globalnode;             /* global to local node mapping function */
    int *matrixcol;              /* K[i] is in column matrixcol[i] */
    int *matrixindex;            /* row i starts at K[matrixindex[i]] */
};

/*
 * globals
 */

/* w1 = K1 * v1 */
double (*K1)[DOF][DOF];          /* sparse matrix coefficients */
double (*v1)[DOF];               /* dense vector */
double (*w1)[DOF];               /* dense vector */

/* w2 = K2 * v2 */
double (*K2)[DOF][DOF];          /* sparse matrix coefficients */
double (*v2)[DOF];               /* dense vector */
double (*w2)[DOF];               /* dense vector */

#if defined(REDUCE)
/* temporary vectors (one per thread) to hold */
/* partially assembled w vectors */
double (**tmpw)[DOF];
#endif

/* global information about the simulation */
struct gi gi, *gip = &gi;

/* timing variables */
double secs, csecs;

/* partition: firstrow[i]..firstrow[i+1]-1 are the rows owned by thread i */
int *firstrow;

/* thread ids */
int *ids;

/*
 * Pthread-specific thread variables
 */
#if defined(PTHREAD)
pthread_mutex_t barriermutexhandle;
pthread_mutex_t *vectormutexhandle;
pthread_cond_t barriercondhandle;
int barriercnt = 0;

/*
 * SGI-specific thread variables
 */
#elif defined(SGI)
char arenaname[STRLEN] = "/dev/zero";
usptr_t *arenahandle;
barrier_t *barrierhandle;
ulock_t *lockhandle;             /* array of locks */
#endif

/*
 * end globals
 */

/*
 * function prototypes
 */

/* code executed by each thread */
void *smvpthread(void *a);

/* initialization routines */
void init(int argc, char **argv, struct gi *gip);
void parsecommandline(int argc, char **argv, struct gi *gip);
void readpackfile(struct gi *gip);

/* routines that assemble the sparse matrix */
void assemble_matrix(double (*K)[DOF][DOF], struct gi *gip);
void assemble_vector(double (*v)[DOF], struct gi *gip);

/* sparse matrix vector multiply */
void zero_vector(double (*v)[DOF], int firstrow, int numrows);
void local_smvp(int nodes, double (*A)[DOF][DOF], int *Acol,
                int *Aindex, double (*v)[DOF], double (*w)[DOF],
                int firstrow, int numrows, int id, struct gi *gip);
void add_vectors(double (**tmpw)[DOF], double (*w)[DOF],
                 int firstrow, int numrows, struct gi *gip);

/* misc routines */
void info(struct gi *gip);
void usage(struct gi *gip);
void printfvec3(double (*v)[DOF], int n);
void printmatrix3(double (*A)[DOF][DOF], struct gi *gip);
void printnodevector(double (*v)[DOF], int n);

/* system-dependent timer routines */
void init_etime(void);
double get_etime(void);

/* system dependent thread routines */
#if (defined(LOCK) || defined(REDUCE))
void spark_init_threads(void);
void spark_start_threads(int n);
void spark_barrier(void);
void spark_setlock(int lockid);
void spark_unsetlock(int lockid);
#endif

/*
 * main - allocate the matrix/vector storage, generate the coefficients,
 * partition the rows across threads, run the timed SMVP iterations, and
 * report performance.
 *
 * BUG FIXES vs. original: declared as int main (void main is non-standard),
 * error paths exit with a nonzero status (the original used exit(0), which
 * reported success on failure), and the "inconsistent row number" message
 * now passes the argument its %s conversion requires (previously UB).
 */
int main(int argc, char **argv)
{
    int i;
    int oldrow, newrow, limit, nonzeros, minnonzeros, maxnonzeros;
    double mflops;

    init(argc, argv, gip);

    /*
     * allocate contiguous storage for the matrix and vectors
     */
    if (!(K1 = (void *)malloc((gip->matrixlen+1)*DOF*DOF*sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc K1(%d)\n",
                gip->progname, gip->matrixlen);
        exit(1);
    }
    if (!(K2 = (void *)malloc((gip->matrixlen+1)*DOF*DOF*sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc K2(%d)\n",
                gip->progname, gip->matrixlen);
        exit(1);
    }
    if (!(v1 = (void *)malloc(gip->nodes * DOF * sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc v1(%d)\n",
                gip->progname, gip->nodes);
        exit(1);
    }
    if (!(v2 = (void *)malloc(gip->nodes * DOF * sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc v2(%d)\n",
                gip->progname, gip->nodes);
        exit(1);
    }
    if (!(w1 = (void *)malloc(gip->nodes * DOF * sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc w1(%d)\n",
                gip->progname, gip->nodes);
        exit(1);
    }
    if (!(w2 = (void *)malloc(gip->nodes * DOF * sizeof(double)))) {
        fprintf(stderr, "%s: couldn't malloc w2(%d)\n",
                gip->progname, gip->nodes);
        exit(1);
    }

#if defined(REDUCE)
    /* one private partial result vector per thread */
    if (!(tmpw = (void *)malloc(gip->threads*sizeof(double *)))) {
        fprintf(stderr, "%s: couldn't malloc tmpw(%d)\n",
                gip->progname, gip->threads);
        exit(1);
    }
    for (i = 0; i < gip->threads; i++) {
        if (!(tmpw[i] = (void *)malloc(gip->nodes*DOF*sizeof(double)))) {
            fprintf(stderr, "%s: couldn't malloc tmpw[i](%d)\n",
                    gip->progname, gip->nodes);
            exit(1);
        }
    }
#endif

    /*
     * generate the sparse matrix and vector coefficients
     */
    if (!gip->quiet)
        fprintf(stderr, "%s: Computing sparse matrix coefficients.", argv[0]);
    assemble_matrix(K1, gip);
    assemble_matrix(K2, gip);
    assemble_vector(v1, gip);
    assemble_vector(v2, gip);
    if (!gip->quiet) {
        fprintf(stderr, " Done.\n");
        fflush(stderr);
    }

    /*
     * partition the load across the threads: grow each thread's row range
     * until it holds roughly matrixlen/threads nonzero block entries
     */
    limit = gip->matrixlen / gip->threads;
    oldrow = 0;
    newrow = 0;
    firstrow[0] = 0;
    for (i = 1; i < gip->threads; i++) {
        if (newrow > gip->nodes) {
            /* fixed: original fprintf had no argument for the %s conversion */
            fprintf(stderr, "%s: inconsistent row number\n", gip->progname);
            exit(1);
        }
        else if (newrow < gip->nodes) {
            newrow = oldrow + 1;
            while (((gip->matrixindex[newrow]-gip->matrixindex[oldrow]) < limit) &&
                   (newrow < gip->nodes)) {
                newrow++;
            }
            firstrow[i] = newrow;
            oldrow = newrow;
        }
        else {
            firstrow[i] = newrow;
        }
    }
    firstrow[gip->threads] = gip->nodes;

    /*
     * start the workers (the master thread participates too, hence n-1)
     */
#if (defined(LOCK) || defined(REDUCE))
    spark_start_threads(gip->threads-1);
#endif

    /*
     * start the master
     */
    if (!gip->quiet) {
#if defined(LOCK)
        fprintf(stderr,
                "%s: Performing %d SMVP pairs (n=%d) with %d threads and %d locks.",
                gip->progname, gip->iters, gip->nodes, gip->threads, gip->locks);
#elif defined(REDUCE)
        fprintf(stderr,
                "%s: Performing %d SMVP pairs (n=%d) with %d threads.",
                gip->progname, gip->iters, gip->nodes, gip->threads);
#else
        fprintf(stderr,
                "%s: Performing %d SMVP pairs (n=%d).",
                gip->progname, gip->iters, gip->nodes);
#endif
    }

    ids[0] = 0;
    smvpthread(&ids[0]);

    if (!gip->quiet) {
        fprintf(stderr, " Done.\n");
        fflush(stderr);
    }

    if (secs == 0.0) {
        fprintf(stderr,
                "error: no measured elapsed time. Use more iterations (e.g., -i%d)\n",
                gip->iters*10);
        exit(1);
    }

    /*
     * summarize performance
     */
    mflops = (double)((2.0*gip->matrixlen - gip->nodes) * /* nonzero blocks */
                      DOF*DOF *                           /* DOF^2 numbers/block */
                      2.0)                                /* 2 flops/block */
             / 1000000.0;

    /* compute min and max load on each thread */
    minnonzeros = 1<<20;
    maxnonzeros = -1;
    for (i = 0; i < gip->threads; i++) {
        nonzeros = gip->matrixindex[firstrow[i+1]] -
                   gip->matrixindex[firstrow[i]];
        if (nonzeros < minnonzeros)
            minnonzeros = nonzeros;
        if (nonzeros > maxnonzeros)
            maxnonzeros = nonzeros;
    }

#if defined(LOCK)
    /* lock-based SMVP */
    fprintf(stderr,
            "%s: %s %d threads %d locks %.6f Mf %.6f s %.1f Mf/s [%d/%d/%.0f%%]\n",
            gip->progname, gip->packfilename, gip->threads, gip->locks,
            mflops, secs, mflops/secs, minnonzeros, maxnonzeros,
            (double)((double)minnonzeros/(double)maxnonzeros)*100.0);
#elif defined (REDUCE)
    /* reduction-based SMVP */
    fprintf(stderr,
            "%s: %s %d threads %.6f Mf %.6f s [%.6f/%.6f/%.0f%%] %.1f Mf/s [%d/%d/%.0f%%]\n",
            gip->progname, gip->packfilename, gip->threads,
            mflops, secs, secs-csecs, csecs, ((secs-csecs)/secs)*100.0,
            mflops/secs, minnonzeros, maxnonzeros,
            (double)((double)minnonzeros/(double)maxnonzeros)*100.0);
#else
    /* baseline sequential SMVP */
    fprintf(stderr, "%s: %s %.6f Mf %.6f s %.1f Mf/s\n",
            gip->progname, gip->packfilename, mflops, secs, mflops/secs);
#endif

    /* print results */
    if (gip->output) {
        printnodevector(w1, gip->nodes);
    }
    fflush(stdout);

    if (!gip->quiet) {
        fprintf(stderr, "%s: Done.\n", gip->progname);
    }

    return 0;
}

/* NOTE(review): the captured copy of this file ended here; the definitions
 * of smvpthread, init, and the other prototyped routines must be restored
 * from the full Spark98 distribution. */