📄 hmv.c
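The header comment below requires either SGI or PTHREAD to be defined at build time. As a hedged illustration only (the kernel's own Makefile is not part of this listing, and the exact compiler and flags are an assumption), the two variants might be built as:

    cc -O2 -DPTHREAD hmv.c -o hmv -lpthread    # Pthreads version (assumed flags)
    cc -O2 -DSGI hmv.c -o hmv                  # IRIX sproc/ulocks version (assumed flags)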
/***************************************************************************/
/* hmv.c - hybrid SMVP kernel: a shared memory program in a time- and     */
/* space-efficient message passing style                                   */
/*                                                                         */
/* Spark98 Kernels                                                         */
/* Copyright (c) David O'Hallaron 1998                                     */
/*                                                                         */
/* Compilation options (you must define one of these two variables):      */
/*                                                                         */
/*   Variable   Binary   Description                                      */
/*   SGI        hmv      uses SGI thread primitives                       */
/*   PTHREAD    hmv      uses Pthread thread primitives                   */
/*                                                                         */
/* You are free to use this software without restriction. If you find that*/
/* the suite is helpful to you, it would be very helpful if you sent me a */
/* note at droh@cs.cmu.edu letting me know how you are using it.          */
/***************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#if defined(PTHREAD)
#include <pthread.h>
#elif defined(SGI)
#include <sys/types.h>
#include <sys/prctl.h>
#include <ulocks.h>
#endif

/*
 * program wide constants
 */
#define DEFAULT_ITERS 20   /* default number of SMVP operations */
#define STRLEN 128         /* default string length */
#define DOF 3              /* degrees of freedom in underlying simulation */
#define SEND_CMD 10        /* used by i/o master to ask for data from slaves */
#define INVALID -9999      /* marks uninitialized data */

/*
 * global variables
 */
struct gi {

  /* misc */
  char progname[STRLEN];      /* program name */
  FILE *packfile;             /* file descriptor for pack file */

  /* command line options */
  int quiet;                  /* run quietly unless there are errors (-Q) */
  int iters;                  /* number of times to perform SMVP (-i<d>) */
  int output;                 /* print the result vector? (-o) */
  char packfilename[STRLEN];  /* input packfile name */

  /* problem parameters */
  int globalnodes;            /* number of global nodes */
  int globalelems;            /* number of global elements */
  int mesh_dim;               /* mesh dimension (3) */
  int corners;                /* nodes per element (4) */
  int subdomains;             /* number of partition sets (and threads) */
  int processors;             /* not used */
  int *nodes;                 /* number of local nodes on each PE */
  int *mine;                  /* number of nodes that each PE owns */
  int *priv;                  /* number of unshared nodes per PE: priv <= mine <= nodes */
  int *elems;                 /* number of elements on each PE */
  int *matrixlen;             /* number of nonzeros on each PE */
  int *friends;               /* number of PEs each PE communicates with */
  int *commlen;               /* number of communicated nodes for each PE */
  int maxcommlen;             /* max number of nodes shared by a PE */

  /*
   * global data structures
   */
  double (**coord)[DOF];      /* geometric node coordinates */
  int (**vertex)[4];          /* nodes associated with each element */
  int **globalnode;           /* local-to-global node mapping function */
  int **globalelem;           /* local-to-global element mapping function */

  /* sparse matrix in compressed sparse row format */
  int **matrixcol;            /* K[j] on PE i is in column matrixcol[i][j] */
  int **matrixindex;          /* row j on PE i starts at K[matrixindex[i][j]] */

  /* communication schedule */
  int **comm;                 /* jth local node to send from PE i is comm[i][j] */
  int **commindex;            /* subdomain j on PE i starts at comm[i][commindex[j]] */

  /* tmp buffer for communication phase */
  double (**recvbuf)[DOF];
} gi, *gip = &gi;

/* timing variables */
double secs, csecs;

/* thread ids */
int *ids;

#if defined(PTHREAD)
/*
 * Pthread-specific thread variables
 */
pthread_mutex_t barriermutexhandle;
pthread_mutex_t vectormutexhandle;
pthread_cond_t barriercondhandle;
int barriercnt = 0;

#elif defined(SGI)
/*
 * SGI-specific thread variables
 */
char arenaname[STRLEN] = "/dev/zero";
usptr_t *arenahandle;
barrier_t *barrierhandle;
ulock_t *lockhandle;
#endif

/*
 * SMVP data structures
 */

/* w1 = K1 * v1 */
double (**K1)[DOF][DOF];   /* sparse matrix */
double (**v1)[DOF];        /* dense vector */
double (**w1)[DOF];        /* dense vector */

/* w2 = K2 * v2 */
double (**K2)[DOF][DOF];   /* sparse matrix */
double (**v2)[DOF];        /* dense vector */
double (**w2)[DOF];        /* dense vector */

/* end globals */

/*
 * function prototypes
 */

/* initialization and exit routines */
void init(int argc, char **argv, struct gi *gip);
void parsecommandline(int argc, char **argv, struct gi *gip);
void readpackfile(struct gi *gip);
void bail(struct gi *gip);
void finalize(struct gi *gip);

/* routines that read and parse the packfile */
void load_pack(struct gi *gip);
void load_global(struct gi *gip);
void load_nodes(struct gi *gip);
void load_elems(struct gi *gip);
void load_matrix(struct gi *gip);
void load_comm(struct gi *gip);

/* routines that assemble the sparse matrix and initial vector */
void assemble_matrix(double (**K)[DOF][DOF], struct gi *gip);
void assemble_vector(double (**v)[DOF], struct gi *gip);

/* sparse matrix vector multiply routines */
void *smvpthread(void *);
void zero_vector(double (*v)[DOF], int firstrow, int numrows);
void local_smvp(int nodes, double (*A)[DOF][DOF], int *Acol, int *Aindex,
                double (*v)[DOF], double (*w)[DOF], int firstrow, int numrows);
void full_assemble(double (*v)[DOF], int id, struct gi *gip);

/* system dependent timer routines */
void init_etime(void);
double get_etime(void);

/* system dependent thread routines */
void spark_init_threads(void);
void spark_start_threads(int n);
void spark_barrier(void);
void spark_setlock(void);
void spark_unsetlock(void);

/* output routines */
void printnodevector(double (**v)[DOF], int n, struct gi *gip);
void par_printmatrix3(double (*A)[DOF][DOF], int id, struct gi *gip);
void par_printvec3(double (**v)[DOF], int n, int id, struct gi *gip);
void seq_printvec3(double (**v)[DOF], int id, struct gi *gip);
void seq_prglobal(struct gi *gip);
void seq_prnodes(struct gi *gip);
void seq_prelems(struct gi *gip);
void seq_prmatrix(struct gi *gip);
void seq_prcomm(struct gi *gip);
void seq_prcommvals(struct gi *gip);

/* misc routines */
void info(struct gi *gip);
void usage(struct gi *gip);

/*
 * main program
 */
void main(int argc, char **argv) {
  int i, j;
  double mflops, global_mflops;
  int minnonzeros, maxnonzeros;

  init(argc, argv, gip);

  /*
   * allocate storage for the local matrices and vectors
   */
  if (!(K1 = (void *)malloc(gip->subdomains * sizeof(double *)))) {
    fprintf(stderr, "%s: couldn't malloc K1\n", gip->progname);
    bail(gip);
  }
  if (!(K2 = (void *)malloc(gip->subdomains * sizeof(double *)))) {
    fprintf(stderr, "%s: couldn't malloc K2\n", gip->progname);
    bail(gip);
  }
  if (!(v1 = (void *)malloc(gip->subdomains * sizeof(double *)))) {
    fprintf(stderr, "%s: couldn't malloc v1\n", gip->progname);
    bail(gip);
  }
  if (!(v2 = (void *)malloc(gip->subdomains * sizeof(double *)))) {
    fprintf(stderr, "%s: couldn't malloc v2\n", gip->progname);
    bail(gip);
  }
  if (!(w1 = (void *)malloc(gip->subdomains * sizeof(double *)))) {
    fprintf(stderr, "%s: couldn't malloc w1\n", gip->progname);
    bail(gip);
  }
  if (!(w2 = (void *)malloc(gip->subdomains * sizeof(double *)))) {
    fprintf(stderr, "%s: couldn't malloc w2\n", gip->progname);
    bail(gip);
  }

  for (i = 0; i < gip->subdomains; i++) {
    if (!(K1[i] = (void *)malloc((gip->matrixlen[i]+1) * DOF * DOF * sizeof(double)))) {
      fprintf(stderr, "%s: couldn't malloc K1[%d]\n", gip->progname, i);
      bail(gip);
    }
    if (!(K2[i] = (void *)malloc((gip->matrixlen[i]+1) * DOF * DOF * sizeof(double)))) {
      fprintf(stderr, "%s: couldn't malloc K2[i]\n", gip->progname);
      bail(gip);
    }
    if (!(v1[i] = (void *)malloc(gip->nodes[i] * DOF * sizeof(double)))) {
      fprintf(stderr, "%s: couldn't malloc v1\n", gip->progname);
      bail(gip);
    }
    if (!(v2[i] = (void *)malloc(gip->nodes[i] * DOF * sizeof(double)))) {
      fprintf(stderr, "%s: couldn't malloc v2\n", gip->progname);
      bail(gip);
    }
    if (!(w1[i] = (void *)malloc(gip->nodes[i] * DOF * sizeof(double)))) {
      fprintf(stderr, "%s: couldn't malloc w1\n", gip->progname);
      bail(gip);
    }
    if (!(w2[i] = (void *)malloc(gip->nodes[i] * DOF * sizeof(double)))) {
      fprintf(stderr, "%s: couldn't malloc w2\n", gip->progname);
      bail(gip);
    }
  }

  /*
   * generate the sparse matrix coefficients
   */
  if (!gip->quiet)
    fprintf(stderr, "%s: Computing sparse matrix coefficients.", gip->progname);
  assemble_matrix(K1, gip);
  assemble_matrix(K2, gip);
  assemble_vector(v1, gip);
  assemble_vector(v2, gip);
  if (!gip->quiet) {
    fprintf(stderr, " Done.\n");
    fflush(stderr);
  }

  /*
   * start the workers
   */
  spark_start_threads(gip->subdomains - 1);

  /*
   * start the master
   */
  if (!gip->quiet) {
    fprintf(stderr, "%s: Performing %d SMVP pairs with %d threads.",
            gip->progname, gip->iters, gip->subdomains);
  }
  ids[0] = 0;
  smvpthread(&ids[0]);
  if (!gip->quiet) {
    fprintf(stderr, " Done.\n");
    fflush(stderr);
  }

  if (secs == 0.0) {
    fprintf(stderr, "error: no measured elapsed time. Use more iterations (e.g., -i%d)\n",
            gip->iters * 10);
    bail(gip);
  }

  /*
   * Summarize performance
   */
  mflops = 0.0;
  for (i = 0; i < gip->subdomains; i++) {
    mflops += (double)((2.0*gip->matrixlen[i] - gip->nodes[i]) /* nonzero blocks */
                       * DOF*DOF                               /* DOF^2 numbers/block */
                       * 2.0)                                  /* 2 flops/number */
              / 1000000.0;
  }

  /* find the minimum and maximum load on each subdomain */
  minnonzeros = 1 << 20;
  maxnonzeros = -1;
  for (i = 0; i < gip->subdomains; i++) {
    if (gip->matrixlen[i] < minnonzeros)
      minnonzeros = gip->matrixlen[i];
    if (gip->matrixlen[i] > maxnonzeros)
      maxnonzeros = gip->matrixlen[i];
  }

  /* report: Mflops, time/SMVP, [compute secs/comm secs/compute %],
     Mflops/s, and [min/max/%] nonzeros per subdomain (load balance) */
  fprintf(stderr, "%s: %s %.6f Mf %.6f s [%.6f/%.6f/%.0f%%] %.1f Mf/s [%d/%d/%.0f%%]\n",
          gip->progname, gip->packfilename, mflops, secs,
          secs - csecs, csecs, ((secs - csecs)/secs)*100.0,
          mflops/secs, minnonzeros, maxnonzeros,
          (double)((double)minnonzeros/(double)maxnonzeros)*100.0);

  /*
   * print results if asked for
   */
  if (gip->output) {
    printnodevector(w1, gip->globalnodes, gip);
  }

  /*
   * Clean up
   */
  if (!gip->quiet) {
    fprintf(stderr, "%s: Done.\n", gip->progname);
  }
  finalize(gip);
}

/*
 * smvpthread - thread that performs a sequence of SMVP pairs
 */
void *smvpthread(void *a) {
  int i, j;
  double mycsecs, mystarttime, mystartctime;
  int *argp = (int *)a;
  int id = *argp;

  spark_barrier();

  mycsecs = 0.0;
  mystarttime = get_etime();

  for (i = 0; i < gip->iters; i++) {

    /* w1 = K1 * v1 */
    zero_vector(w1[id], 0, gip->nodes[id]);
    local_smvp(gip->nodes[id], K1[id], gip->matrixcol[id],
               gip->matrixindex[id], v1[id], w1[id], 0, gip->nodes[id]);
    mystartctime = get_etime();
    full_assemble(w1[id], id, gip);
    mycsecs += (get_etime() - mystartctime);

    /* w2 = K2 * v2 */
    zero_vector(w2[id], 0, gip->nodes[id]);
    local_smvp(gip->nodes[id], K2[id], gip->matrixcol[id],
               gip->matrixindex[id], v2[id], w2[id], 0, gip->nodes[id]);
    mystartctime = get_etime();
    full_assemble(w2[id], id, gip);
    mycsecs += (get_etime() - mystartctime);
  }

  if (id == 0) {
    secs = (get_etime() - mystarttime) / (gip->iters * 2.0);
    csecs = mycsecs / (gip->iters * 2.0);
  }
  return NULL;
}

/*
 * assemble_matrix - assemble the local sparse matrix
 */
void assemble_matrix(double (**K)[DOF][DOF], struct gi *gip) {
  int i, j, k, l, m, s;
  int temp1, elem;

  for (s = 0; s < gip->subdomains; s++) {
    for (i = 0; i < gip->matrixlen[s]; i++) {
      for (j = 0; j < DOF; j++) {
        for (k = 0; k < DOF; k++) {
          K[s][i][j][k] = 0.0;
        }
      }
    }
  }

  /*
   * add the contribution of each element to K
   */
  for (s = 0; s < gip->subdomains; s++) {
    for (elem = 0; elem < gip->elems[s]; elem++) {
      for (j = 0; j < gip->corners; j++) {
        for (k = 0; k < gip->corners; k++) {
          if (gip->vertex[s][elem][j] < gip->vertex[s][elem][k]) {
            temp1 = gip->matrixindex[s][gip->vertex[s][elem][j]];
            while (gip->matrixcol[s][temp1] != gip->vertex[s][elem][k]) {
              temp1++;
              if (temp1 >= gip->matrixindex[s][gip->vertex[s][elem][k] + 1]) {
                fprintf(stderr, "%s: K indexing error in assemble %d %d\n",
                        gip->progname, temp1, gip->vertex[s][elem][k]);
                bail(gip);
              }
            }
            for (l = 0; l < 3; l++) {
              for (m = 0; m < 3; m++) {
                K[s][temp1][l][m] += 1.0;
              }
            }
          }
        }
      }
    }
  }

#ifdef DEBUGASSEMBLE
  seq_printmatrix3(K[i], gip);
#endif
}

/*
 * assemble_vector - assemble the local v vector
 */
void assemble_vector(double (**v)[DOF], struct gi *gip) {
  int i, j, s;

  for (s = 0; s < gip->subdomains; s++) {
    for (i = 0; i < gip->nodes[s]; i++) {
      for (j = 0; j < DOF; j++) {
        v[s][i][j] = 1.0;
      }
    }
  }

#ifdef DEBUGASSEMBLE
  seq_printvec3(v, gip);
#endif
}

/*
 * zero_vector - clear a portion of a vector
 */
void zero_vector(double (*v)[DOF], int firstrow, int numrows) {
  int i;

  for (i = firstrow; i < (firstrow + numrows); i++) {
    v[i][0] = 0.0;
    v[i][1] = 0.0;
    v[i][2] = 0.0;
  }
}

/*
 * local_smvp - local sparse matrix vector product w = K*v
 */
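The listing on this page breaks off at the body of local_smvp. For orientation only, here is a minimal sketch of a block compressed-sparse-row product matching the prototype above. It assumes each row stores its diagonal block first, followed by the row's upper-triangle blocks (consistent with assemble_matrix and with the 2*matrixlen - nodes block count in main); it is a hedged reconstruction, not the original Spark98 body.

/*
 * local_smvp (sketch, not original) - w += K*v for a symmetric block-CSR matrix.
 * Assumption: entry Aindex[i] of row i holds the diagonal block K[i][i]; the
 * remaining entries of the row hold the blocks K[i][col] with col > i, so each
 * off-diagonal block also contributes its transpose to row col.
 */
void local_smvp(int nodes, double (*A)[DOF][DOF], int *Acol, int *Aindex,
                double (*v)[DOF], double (*w)[DOF], int firstrow, int numrows) {
  int i, j, k, idx, col;
  double sum[DOF];

  for (i = firstrow; i < firstrow + numrows; i++) {

    /* diagonal block: sum = K[i][i] * v[i] */
    idx = Aindex[i];
    for (j = 0; j < DOF; j++) {
      sum[j] = 0.0;
      for (k = 0; k < DOF; k++)
        sum[j] += A[idx][j][k] * v[i][k];
    }

    /* upper-triangle blocks of row i */
    for (idx = Aindex[i] + 1; idx < Aindex[i+1]; idx++) {
      col = Acol[idx];
      for (j = 0; j < DOF; j++) {
        for (k = 0; k < DOF; k++) {
          sum[j]    += A[idx][j][k] * v[col][k];  /* w[i]   += K[i][col]   * v[col] */
          w[col][j] += A[idx][k][j] * v[i][k];    /* w[col] += K[i][col]^T * v[i]   */
        }
      }
    }

    w[i][0] += sum[0];
    w[i][1] += sum[1];
    w[i][2] += sum[2];
  }
}

Under this assumption each stored off-diagonal block is used twice (once for row i, once transposed for row col), which is exactly what the (2*matrixlen - nodes) nonzero-block count in main's Mflops calculation reflects.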