mvsearch.c
来自「基于Blas CLapck的.用过的人知道是干啥的」· C语言 代码 · 共 885 行 · 第 1/2 页
C
885 行
/* * Automatically Tuned Linear Algebra Software v3.8.0 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the ATLAS group or the names of its contributers may * not be used to endorse or promote products derived from this * software without specific written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */#include <stdio.h>#include <stdlib.h>#include <ctype.h>#include <assert.h>#include "atlas_fopen.h"#define ATL_MVIsMM(iflag_) ( ((iflag_) | 8) == (iflag_) )#define ATL_MVIsAxpy(iflag_) ( ((iflag_) | 16) == (iflag_) )#define ATL_MVIsDot(iflag_) ( !ATL_MVIsMM(iflag_) && !ATL_MVIsAxpy(iflag_) )#define ATL_MVNoBlock(iflag_) ( ((iflag_) | 32) == (iflag_) )double GetAvg(int n, double tolerance, double *mflop){ int i, j; double t0, tavg;/* * Sort results, largest first */ for (i=0; i != n; i++) { for (j=i+1; j < n; j++) { if (mflop[i] < mflop[j]) { t0 = mflop[i]; mflop[i] = mflop[j]; mflop[j] = t0; } } }/* * Throw out result if it is outside tolerance; rerun if two mflop not within * tolerance; this code assumes n == 3 */ if (tolerance*mflop[1] < mflop[0]) /* too big a range in results */ { if (tolerance*mflop[2] < mflop[1]) return(-1.0); tavg = (mflop[1] + mflop[2]) / 2.0; } else if (tolerance*mflop[2] < mflop[0]) tavg = (mflop[0] + mflop[1]) / 2.0; else tavg = (mflop[0] + mflop[1] + mflop[2]) / 3.0; return(tavg);}int GetL1CacheSize(){ FILE *L1f; int L1Size; if (!FileExists("res/L1CacheSize")) { assert(system("make res/L1CacheSize\n") == 0); } L1f = fopen("res/L1CacheSize", "r"); assert(L1f != NULL); fscanf(L1f, "%d", &L1Size); fclose(L1f); fprintf(stderr, "\n Read in L1 Cache size as = %dKB.\n",L1Size); return(L1Size);}int mvtstcase(char pre, char TA, char *mvnam, char *cc, char *ccflags){ char ln[256]; if (cc[0] != '\0' && ccflags[0] != '\0') sprintf(ln, "make %cmvtstcase%c mvrout=%s %cMVCC=\"%s\" %cMVFLAGS=\"%s\"\n", pre, TA, mvnam, pre, cc, pre, ccflags); else if (cc[0] == '\0' && ccflags[0] != '\0') sprintf(ln, "make %cmvtstcase%c mvrout=%s EXTFLAGS=\"%s\"\n", pre, TA, mvnam, ccflags); else sprintf(ln, "make %cmvtstcase%c mvrout=%s\n", pre, TA, mvnam); return(system(ln));}double svcase (char pre, int cas, int l1mul, int mu, int nu, int flag, char *mvnamN, char *ccN, char *ccflagsN, char *mvnamT, char *ccT, char *ccflagsT){ char ln[1024], fnam[64]; const int imul = l1mul; int i, mb; double mfs[3], mf; FILE *fp; sprintf(fnam, "res/%csymvS_%d_0", pre, cas); if (!FileExists(fnam)) { i = sprintf(ln, "make %csvcase mvrout=%s svrout=%s cas=%d xu=%d yu=%d l1mul=%d iflag=%d gmvout=\"-o %s\"", pre, mvnamT, mvnamN, cas, nu, mu, imul, flag, fnam); if (ccN[0] != '\0' && ccflagsN[0] != '\0') i += sprintf(ln+i, " %cSVCC=\"%s\" %cSVFLAGS=\"%s\"", pre, ccN, pre, ccflagsN); else if (ccN[0] == '\0' && ccflagsN[0] != '\0') { i += sprintf(ln+i, " SEXTFLAGS=\"%s -DATL_NOL1PREFETCH -DATL_NOL2PREFETCH\"", ccflagsN); } else i += sprintf(ln+i, " SEXTFLAGS=\"-DATL_NOL1PREFETCH -DATL_NOL2PREFETCH\""); if (ccT[0] != '\0' && ccflagsT[0] != '\0') i += sprintf(ln+i, " %cMVCC=\"%s\" %cMVFLAGS=\"%s\"", pre, ccT, pre, ccflagsT); else if (ccT[0] == '\0' && ccflagsT[0] != '\0') i += sprintf(ln+i, " SEXTFLAGS=\"%s\"", ccflagsT); sprintf(ln+i, "\n"); fprintf(stderr, "%s", ln); if (system(ln)) return(-1.0); /* won't compile here */ } fp = fopen(fnam, "r"); assert(fp); assert(fscanf(fp, " %lf %lf %lf", mfs, mfs+1, mfs+2) == 3); fclose(fp); mf = GetAvg(3, 1.20, mfs); if (mf == -1.0) { fprintf(stderr,"\n\n%s : VARIATION EXCEEDS TOLERENCE, RERUN WITH HIGHER REPS.\n\n", fnam); sprintf(ln, "rm -f %s\n", fnam); system(ln); exit(-1); } return(mf);}double mvcase(int SY, char pre, char *mvnam, char TA, int flag, int mu, int nu, int cas, char *cc, char *ccflags, int l1mul){ char nTA; char ln[1024], fnam[64]; const int imul = l1mul; int i, mb; double mfs[3], mf; FILE *fp; if (TA == 'n' || TA == 'N') nTA = 'T'; else nTA = 'N'; if (mu == 0) mu = 1; if (nu == 0) nu = 1; if (SY) sprintf(fnam, "res/%csymv%c_%d_0", pre, TA, cas); else { if (ATL_MVNoBlock(flag)) sprintf(fnam, "res/%cgemv%c_%d_0", pre, TA, cas); else sprintf(fnam, "res/%cgemv%c_%d_%d", pre, TA, cas, imul); } if (!FileExists(fnam)) { i = sprintf(ln, "make %cmvcase%c mvrout=%s cas=%d xu=%d yu=%d l1mul=%d iflag=%d gmvout=\"-o %s\"", pre, TA, mvnam, cas, nu, mu, imul, flag, fnam); if (cc[0] != '\0' && ccflags[0] != '\0') i += sprintf(ln+i, " %cMVCC=\"%s\" %cMVFLAGS=\"%s\"", pre, cc, pre, ccflags); else if (cc[0] == '\0' && ccflags[0] != '\0') { if (SY) i += sprintf(ln+i, " EXTFLAGS=\"%s -DATL_NOL1PREFETCH -DATL_NOL2PREFETCH\"", ccflags); else i += sprintf(ln+i, " EXTFLAGS=\"%s\"", ccflags); } if (SY) { if (pre == 'd' || pre == 'c') mb = 1024 / 8; else if (pre == 's') mb = 1024 / 4; else if (pre == 'z') mb = 1024 / 16; mb = 0.01*l1mul*mb*GetL1CacheSize(); mb = (mb-2*nu)/(nu+2); if (mb > mu) mb = (mb/mu)*mu; if (!(cc[0] == '\0' && ccflags[0] != '\0')) i += sprintf(ln+i, " EXTFLAGS=\"-DATL_NOL1PREFETCH -DATL_NOL2PREFETCH\""); i += sprintf(ln+i, " opt=\"-2 1 -L %d\" M=%d N=%d", mb, mb, nu); } sprintf(ln+i, "\n"); fprintf(stderr, "%s", ln); if (system(ln)) return(-1.0); /* won't compile here */ } fp = fopen(fnam, "r"); assert(fp); assert(fscanf(fp, " %lf %lf %lf", mfs, mfs+1, mfs+2) == 3); fclose(fp); mf = GetAvg(3, 1.20, mfs); if (mf == -1.0) { fprintf(stderr,"\n\n%s : VARIATION EXCEEDS TOLERENCE, RERUN WITH HIGHER REPS.\n\n", fnam); sprintf(ln, "rm -f %s\n", fnam); system(ln); exit(-1); } return(mf);}int FindL1Mul(char pre, int cas, char *mvnam, char *cc, char *ccflags, char TA, int flag, int mu, int nu){ double low = .5, high = 1.0; double mflow, mfhigh; int ilow, ihigh; if (ATL_MVNoBlock(flag)) flag -= 32; /* always actually block these times */ do { ilow = (low * 100.0); ihigh = (high * 100.0); mflow = mvcase(0, pre, mvnam, TA, flag, mu, nu, cas, cc, ccflags, ilow); mfhigh = mvcase(0, pre, mvnam, TA, flag, mu, nu, cas, cc, ccflags, ihigh); fprintf(stdout, " %d%% %.2fMFLOP --- %d%% %.2fMFLOP\n", ilow, mflow, ihigh, mfhigh); if (mflow < 1.005*mfhigh) low += 0.5*(high-low); else high -= 0.5 * (high-low); } while (ihigh-ilow); fprintf(stdout, "\n\nBEST %% of L1 cache: %d\n", ilow); return(ilow);}int ConfirmBlock(char pre, char *mvnam, char *cc, char *ccflags, char TA, int flag, int mu, int nu, int cas, int l1mul){ int bflag; double mfblock, mfnoblock; if ( ATL_MVNoBlock(flag) ) { bflag = flag - 32; mfblock = mvcase(0, pre, mvnam, TA, bflag, mu, nu, cas, cc, ccflags, l1mul); mfnoblock = mvcase(0, pre, mvnam, TA, flag, mu, nu, cas, cc, ccflags, l1mul); fprintf(stdout, "\nWith blocking=%lf, without=%lf\n\n", mfblock, mfnoblock); if (mfblock >= mfnoblock) return(bflag); } return(flag);}void NoEndLineWhiteSpace(char *ln){ int i; for (i=0; ln[i]; i++); if (i) for (i--; isspace(ln[i]); i--) ln[i] = '\0';}int LineIsCont(char *ln){ int i, iret=0; for(i=0; ln[i]; i++); if (i) { for(i--; isspace(ln[i]); i--); if (ln[i] == '\\') iret = 1; } return(iret);}int iGetIDCase(FILE *fp, int ID, char *fnam, char *auth, char *cc, char *ccflags, int *flag, int *mu, int *nu){ char ln[256]; int i, n, id; assert(fgets(ln, 128, fp)); assert(sscanf(ln, " %d", &n) == 1); for (i=0; i < n; i++) { assert(fgets(ln, 256, fp)); assert(sscanf(ln, " %d %d %d %d %s \"%[^\"]", &id, flag, mu, nu, fnam, auth) == 6); if (LineIsCont(ln)) { assert(fgets(cc, 256, fp)); assert(fgets(ccflags, 256, fp)); NoEndLineWhiteSpace(cc); NoEndLineWhiteSpace(ccflags); } else cc[0] = ccflags[0] = '\0'; if (id == ID) break; } return(id);}void GetIDCase(char pre, int ID, char *fnam, char *auth, char *cc, char *ccflags, int *flag, int *mu, int *nu){ char ln[256]; int i, n, id; FILE *fp; sprintf(ln, "%ccases.dsc", pre); fp = fopen(ln, "r"); assert(fp); id = iGetIDCase(fp, ID, fnam, auth, cc, ccflags, flag, mu, nu); if (id != ID) id = iGetIDCase(fp, ID, fnam, auth, cc, ccflags, flag, mu, nu); fclose(fp); assert(id == ID);}void GetCases(FILE *fp, int *N, int **IDs, char ***fnams, char ***auths, char ***CC, char ***CCFLAGS, int **flags, int **mus, int **nus){ int i, j, n; int *mu, *nu, *flag, *id; char **fnam, **auth, **cc, **ccflags; char ln[256]; assert(fgets(ln, 128, fp)); assert(sscanf(ln, " %d", &n) == 1); assert(n < 100 && n > 0); fnam = malloc(n * sizeof(char*)); auth = malloc(n * sizeof(char*)); cc = malloc(n * sizeof(char*)); ccflags = malloc(n * sizeof(char*)); assert(fnam && auth && cc && ccflags); for (i=0; i < n; i++) { assert(fnam[i] = malloc(64*sizeof(char))); assert(auth[i] = malloc(64*sizeof(char))); } id = malloc(n * sizeof(int)); mu = malloc(n * sizeof(int)); nu = malloc(n * sizeof(int)); flag = malloc(n * sizeof(int)); assert(id && mu && nu && flag); for (i=0; i < n; i++) { assert(fgets(ln, 256, fp)); assert(sscanf(ln, " %d %d %d %d %s \"%[^\"]", id+i, flag+i, mu+i, nu+i, fnam[i], auth[i]) == 6); assert(mu[i] >= 0 && nu[i] >= 0 && fnam[i][0] != '\0' && id[i] > 0); if (LineIsCont(ln)) { fgets(ln, 256, fp); NoEndLineWhiteSpace(ln); if (ln[0] == '+' && (ln[1] == '\0' || ln[1] == ' ')) /* adding flags */ cc[i] = NULL; else { j = strlen(ln); cc[i] = malloc((j+1)*sizeof(char)); assert(cc[i]); strcpy(cc[i], ln); } fgets(ln, 256, fp); NoEndLineWhiteSpace(ln); j = strlen(ln); ccflags[i] = malloc((j+1)*sizeof(char)); assert(ccflags[i]); strcpy(ccflags[i], ln); } else cc[i] = ccflags[i] = NULL; } for (i=0; i < n; i++) /* fix cc & ccflags */ { if (cc[i] == NULL) { cc[i] = malloc(2*sizeof(char)); assert(cc[i]); cc[i][0] = cc[i][1] = '\0'; } if (ccflags[i] == NULL) { ccflags[i] = malloc(2*sizeof(char)); assert(ccflags[i]); ccflags[i][0] = ccflags[i][1] = '\0'; } } *N = n; *IDs = id; *fnams = fnam; *auths = auth; *CC = cc; *CCFLAGS = ccflags; *flags = flag; *mus = mu; *nus = nu;}int RunTransCases(int SY, char pre, char TA, int ncases, int *ids, char **fnams, char **cc, char **ccflags, int *flags, int *mus, int *nus){ int i, imax=0; double mf, mfmax=0.0; for (i=0; i < ncases; i++) { mf = mvcase(SY, pre, fnams[i], TA, flags[i], mus[i], nus[i], ids[i], cc[i], ccflags[i], 75); fprintf(stdout, "%s : %.2f\n", fnams[i], mf); if (mf > mfmax) { if (mvtstcase(pre, TA, fnams[i], cc[i], ccflags[i]) == 0) { /* ensure it passes test */ mfmax = mf; imax = i+1; } else fprintf(stderr, "\n\nROUTINE %s FAILED TESTS!!!\n\n", fnams[i]); } } assert(imax); fprintf(stdout, "\nbest %cgemv%c : ID=%d, mu=%d, nu=%d at %.2f MFLOPS\n\n", pre, TA, ids[imax-1], mus[imax-1], nus[imax-1], mfmax); return(imax-1);}void ReadSum(char pre, int *l1mul,
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?