mvsearch.c

来自「基于Blas CLapck的.用过的人知道是干啥的」· C语言 代码 · 共 885 行 · 第 1/2 页

C
885
字号
/* *             Automatically Tuned Linear Algebra Software v3.8.0 *                    (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: *   1. Redistributions of source code must retain the above copyright *      notice, this list of conditions and the following disclaimer. *   2. Redistributions in binary form must reproduce the above copyright *      notice, this list of conditions, and the following disclaimer in the *      documentation and/or other materials provided with the distribution. *   3. The name of the ATLAS group or the names of its contributers may *      not be used to endorse or promote products derived from this *      software without specific written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */#include <stdio.h>#include <stdlib.h>#include <ctype.h>#include <assert.h>#include "atlas_fopen.h"#define ATL_MVIsMM(iflag_) ( ((iflag_) | 8) == (iflag_) )#define ATL_MVIsAxpy(iflag_) ( ((iflag_) | 16) == (iflag_) )#define ATL_MVIsDot(iflag_) ( !ATL_MVIsMM(iflag_) && !ATL_MVIsAxpy(iflag_) )#define ATL_MVNoBlock(iflag_) ( ((iflag_) | 32) == (iflag_) )double GetAvg(int n, double tolerance, double *mflop){   int i, j;   double t0, tavg;/* * Sort results, largest first */   for (i=0; i != n; i++)   {      for (j=i+1; j < n; j++)      {         if (mflop[i] < mflop[j])         {            t0 = mflop[i];            mflop[i] = mflop[j];            mflop[j] = t0;         }      }   }/* * Throw out result if it is outside tolerance; rerun if two mflop not within * tolerance;  this code assumes n == 3 */   if (tolerance*mflop[1] < mflop[0])  /* too big a range in results */   {      if (tolerance*mflop[2] < mflop[1]) return(-1.0);      tavg = (mflop[1] + mflop[2]) / 2.0;   }   else if (tolerance*mflop[2] < mflop[0]) tavg = (mflop[0] + mflop[1]) / 2.0;   else tavg = (mflop[0] + mflop[1] + mflop[2]) / 3.0;   return(tavg);}int GetL1CacheSize(){   FILE *L1f;   int L1Size;   if (!FileExists("res/L1CacheSize"))   {      assert(system("make res/L1CacheSize\n") == 0);   }   L1f = fopen("res/L1CacheSize", "r");   assert(L1f != NULL);   fscanf(L1f, "%d", &L1Size);   fclose(L1f);   fprintf(stderr, "\n      Read in L1 Cache size as = %dKB.\n",L1Size);   return(L1Size);}int mvtstcase(char pre, char TA, char *mvnam, char *cc, char *ccflags){   char ln[256];   if (cc[0] != '\0' && ccflags[0] != '\0')      sprintf(ln,              "make %cmvtstcase%c mvrout=%s %cMVCC=\"%s\" %cMVFLAGS=\"%s\"\n",              pre, TA, mvnam, pre, cc, pre, ccflags);   else if (cc[0] == '\0' && ccflags[0] != '\0')      sprintf(ln, "make %cmvtstcase%c mvrout=%s EXTFLAGS=\"%s\"\n",              pre, TA, mvnam, ccflags);   else sprintf(ln, "make %cmvtstcase%c mvrout=%s\n", pre, TA, mvnam);   return(system(ln));}double svcase   (char pre, int cas, int l1mul, int mu, int nu, int flag,    char *mvnamN, char *ccN, char *ccflagsN,    char *mvnamT, char *ccT, char *ccflagsT){   char ln[1024], fnam[64];   const int imul = l1mul;   int i, mb;   double mfs[3], mf;   FILE *fp;   sprintf(fnam, "res/%csymvS_%d_0", pre, cas);   if (!FileExists(fnam))   {      i = sprintf(ln, "make %csvcase mvrout=%s svrout=%s cas=%d xu=%d yu=%d l1mul=%d iflag=%d gmvout=\"-o %s\"",              pre, mvnamT, mvnamN, cas, nu, mu, imul, flag, fnam);      if (ccN[0] != '\0' && ccflagsN[0] != '\0')         i += sprintf(ln+i, " %cSVCC=\"%s\" %cSVFLAGS=\"%s\"",                      pre, ccN, pre, ccflagsN);      else if (ccN[0] == '\0' && ccflagsN[0] != '\0')      {         i += sprintf(ln+i,           " SEXTFLAGS=\"%s -DATL_NOL1PREFETCH -DATL_NOL2PREFETCH\"", ccflagsN);      }      else i += sprintf(ln+i,           " SEXTFLAGS=\"-DATL_NOL1PREFETCH -DATL_NOL2PREFETCH\"");      if (ccT[0] != '\0' && ccflagsT[0] != '\0')         i += sprintf(ln+i, " %cMVCC=\"%s\" %cMVFLAGS=\"%s\"",                      pre, ccT, pre, ccflagsT);      else if (ccT[0] == '\0' && ccflagsT[0] != '\0')         i += sprintf(ln+i, " SEXTFLAGS=\"%s\"", ccflagsT);      sprintf(ln+i, "\n");      fprintf(stderr, "%s", ln);      if (system(ln)) return(-1.0);  /* won't compile here */   }   fp = fopen(fnam, "r");   assert(fp);   assert(fscanf(fp, " %lf %lf %lf", mfs, mfs+1, mfs+2) == 3);   fclose(fp);   mf = GetAvg(3, 1.20, mfs);   if (mf == -1.0)   {      fprintf(stderr,"\n\n%s : VARIATION EXCEEDS TOLERENCE, RERUN WITH HIGHER REPS.\n\n", fnam);      sprintf(ln, "rm -f %s\n", fnam);      system(ln);      exit(-1);   }   return(mf);}double mvcase(int SY, char pre, char *mvnam, char TA, int flag, int mu, int nu,              int cas, char *cc, char *ccflags, int l1mul){   char nTA;   char ln[1024], fnam[64];   const int imul = l1mul;   int i, mb;   double mfs[3], mf;   FILE *fp;   if (TA == 'n' || TA == 'N') nTA = 'T';   else nTA = 'N';   if (mu == 0) mu = 1;   if (nu == 0) nu = 1;   if (SY) sprintf(fnam, "res/%csymv%c_%d_0", pre, TA, cas);   else   {      if (ATL_MVNoBlock(flag)) sprintf(fnam, "res/%cgemv%c_%d_0", pre, TA, cas);      else sprintf(fnam, "res/%cgemv%c_%d_%d", pre, TA, cas, imul);   }   if (!FileExists(fnam))   {      i = sprintf(ln, "make %cmvcase%c mvrout=%s cas=%d xu=%d yu=%d l1mul=%d iflag=%d gmvout=\"-o %s\"",              pre, TA, mvnam, cas, nu, mu, imul, flag, fnam);      if (cc[0] != '\0' && ccflags[0] != '\0')         i += sprintf(ln+i, " %cMVCC=\"%s\" %cMVFLAGS=\"%s\"",                      pre, cc, pre, ccflags);      else if (cc[0] == '\0' && ccflags[0] != '\0')      {         if (SY) i += sprintf(ln+i,            " EXTFLAGS=\"%s -DATL_NOL1PREFETCH -DATL_NOL2PREFETCH\"", ccflags);         else i += sprintf(ln+i, " EXTFLAGS=\"%s\"", ccflags);      }      if (SY)      {         if (pre == 'd' || pre == 'c') mb = 1024 / 8;         else if (pre == 's') mb = 1024 / 4;         else if (pre == 'z') mb = 1024 / 16;         mb = 0.01*l1mul*mb*GetL1CacheSize();         mb = (mb-2*nu)/(nu+2);         if (mb > mu) mb = (mb/mu)*mu;         if (!(cc[0] == '\0' && ccflags[0] != '\0'))            i += sprintf(ln+i,                         " EXTFLAGS=\"-DATL_NOL1PREFETCH -DATL_NOL2PREFETCH\"");         i += sprintf(ln+i, " opt=\"-2 1 -L %d\" M=%d N=%d", mb, mb, nu);      }      sprintf(ln+i, "\n");      fprintf(stderr, "%s", ln);      if (system(ln)) return(-1.0);  /* won't compile here */   }   fp = fopen(fnam, "r");   assert(fp);   assert(fscanf(fp, " %lf %lf %lf", mfs, mfs+1, mfs+2) == 3);   fclose(fp);   mf = GetAvg(3, 1.20, mfs);   if (mf == -1.0)   {      fprintf(stderr,"\n\n%s : VARIATION EXCEEDS TOLERENCE, RERUN WITH HIGHER REPS.\n\n", fnam);      sprintf(ln, "rm -f %s\n", fnam);      system(ln);      exit(-1);   }   return(mf);}int FindL1Mul(char pre, int cas, char *mvnam, char *cc, char *ccflags,              char TA, int flag, int mu, int nu){   double low = .5, high = 1.0;   double mflow, mfhigh;   int ilow, ihigh;   if (ATL_MVNoBlock(flag)) flag -= 32;  /* always actually block these times */   do   {      ilow = (low  * 100.0);      ihigh = (high * 100.0);      mflow  = mvcase(0, pre, mvnam, TA, flag, mu, nu, cas, cc, ccflags, ilow);      mfhigh = mvcase(0, pre, mvnam, TA, flag, mu, nu, cas, cc, ccflags, ihigh);      fprintf(stdout, "      %d%% %.2fMFLOP  ---  %d%% %.2fMFLOP\n",              ilow, mflow, ihigh, mfhigh);      if (mflow < 1.005*mfhigh) low += 0.5*(high-low);      else high -= 0.5 * (high-low);   }   while (ihigh-ilow);   fprintf(stdout, "\n\nBEST %% of L1 cache: %d\n", ilow);   return(ilow);}int ConfirmBlock(char pre, char *mvnam, char *cc, char *ccflags, char TA,                 int flag, int mu, int nu, int cas, int l1mul){   int bflag;   double mfblock, mfnoblock;   if ( ATL_MVNoBlock(flag) )   {      bflag = flag - 32;      mfblock   = mvcase(0, pre, mvnam, TA, bflag, mu, nu, cas, cc, ccflags,                         l1mul);      mfnoblock = mvcase(0, pre, mvnam, TA,  flag, mu, nu, cas, cc, ccflags,                         l1mul);      fprintf(stdout, "\nWith blocking=%lf, without=%lf\n\n",              mfblock, mfnoblock);      if (mfblock >= mfnoblock) return(bflag);   }   return(flag);}void NoEndLineWhiteSpace(char *ln){   int i;   for (i=0; ln[i]; i++);   if (i)      for (i--; isspace(ln[i]); i--) ln[i] = '\0';}int LineIsCont(char *ln){   int i, iret=0;   for(i=0; ln[i]; i++);   if (i)   {      for(i--; isspace(ln[i]); i--);      if (ln[i] == '\\') iret = 1;   }   return(iret);}int iGetIDCase(FILE *fp, int ID, char *fnam, char *auth, char *cc,               char *ccflags, int *flag, int *mu, int *nu){   char ln[256];   int i, n, id;   assert(fgets(ln, 128, fp));   assert(sscanf(ln, " %d", &n) == 1);   for (i=0; i < n; i++)   {      assert(fgets(ln, 256, fp));      assert(sscanf(ln, " %d %d %d %d %s \"%[^\"]",                    &id, flag, mu, nu, fnam, auth) == 6);      if (LineIsCont(ln))      {         assert(fgets(cc, 256, fp));         assert(fgets(ccflags, 256, fp));         NoEndLineWhiteSpace(cc);         NoEndLineWhiteSpace(ccflags);      }      else cc[0] = ccflags[0] = '\0';      if (id == ID) break;   }   return(id);}void GetIDCase(char pre, int ID, char *fnam, char *auth, char *cc,               char *ccflags, int *flag, int *mu, int *nu){   char ln[256];   int i, n, id;   FILE *fp;   sprintf(ln, "%ccases.dsc", pre);   fp = fopen(ln, "r");   assert(fp);   id = iGetIDCase(fp, ID, fnam, auth, cc, ccflags, flag, mu, nu);   if (id != ID) id = iGetIDCase(fp, ID, fnam, auth, cc, ccflags, flag, mu, nu);   fclose(fp);   assert(id == ID);}void GetCases(FILE *fp, int *N, int **IDs, char ***fnams, char ***auths,              char ***CC, char ***CCFLAGS, int **flags, int **mus, int **nus){   int i, j, n;   int *mu, *nu, *flag, *id;   char **fnam, **auth, **cc, **ccflags;   char ln[256];   assert(fgets(ln, 128, fp));   assert(sscanf(ln, " %d", &n) == 1);   assert(n < 100 && n > 0);   fnam    = malloc(n * sizeof(char*));   auth    = malloc(n * sizeof(char*));   cc      = malloc(n * sizeof(char*));   ccflags = malloc(n * sizeof(char*));   assert(fnam && auth && cc && ccflags);   for (i=0; i < n; i++)   {      assert(fnam[i] = malloc(64*sizeof(char)));      assert(auth[i] = malloc(64*sizeof(char)));   }   id = malloc(n * sizeof(int));   mu = malloc(n * sizeof(int));   nu = malloc(n * sizeof(int));   flag = malloc(n * sizeof(int));   assert(id && mu && nu && flag);   for (i=0; i < n; i++)   {      assert(fgets(ln, 256, fp));      assert(sscanf(ln, " %d %d %d %d %s \"%[^\"]",                    id+i, flag+i, mu+i, nu+i, fnam[i], auth[i]) == 6);      assert(mu[i] >= 0 && nu[i] >= 0 && fnam[i][0] != '\0' && id[i] > 0);      if (LineIsCont(ln))      {         fgets(ln, 256, fp);         NoEndLineWhiteSpace(ln);         if (ln[0] == '+' && (ln[1] == '\0' || ln[1] == ' ')) /* adding flags */            cc[i] = NULL;         else         {            j = strlen(ln);            cc[i] = malloc((j+1)*sizeof(char));            assert(cc[i]);            strcpy(cc[i], ln);         }         fgets(ln, 256, fp);         NoEndLineWhiteSpace(ln);         j = strlen(ln);         ccflags[i] = malloc((j+1)*sizeof(char));         assert(ccflags[i]);         strcpy(ccflags[i], ln);      }      else cc[i] = ccflags[i] = NULL;   }   for (i=0; i < n; i++) /* fix cc & ccflags */   {      if (cc[i] == NULL)      {         cc[i] = malloc(2*sizeof(char));         assert(cc[i]);         cc[i][0] = cc[i][1] = '\0';      }      if (ccflags[i] == NULL)      {         ccflags[i] = malloc(2*sizeof(char));         assert(ccflags[i]);         ccflags[i][0] = ccflags[i][1] = '\0';      }   }   *N = n;   *IDs = id;   *fnams = fnam;   *auths = auth;   *CC = cc;   *CCFLAGS = ccflags;   *flags = flag;   *mus = mu;   *nus = nu;}int RunTransCases(int SY, char pre, char TA, int ncases, int *ids, char **fnams,                  char **cc, char **ccflags, int *flags, int *mus, int *nus){   int i, imax=0;   double mf, mfmax=0.0;   for (i=0; i < ncases; i++)   {      mf = mvcase(SY, pre, fnams[i], TA, flags[i], mus[i], nus[i], ids[i],                  cc[i], ccflags[i], 75);      fprintf(stdout, "%s : %.2f\n", fnams[i], mf);      if (mf > mfmax)      {         if (mvtstcase(pre, TA, fnams[i], cc[i], ccflags[i]) == 0)         { /* ensure it passes test */            mfmax = mf;            imax = i+1;         }         else fprintf(stderr, "\n\nROUTINE %s FAILED TESTS!!!\n\n", fnams[i]);      }   }   assert(imax);   fprintf(stdout,           "\nbest %cgemv%c : ID=%d, mu=%d, nu=%d at %.2f MFLOPS\n\n",           pre, TA, ids[imax-1], mus[imax-1], nus[imax-1], mfmax);   return(imax-1);}void ReadSum(char pre, int *l1mul,

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?