mmsearch.c

来自「基于Blas CLapck的.用过的人知道是干啥的」· C语言 代码 · 共 2,188 行 · 第 1/5 页

C
2,188
字号
/* *             Automatically Tuned Linear Algebra Software v3.8.0 *                    (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: *   1. Redistributions of source code must retain the above copyright *      notice, this list of conditions and the following disclaimer. *   2. Redistributions in binary form must reproduce the above copyright *      notice, this list of conditions, and the following disclaimer in the *      documentation and/or other materials provided with the distribution. *   3. The name of the ATLAS group or the names of its contributers may *      not be used to endorse or promote products derived from this *      software without specific written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */#include <stdio.h>#include <stdlib.h>#include <assert.h>#include <string.h>#include "atlas_misc.h"#include "atlas_fopen.h"#include "atlas_prefetch.h"#define Mmin(x, y) ( (x) > (y) ? (y) : (x) )#define TOLERANCE 1.2#define REPS 4096#define L1FNAME "L1CacheSize"#define NTIM 3#define MAXLAT 6/* * For 2-operand assemblers, no benefit from 2-D register blocking, so flag * them;  If unknown arch is also 2-op, no problem will just search longer */#if defined (ATL_GAS_x8632) || defined(ATL_GAS_x8664)   #define TWO_OP_ASM#endifchar LANG;void PrintUsage(char *xnam){   fprintf(stderr, "\n\nUsage: %s [-r #][-h][-f][-l #][-p s/d/c/z][-m #]\n",           xnam);   fprintf(stderr, "-h         : Print this help screen\n");   fprintf(stderr, "-f         : Force complete search over given parameters\n");   fprintf(stderr, "-p s/d/c/z : set the precision to search for\n");   fprintf(stderr, "-r #       : Set max number of registers to use to # (default 32)\n");   fprintf(stderr, "-m #       : Set max L1 cache size (kilobytes) to #\n");   fprintf(stderr, "-L <c/f>   : Select what language to use (C or Fortran77)\n");   fprintf(stderr, "-K #       : Set K-loop unrolling to # (-1 = K).\n");   fprintf(stderr, "-l #       : Use latency factor #.  If set to 0,\n");   fprintf(stderr,"             do not do latency checking.  By default, latency checking is\n");   fprintf(stderr,"             done only if initial timings show it is a win.\n");   exit(-1);}void GetSettings(int nargs, char *args[], char *pre, char *lang, int *ku,                 int *LAT, int *FRC, int *nreg, int *MaxL1Size, int *ROUT){   int i;   *FRC = 0;   *LAT = -1;   *nreg = -1;   *MaxL1Size = 128;   *pre = 'd';   *lang = 'C';   *ku = 0;   *ROUT = 0;   for (i=1; i < nargs; i++)   {      if (*args[i] != '-') PrintUsage(args[0]);      switch(args[i][1])      {      case 'K':         *ku = atoi(args[++i]);         break;      case 'L':         i++;         if ( (*args[i] == 'F') || (*args[i] == 'f') ) *lang = 'F';         break;      case 'm' :         *MaxL1Size = atoi(args[++i]);         break;      case 'r' :         *nreg = atoi(args[++i]);         break;      case 'f' :         *FRC = atoi(args[++i]);         break;      case 'l' :         *LAT = atoi(args[++i]);         break;      case 'p' :         *pre = *args[++i];         break;      default:      case 'R':         *ROUT = atoi(args[++i]);         break;      case 'h' :         PrintUsage(args[0]);      }   }}int L1Elts(char pre, int MaxL1Size){   FILE *L1f;   int L1Size, tsize;   char ln[128];   if (!FileExists("res/L1CacheSize"))   {      sprintf(ln, "make RunL1 MaxL1=%d\n",MaxL1Size);      if (system(ln) != 0)      {         remove("res/L1CacheSize");         fprintf(stderr, "Error in command: %s", ln);         exit(-1);      }   }   L1f = fopen("res/L1CacheSize", "r");   assert(L1f != NULL);   fscanf(L1f, "%d", &L1Size);   fclose(L1f);   switch (pre)   {      case 's':         tsize = sizeof(float);         break;      case 'd':         tsize = sizeof(double);         break;      case 'q':         tsize = sizeof(long double);         break;      case 'c':         tsize = sizeof(float);         break;      case 'z':         tsize = sizeof(double);         break;   }   return( (L1Size*1024) / tsize);}int GetCacheSize(int MaxL1Size)/* * Returns L1 size in kilobytes */{   FILE *L1f;   int L1Size;   char ln[32];   if (!FileExists("res/L1CacheSize"))   {      sprintf(ln, "make RunL1 MaxL1=%d\n",MaxL1Size);      if (system(ln) != 0)      {         remove("res/L1CacheSize");         fprintf(stderr, "Error in command: %s", ln);         exit(-1);      }   }   L1f = fopen("res/L1CacheSize", "r");   assert(L1f != NULL);   fscanf(L1f, "%d", &L1Size);   fclose(L1f);   fprintf(stderr, "\n      Read in L1 Cache size as = %dKB.\n",L1Size);   return(L1Size);}int GetTypeSize(char pre){   int tsize;   if (pre == 'c' || pre == 's') tsize = ATL_ssize;   else tsize = ATL_dsize;   return(tsize);}void findNBs(char prec, char *NBnam, int MaxL1Size){   FILE *NBf;   char ln[80];   int i, L1Size, tmp, tsize, tL1Size, CL, nNB;   int NB[100];   fprintf(stderr, "NB setting not supplied; calculating:\n");   L1Size = GetCacheSize(MaxL1Size);   tsize = GetTypeSize(prec);   tL1Size = L1Size * (1024 / tsize);   tmp = CL = ATL_Cachelen / tsize;   if (!tmp) tmp=1;   nNB = 0;   fprintf(stderr, "tmp=%d, tL1size=%d\n",tmp, tL1Size);   while (tmp*tmp <= tL1Size)   {      if (tmp >= 16)        /* no block sizes smaller than 16 */         NB[nNB++] = tmp;      if (tmp >= 80) break;  /* no block sizes bigger than 80 */      tmp += CL;   }   if (!nNB)  /* this should never happen */   {      nNB = 3;      NB[0] = 8;      NB[1] = 4;      NB[2] = 16;   }   else if (nNB > 2)  /* put second biggest blocking factor first in list */   {      tmp = NB[nNB-2];      NB[nNB-2] = NB[0];      NB[0] = tmp;   }   NBf = fopen(NBnam, "w");   fprintf(NBf, "%d\n", nNB);   for (i=0; i != nNB; i++) fprintf(NBf, "%d\n", NB[i]);   fclose(NBf);}int GetSafeNB(char pre, int MaxL1){   int i, L1, tsize, inc;   tsize = GetTypeSize(pre);   inc = ATL_MinMMAlign / tsize;   if (inc < 4) inc = 4;   L1 = (GetCacheSize(MaxL1) * 1024) / tsize;   for (i=inc; i*i < L1; i += inc);   if (i*i > L1) i -= inc;   if (pre == 'd' || pre == 's')   {      if (i*i == L1) i -= inc;   }   else   {      if (i*i == L1) i -= 2*inc;      else i -= inc;   }   if (i < 16) i = 16;   if (i > 80) i = 80;   return(i);}double GetAvg(int n, double tolerance, double *mflop){   int i, j;   double t0, tavg;/* * Sort results, largest first */   for (i=0; i != n; i++)   {      for (j=i+1; j < n; j++)      {         if (mflop[i] < mflop[j])         {            t0 = mflop[i];            mflop[i] = mflop[j];            mflop[j] = t0;         }      }   }/* * Not doing tolerance anymore, just take largest mflop rate if doing wall * times, or median value if doing CPU */#if 1   #ifdef WALL      tavg = mflop[0];   #else      tavg = mflop[n/2];   #endif#else/* * Throw out result if it is outside tolerance; rerun if two mflop not within * tolerance;  this code assumes n == 3 */   if (tolerance*mflop[1] < mflop[0])  /* too big a range in results */   {      if (tolerance*mflop[2] < mflop[1]) return(-1.0);      tavg = (mflop[1] + mflop[2]) / 2.0;   }   else if (tolerance*mflop[2] < mflop[0]) tavg = (mflop[0] + mflop[1]) / 2.0;   else tavg = (mflop[0] + mflop[1] + mflop[2]) / 3.0;#endif   return(tavg);}double mms_case(char pre, int MULADD, int NB, int mu, int nu, int ku,                int pfA, int lat){   char fnam[128], ln[256];   int i;   double mflop[NTIM], t0;   FILE *fp;   if (ku > NB) ku = NB;   else if (ku == -1) ku = NB;   sprintf(fnam,           "res/%c%smm%c%c%d_%dx%dx%d_%dx%dx%d_%dx%dx%d%s%s_%dx%d_%d_pf%d",           pre, "JIK", 'T', 'N', NB, NB, NB, NB, NB, NB, 0, mu, nu, ku,           "_a1", "_b1", MULADD, lat, 1, pfA);   if (!FileExists(fnam))   {      if (pre == 'c' || pre == 'z')         sprintf(ln," make mmcase pre=%c loopO=%s ta=%c tb=%c mb=%d nb=%d kb=%d lda=%d ldb=%d ldc=%d mu=%d nu=%d ku=%d alpha=%d beta=%d muladd=%d lat=%d csA=1 csB=1 csC=2 cleanup=%d pfA=%d\n",                   pre, "JIK", 'T', 'N', NB, NB, NB, NB, NB, 0, mu, nu, ku,                   1, 1, MULADD, lat, 1, pfA);      else sprintf(ln," make mmcase pre=%c loopO=%s ta=%c tb=%c mb=%d nb=%d kb=%d lda=%d ldb=%d ldc=%d mu=%d nu=%d ku=%d alpha=%d beta=%d muladd=%d lat=%d cleanup=%d pfA=%d\n",                   pre, "JIK", 'T', 'N', NB, NB, NB, NB, NB, 0, mu, nu, ku,                   1, 1, MULADD, lat, 1, pfA);      fprintf(stderr, "%s:\n",ln);      if (system(ln) != 0)      {         fprintf(stderr, "ERROR IN COMMAND: %s", ln);         fprintf(stderr, "   PROPOSED FILENAME: %s\n", fnam);         sprintf(ln, "rm -f %s\n", fnam);         system(ln);         exit(-1);      }   }   assert( (fp = fopen(fnam, "r")) != NULL );   for (i=0; i != NTIM; i++)   {      if( fscanf(fp, "%lf", &mflop[i]) != 1 )      {         fprintf(stderr, "\nCANNOT READ FILE '%s', DELETING & QUITTING!\n",                 fnam);         sprintf(ln, "rm -f %s\n", fnam);         system(ln);         exit(-1);      }   }   fclose(fp);   t0 = GetAvg(NTIM, TOLERANCE, mflop);   if (t0 == -1.0)   {      fprintf(stderr, "NB=%d, MU=%d, NU=%d, KU=%d: rerun with higher reps; variation exceeds tolerence\n", NB, mu, nu, ku);      sprintf(ln, "rm -f res/%s\n", fnam);      system(ln);      exit(-1);   }   fprintf(stdout,"\npre=%c, muladd=%d, lat=%d, pf=%d, nb=%d, mu=%d, nu=%d, ku=%d, mflop=%.2f\n",           pre, MULADD, lat, pfA, NB, mu, nu, ku, t0);   return(t0);}double mms_caseIC(char pre, int MULADD, int NB, int mu, int nu, int ku,                  int pfA, int lat)/* * Do simple mmcase, where all operands are kept cache-resident * (useful for FPU optimization phases) */{   char fnam[128], ln[512];   int i;   double mflop[NTIM], t0;   FILE *fp;   if (ku > NB) ku = NB;   else if (ku == -1) ku = NB;   sprintf(fnam,           "res/%c%smm%c%c%d_%dx%dx%d_%dx%dx%d_%dx%dx%d%s%s_%dx%d_%d_IC",           pre, "JIK", 'T', 'N', NB, NB, NB, NB, NB, NB, 0, mu, nu, ku,           "_a1", "_b1", MULADD, lat, 1);   if (!FileExists(fnam))   {      if (pre == 'c' || pre == 'z')         sprintf(ln," make mmcase pre=%c loopO=%s ta=%c tb=%c mb=%d nb=%d kb=%d lda=%d ldb=%d ldc=%d mu=%d nu=%d ku=%d alpha=%d beta=%d muladd=%d lat=%d csA=1 csB=1 csC=2 cleanup=%d casnam=%s moves=\"\"\n",                   pre, "JIK", 'T', 'N', NB, NB, NB, NB, NB, 0, mu, nu, ku,                   1, 1, MULADD, lat, 1, fnam);      else sprintf(ln," make mmcase pre=%c loopO=%s ta=%c tb=%c mb=%d nb=%d kb=%d lda=%d ldb=%d ldc=%d mu=%d nu=%d ku=%d alpha=%d beta=%d muladd=%d lat=%d cleanup=%d casnam=%s moves=\"\"\n",                   pre, "JIK", 'T', 'N', NB, NB, NB, NB, NB, 0, mu, nu, ku,                   1, 1, MULADD, lat, 1, fnam);      fprintf(stderr, "%s:\n",ln);      if (system(ln) != 0)      {         fprintf(stderr, "ERROR IN COMMAND: %s", ln);         fprintf(stderr, "   PROPOSED FILENAME: %s\n", fnam);         sprintf(ln, "rm -f %s\n", fnam);         system(ln);         exit(-1);      }   }   assert( (fp = fopen(fnam, "r")) != NULL );   for (i=0; i != NTIM; i++)   {      if( fscanf(fp, "%lf", &mflop[i]) != 1 )      {         fprintf(stderr, "\nCANNOT READ FILE '%s', DELETING & QUITTING!\n",                 fnam);         sprintf(ln, "rm -f %s\n", fnam);         system(ln);         exit(-1);      }   }   fclose(fp);   t0 = GetAvg(NTIM, TOLERANCE, mflop);   if (t0 == -1.0)   {      fprintf(stderr, "NB=%d, MU=%d, NU=%d, KU=%d: rerun with higher reps; variation exceeds tolerence\n", NB, mu, nu, ku);      sprintf(ln, "rm -f res/%s\n", fnam);      system(ln);      exit(-1);   }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?