mmsearch.c
来自「基于Blas CLapck的.用过的人知道是干啥的」· C语言 代码 · 共 2,188 行 · 第 1/5 页
C
2,188 行
/* * Automatically Tuned Linear Algebra Software v3.8.0 * (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the ATLAS group or the names of its contributers may * not be used to endorse or promote products derived from this * software without specific written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */#include <stdio.h>#include <stdlib.h>#include <assert.h>#include <string.h>#include "atlas_misc.h"#include "atlas_fopen.h"#include "atlas_prefetch.h"#define Mmin(x, y) ( (x) > (y) ? (y) : (x) )#define TOLERANCE 1.2#define REPS 4096#define L1FNAME "L1CacheSize"#define NTIM 3#define MAXLAT 6/* * For 2-operand assemblers, no benefit from 2-D register blocking, so flag * them; If unknown arch is also 2-op, no problem will just search longer */#if defined (ATL_GAS_x8632) || defined(ATL_GAS_x8664) #define TWO_OP_ASM#endifchar LANG;void PrintUsage(char *xnam){ fprintf(stderr, "\n\nUsage: %s [-r #][-h][-f][-l #][-p s/d/c/z][-m #]\n", xnam); fprintf(stderr, "-h : Print this help screen\n"); fprintf(stderr, "-f : Force complete search over given parameters\n"); fprintf(stderr, "-p s/d/c/z : set the precision to search for\n"); fprintf(stderr, "-r # : Set max number of registers to use to # (default 32)\n"); fprintf(stderr, "-m # : Set max L1 cache size (kilobytes) to #\n"); fprintf(stderr, "-L <c/f> : Select what language to use (C or Fortran77)\n"); fprintf(stderr, "-K # : Set K-loop unrolling to # (-1 = K).\n"); fprintf(stderr, "-l # : Use latency factor #. If set to 0,\n"); fprintf(stderr," do not do latency checking. By default, latency checking is\n"); fprintf(stderr," done only if initial timings show it is a win.\n"); exit(-1);}void GetSettings(int nargs, char *args[], char *pre, char *lang, int *ku, int *LAT, int *FRC, int *nreg, int *MaxL1Size, int *ROUT){ int i; *FRC = 0; *LAT = -1; *nreg = -1; *MaxL1Size = 128; *pre = 'd'; *lang = 'C'; *ku = 0; *ROUT = 0; for (i=1; i < nargs; i++) { if (*args[i] != '-') PrintUsage(args[0]); switch(args[i][1]) { case 'K': *ku = atoi(args[++i]); break; case 'L': i++; if ( (*args[i] == 'F') || (*args[i] == 'f') ) *lang = 'F'; break; case 'm' : *MaxL1Size = atoi(args[++i]); break; case 'r' : *nreg = atoi(args[++i]); break; case 'f' : *FRC = atoi(args[++i]); break; case 'l' : *LAT = atoi(args[++i]); break; case 'p' : *pre = *args[++i]; break; default: case 'R': *ROUT = atoi(args[++i]); break; case 'h' : PrintUsage(args[0]); } }}int L1Elts(char pre, int MaxL1Size){ FILE *L1f; int L1Size, tsize; char ln[128]; if (!FileExists("res/L1CacheSize")) { sprintf(ln, "make RunL1 MaxL1=%d\n",MaxL1Size); if (system(ln) != 0) { remove("res/L1CacheSize"); fprintf(stderr, "Error in command: %s", ln); exit(-1); } } L1f = fopen("res/L1CacheSize", "r"); assert(L1f != NULL); fscanf(L1f, "%d", &L1Size); fclose(L1f); switch (pre) { case 's': tsize = sizeof(float); break; case 'd': tsize = sizeof(double); break; case 'q': tsize = sizeof(long double); break; case 'c': tsize = sizeof(float); break; case 'z': tsize = sizeof(double); break; } return( (L1Size*1024) / tsize);}int GetCacheSize(int MaxL1Size)/* * Returns L1 size in kilobytes */{ FILE *L1f; int L1Size; char ln[32]; if (!FileExists("res/L1CacheSize")) { sprintf(ln, "make RunL1 MaxL1=%d\n",MaxL1Size); if (system(ln) != 0) { remove("res/L1CacheSize"); fprintf(stderr, "Error in command: %s", ln); exit(-1); } } L1f = fopen("res/L1CacheSize", "r"); assert(L1f != NULL); fscanf(L1f, "%d", &L1Size); fclose(L1f); fprintf(stderr, "\n Read in L1 Cache size as = %dKB.\n",L1Size); return(L1Size);}int GetTypeSize(char pre){ int tsize; if (pre == 'c' || pre == 's') tsize = ATL_ssize; else tsize = ATL_dsize; return(tsize);}void findNBs(char prec, char *NBnam, int MaxL1Size){ FILE *NBf; char ln[80]; int i, L1Size, tmp, tsize, tL1Size, CL, nNB; int NB[100]; fprintf(stderr, "NB setting not supplied; calculating:\n"); L1Size = GetCacheSize(MaxL1Size); tsize = GetTypeSize(prec); tL1Size = L1Size * (1024 / tsize); tmp = CL = ATL_Cachelen / tsize; if (!tmp) tmp=1; nNB = 0; fprintf(stderr, "tmp=%d, tL1size=%d\n",tmp, tL1Size); while (tmp*tmp <= tL1Size) { if (tmp >= 16) /* no block sizes smaller than 16 */ NB[nNB++] = tmp; if (tmp >= 80) break; /* no block sizes bigger than 80 */ tmp += CL; } if (!nNB) /* this should never happen */ { nNB = 3; NB[0] = 8; NB[1] = 4; NB[2] = 16; } else if (nNB > 2) /* put second biggest blocking factor first in list */ { tmp = NB[nNB-2]; NB[nNB-2] = NB[0]; NB[0] = tmp; } NBf = fopen(NBnam, "w"); fprintf(NBf, "%d\n", nNB); for (i=0; i != nNB; i++) fprintf(NBf, "%d\n", NB[i]); fclose(NBf);}int GetSafeNB(char pre, int MaxL1){ int i, L1, tsize, inc; tsize = GetTypeSize(pre); inc = ATL_MinMMAlign / tsize; if (inc < 4) inc = 4; L1 = (GetCacheSize(MaxL1) * 1024) / tsize; for (i=inc; i*i < L1; i += inc); if (i*i > L1) i -= inc; if (pre == 'd' || pre == 's') { if (i*i == L1) i -= inc; } else { if (i*i == L1) i -= 2*inc; else i -= inc; } if (i < 16) i = 16; if (i > 80) i = 80; return(i);}double GetAvg(int n, double tolerance, double *mflop){ int i, j; double t0, tavg;/* * Sort results, largest first */ for (i=0; i != n; i++) { for (j=i+1; j < n; j++) { if (mflop[i] < mflop[j]) { t0 = mflop[i]; mflop[i] = mflop[j]; mflop[j] = t0; } } }/* * Not doing tolerance anymore, just take largest mflop rate if doing wall * times, or median value if doing CPU */#if 1 #ifdef WALL tavg = mflop[0]; #else tavg = mflop[n/2]; #endif#else/* * Throw out result if it is outside tolerance; rerun if two mflop not within * tolerance; this code assumes n == 3 */ if (tolerance*mflop[1] < mflop[0]) /* too big a range in results */ { if (tolerance*mflop[2] < mflop[1]) return(-1.0); tavg = (mflop[1] + mflop[2]) / 2.0; } else if (tolerance*mflop[2] < mflop[0]) tavg = (mflop[0] + mflop[1]) / 2.0; else tavg = (mflop[0] + mflop[1] + mflop[2]) / 3.0;#endif return(tavg);}double mms_case(char pre, int MULADD, int NB, int mu, int nu, int ku, int pfA, int lat){ char fnam[128], ln[256]; int i; double mflop[NTIM], t0; FILE *fp; if (ku > NB) ku = NB; else if (ku == -1) ku = NB; sprintf(fnam, "res/%c%smm%c%c%d_%dx%dx%d_%dx%dx%d_%dx%dx%d%s%s_%dx%d_%d_pf%d", pre, "JIK", 'T', 'N', NB, NB, NB, NB, NB, NB, 0, mu, nu, ku, "_a1", "_b1", MULADD, lat, 1, pfA); if (!FileExists(fnam)) { if (pre == 'c' || pre == 'z') sprintf(ln," make mmcase pre=%c loopO=%s ta=%c tb=%c mb=%d nb=%d kb=%d lda=%d ldb=%d ldc=%d mu=%d nu=%d ku=%d alpha=%d beta=%d muladd=%d lat=%d csA=1 csB=1 csC=2 cleanup=%d pfA=%d\n", pre, "JIK", 'T', 'N', NB, NB, NB, NB, NB, 0, mu, nu, ku, 1, 1, MULADD, lat, 1, pfA); else sprintf(ln," make mmcase pre=%c loopO=%s ta=%c tb=%c mb=%d nb=%d kb=%d lda=%d ldb=%d ldc=%d mu=%d nu=%d ku=%d alpha=%d beta=%d muladd=%d lat=%d cleanup=%d pfA=%d\n", pre, "JIK", 'T', 'N', NB, NB, NB, NB, NB, 0, mu, nu, ku, 1, 1, MULADD, lat, 1, pfA); fprintf(stderr, "%s:\n",ln); if (system(ln) != 0) { fprintf(stderr, "ERROR IN COMMAND: %s", ln); fprintf(stderr, " PROPOSED FILENAME: %s\n", fnam); sprintf(ln, "rm -f %s\n", fnam); system(ln); exit(-1); } } assert( (fp = fopen(fnam, "r")) != NULL ); for (i=0; i != NTIM; i++) { if( fscanf(fp, "%lf", &mflop[i]) != 1 ) { fprintf(stderr, "\nCANNOT READ FILE '%s', DELETING & QUITTING!\n", fnam); sprintf(ln, "rm -f %s\n", fnam); system(ln); exit(-1); } } fclose(fp); t0 = GetAvg(NTIM, TOLERANCE, mflop); if (t0 == -1.0) { fprintf(stderr, "NB=%d, MU=%d, NU=%d, KU=%d: rerun with higher reps; variation exceeds tolerence\n", NB, mu, nu, ku); sprintf(ln, "rm -f res/%s\n", fnam); system(ln); exit(-1); } fprintf(stdout,"\npre=%c, muladd=%d, lat=%d, pf=%d, nb=%d, mu=%d, nu=%d, ku=%d, mflop=%.2f\n", pre, MULADD, lat, pfA, NB, mu, nu, ku, t0); return(t0);}double mms_caseIC(char pre, int MULADD, int NB, int mu, int nu, int ku, int pfA, int lat)/* * Do simple mmcase, where all operands are kept cache-resident * (useful for FPU optimization phases) */{ char fnam[128], ln[512]; int i; double mflop[NTIM], t0; FILE *fp; if (ku > NB) ku = NB; else if (ku == -1) ku = NB; sprintf(fnam, "res/%c%smm%c%c%d_%dx%dx%d_%dx%dx%d_%dx%dx%d%s%s_%dx%d_%d_IC", pre, "JIK", 'T', 'N', NB, NB, NB, NB, NB, NB, 0, mu, nu, ku, "_a1", "_b1", MULADD, lat, 1); if (!FileExists(fnam)) { if (pre == 'c' || pre == 'z') sprintf(ln," make mmcase pre=%c loopO=%s ta=%c tb=%c mb=%d nb=%d kb=%d lda=%d ldb=%d ldc=%d mu=%d nu=%d ku=%d alpha=%d beta=%d muladd=%d lat=%d csA=1 csB=1 csC=2 cleanup=%d casnam=%s moves=\"\"\n", pre, "JIK", 'T', 'N', NB, NB, NB, NB, NB, 0, mu, nu, ku, 1, 1, MULADD, lat, 1, fnam); else sprintf(ln," make mmcase pre=%c loopO=%s ta=%c tb=%c mb=%d nb=%d kb=%d lda=%d ldb=%d ldc=%d mu=%d nu=%d ku=%d alpha=%d beta=%d muladd=%d lat=%d cleanup=%d casnam=%s moves=\"\"\n", pre, "JIK", 'T', 'N', NB, NB, NB, NB, NB, 0, mu, nu, ku, 1, 1, MULADD, lat, 1, fnam); fprintf(stderr, "%s:\n",ln); if (system(ln) != 0) { fprintf(stderr, "ERROR IN COMMAND: %s", ln); fprintf(stderr, " PROPOSED FILENAME: %s\n", fnam); sprintf(ln, "rm -f %s\n", fnam); system(ln); exit(-1); } } assert( (fp = fopen(fnam, "r")) != NULL ); for (i=0; i != NTIM; i++) { if( fscanf(fp, "%lf", &mflop[i]) != 1 ) { fprintf(stderr, "\nCANNOT READ FILE '%s', DELETING & QUITTING!\n", fnam); sprintf(ln, "rm -f %s\n", fnam); system(ln); exit(-1); } } fclose(fp); t0 = GetAvg(NTIM, TOLERANCE, mflop); if (t0 == -1.0) { fprintf(stderr, "NB=%d, MU=%d, NU=%d, KU=%d: rerun with higher reps; variation exceeds tolerence\n", NB, mu, nu, ku); sprintf(ln, "rm -f res/%s\n", fnam); system(ln); exit(-1); }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?