fc.c

来自「基于Blas CLapck的.用过的人知道是干啥的」· C语言 代码 · 共 635 行 · 第 1/2 页

C
635
字号
   #define NBmm(m_, n_, k_, alp_, a_, lda_, b_, ldb_, bet_, c_, ldc_) \   { \      zgemm_("T", "N", &(m_), &(n_), &(k_), alp_, a_, &(lda_), b_, \             &(ldb_), bet_, c_, &(ldc_)); \   }#endif#endifvoid SortDoubles(int N, double *X)/* * Simple selection sort on X */{   double small;   int ismall, i, j;   for (i=0; i < N; i++)   {      ismall = i;      small = X[i];      for (j=i+1; j < N; j++)      {         if (X[j] < small)         {            ismall = j;            small = X[j];         }      }     if (ismall != i)      {         X[ismall] = X[i];         X[i] = small;      }   }}#ifndef NSAMPLE   #define NSAMPLE 3#endifvoid time_mm(char *fnam0){   char fnam[80];#if defined(LDA) && LDA != 0      const int lda=LDA;#else      const int lda=LDA2;#endif#if defined(LDB) && LDB != 0   const int ldb=LDB;#else   const int ldb=LDB2;#endif#if defined(LDC) && LDC != 0   const int ldc=LDC;#else   const int ldc=LDC2;#endif   int nA, nB;   int restarts=0;   int i, j, k, reps, len;   int incA, incB, incC, nmov;   double t0, t1, mflop;   double times[NSAMPLE];   void *va=NULL, *vb=NULL, *vc=NULL, *vp=NULL;   TYPE *A, *B, *C, *a, *b, *c, *stA, *stB, *stC;   #ifdef TCPLX      int inca, incb, incc;      const TYPE one=1.0, none=(-1.0);      #if (ALPHA == 1)         TYPE alpha[2] = {1.0, 0.0};      #elif (ALPHA == -1)         TYPE alpha[2] = {-1.0, 0.0};      #else         TYPE alpha[2] = {2.3, 0.0};      #endif      #if (BETA == 1)         TYPE beta[2] = {1.0, 0.0};      #elif (BETA == -1)         TYPE beta[2] = {-1.0, 0.0};      #elif (BETA == 0)         TYPE beta[2] = {0.0, 0.0};      #else         TYPE beta[2] = {1.3, 0.0};      #endif   #else      #ifdef ALPHA         TYPE alpha=ALPHA;      #else         TYPE alpha=1.0;      #endif      #ifdef BETA         TYPE beta=BETA;      #else         TYPE beta=1.0;      #endif   #endif   const TYPE rone=1.0, rnone=(-1.0);   FILE *fpout;   double time00();   #ifdef NoTransA      nA = KB;   #else      nA = MB;   #endif   #ifdef NoTransB      nB = NB;   #else      nB = KB;   #endif   #ifdef ATL_nkflop      #ifdef TCPLX         t0 = 8.0*MB;      #else         t0 = 2.0*MB;      #endif      t0 *= NB;      t0 *= KB;      t1 = ATL_nkflop * 1000.0;      reps = t1 / t0;   #else      reps = REPS * ( (40.0/MB) * (40.0/NB) * (40.0/KB) );      #ifdef TCPLX         reps /= 4;      #endif   #endif   if (reps < 1) reps = 1;   incA = incB = incC = nmov = 0;   #ifdef MoveA      incA = lda*nA;      nmov++;   #else      va = malloc(128+ATL_sizeof*lda*nA);      assert(va);      a = A = (void*) ( 128 + ((((size_t)va)>>7)<<7) );      stA = A + (lda*nA SHIFT);   #endif   #ifdef MoveB      incB = ldb*nB;      nmov++;   #else      vb = malloc(128+ATL_sizeof*ldb*nB);      assert(vb);      b = B = (void*) ( 128 + ((((size_t)vb)>>7)<<7) );      stB = B + (ldb*nB SHIFT);   #endif   #ifdef MoveC      assert(ldc == MB);      incC = ldc*NB;      nmov++;   #else      vc = malloc(128+ATL_sizeof*ldc*NB);      assert(vc);      c = C = (void*) ( 128 + ((((size_t)vc)>>7)<<7) );      stC = C + (ldc * NB SHIFT);   #endif/*   sprintf(fnam, "res/%s", Mstr(NBmm)); */   if (fnam0) strcpy(fnam, fnam0);   else      sprintf(fnam,              "res/%c%smm%s%s%d_%dx%dx%d_%dx%dx%d_%dx%dx%d%s%s_%dx%d_%d_pf%d",              PRE, Mstr(LOOPO), Mstr(TA), Mstr(TB), NB, MB0, NB0, KB0,              LDA, LDB, LDC, MU, NU, KU, Mstr(ALPHAnam), Mstr(BETAnam),              MULADD, LAT, CLEANUP, PREFA);   fpout = fopen(fnam, "w");   if (nmov != 0)  /* need to allocate space */   {      len = (1.2*L2SIZE) / ATL_sizeof;      /* total moving length */      i = Mmin(incA, MB*KB) + Mmin(incB, NB*KB) + Mmin(incC, MB*NB);      j = (len+i-1) / i;       /* number of reps to cause flush */      len = (incA+incB+incC) * j;      vp = malloc(128+len*ATL_sizeof);      assert(vp);      #ifdef TCPLX         inca = incA;         incb = incB;         incc = incC;         incA *= 2;         incB *= 2;         incC *= 2;      #endif      a = (void*) ( 128 + ((((size_t)vp)>>7)<<7) );      if (incA)      {         A = a;         stA = a + incA * j;      }      if (incB)      {         b = B = a + j * incA;         stB = b + j * incB;         if (incC)         {            c = C = stB;            stC = C + j * incC;         }      }      else if (incC)      {         c = C = a + j * incA;         stC = C + j * incC;      }      a = A;   }   #ifdef TCPLX   else   {         inca = incA;         incb = incB;         incc = incC;         incA *= 2;         incB *= 2;         incC *= 2;   }   #endif   while (a != stA) *a++ = dumb_rand();  a = A;   while (b != stB) *b++ = dumb_rand();  b = B;   while (c != stC) *c++ = 0.0;          c = C;   for (i=0; i != NSAMPLE; i++)   {      t0 = time00();      for (k=reps; k; k--)      {         NBmm(MB, NB, KB, alpha, a, lda, b, ldb, beta, c, ldc);         #ifdef MoveA            a += incA;            if (a == stA) a = A;         #endif         #ifdef MoveB            b += incB;            if (b == stB) b = B;         #endif         #ifdef MoveC            c += incC;            if (c == stC)            {               c = C;            #ifdef TREAL               if (beta != 0.0) beta = 1.0 / beta;               if (alpha != 0.0) alpha = -alpha;            #else               if (*beta != 0.0) *beta = 1.0 / (*beta);               if (*alpha != 0.0) *alpha = -(*alpha);            #endif            }         #else            #ifdef TREAL               if (beta != 0.0) beta = 1.0 / beta;               if (alpha != 0.0) alpha = -alpha;            #else               if (*beta != 0.0) *beta = 1.0 / (*beta);               if (*alpha != 0.0) *alpha = -(*alpha);            #endif         #endif      }      t1 = time00() - t0;/* *    Workaround for mystery prob Windows/icc, where first return val is *    always 0 */      if (t1 >= 0.005)      {         mflop = ( (((2.0*MB)*NB)*KB)*reps ) / (t1 * 1000000.0);         #ifdef TCPLX            mflop *= 4.0;         #endif         fprintf(stderr,   "%cNB=%d, ld=%d,%d,%d, mu=%d, nu=%d, ku=%d, lat=%d, pf=%d: time=%.3f, mflop=%.2f\n",                 PRE, NB, lda, ldb, ldc, MU, NU, KU, LAT, PREFA, t1, mflop);         if (fpout) fprintf(fpout, "%lf\n",mflop);         times[i] = t1;      }      else      {         if (++restarts > 5)         {            fclose(fpout);            remove(fnam);            fprintf(stderr, "Too many zero-time values, dying\n");            exit(-1);         }         fprintf(stderr, "Near-zero time %e rejected\n", t0);         i--;      }   }   SortDoubles(NSAMPLE, times);   #ifdef WALL      t1 = times[0];   #else      t1 = times[NSAMPLE/2];   #endif   mflop = ( (((2.0*MB)*NB)*KB)*reps ) / (t1 * 1000000.0);   #ifdef TCPLX      mflop *= 4.0;   #endif   fprintf(stdout, "%cNB=%d, time=%.3f, mflop=%.2f\n", PRE, NB, t1, mflop);   if (fpout) fclose(fpout);   if (vp) free(vp);   if (va) free(va);   if (vb) free(vb);   if (vc) free(vc);}main(int nargs, char **args){   char *fnam;   if (nargs > 1) fnam = args[1];   else fnam = NULL;   time_mm(fnam);   exit(0);}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?