fc.c
来自「基于Blas CLapck的.用过的人知道是干啥的」· C语言 代码 · 共 635 行 · 第 1/2 页
C
635 行
#define NBmm(m_, n_, k_, alp_, a_, lda_, b_, ldb_, bet_, c_, ldc_) \ { \ zgemm_("T", "N", &(m_), &(n_), &(k_), alp_, a_, &(lda_), b_, \ &(ldb_), bet_, c_, &(ldc_)); \ }#endif#endifvoid SortDoubles(int N, double *X)/* * Simple selection sort on X */{ double small; int ismall, i, j; for (i=0; i < N; i++) { ismall = i; small = X[i]; for (j=i+1; j < N; j++) { if (X[j] < small) { ismall = j; small = X[j]; } } if (ismall != i) { X[ismall] = X[i]; X[i] = small; } }}#ifndef NSAMPLE #define NSAMPLE 3#endifvoid time_mm(char *fnam0){ char fnam[80];#if defined(LDA) && LDA != 0 const int lda=LDA;#else const int lda=LDA2;#endif#if defined(LDB) && LDB != 0 const int ldb=LDB;#else const int ldb=LDB2;#endif#if defined(LDC) && LDC != 0 const int ldc=LDC;#else const int ldc=LDC2;#endif int nA, nB; int restarts=0; int i, j, k, reps, len; int incA, incB, incC, nmov; double t0, t1, mflop; double times[NSAMPLE]; void *va=NULL, *vb=NULL, *vc=NULL, *vp=NULL; TYPE *A, *B, *C, *a, *b, *c, *stA, *stB, *stC; #ifdef TCPLX int inca, incb, incc; const TYPE one=1.0, none=(-1.0); #if (ALPHA == 1) TYPE alpha[2] = {1.0, 0.0}; #elif (ALPHA == -1) TYPE alpha[2] = {-1.0, 0.0}; #else TYPE alpha[2] = {2.3, 0.0}; #endif #if (BETA == 1) TYPE beta[2] = {1.0, 0.0}; #elif (BETA == -1) TYPE beta[2] = {-1.0, 0.0}; #elif (BETA == 0) TYPE beta[2] = {0.0, 0.0}; #else TYPE beta[2] = {1.3, 0.0}; #endif #else #ifdef ALPHA TYPE alpha=ALPHA; #else TYPE alpha=1.0; #endif #ifdef BETA TYPE beta=BETA; #else TYPE beta=1.0; #endif #endif const TYPE rone=1.0, rnone=(-1.0); FILE *fpout; double time00(); #ifdef NoTransA nA = KB; #else nA = MB; #endif #ifdef NoTransB nB = NB; #else nB = KB; #endif #ifdef ATL_nkflop #ifdef TCPLX t0 = 8.0*MB; #else t0 = 2.0*MB; #endif t0 *= NB; t0 *= KB; t1 = ATL_nkflop * 1000.0; reps = t1 / t0; #else reps = REPS * ( (40.0/MB) * (40.0/NB) * (40.0/KB) ); #ifdef TCPLX reps /= 4; #endif #endif if (reps < 1) reps = 1; incA = incB = incC = nmov = 0; #ifdef MoveA incA = lda*nA; nmov++; #else va = malloc(128+ATL_sizeof*lda*nA); assert(va); a = A = (void*) ( 128 + ((((size_t)va)>>7)<<7) ); stA = A + (lda*nA SHIFT); #endif #ifdef MoveB incB = ldb*nB; nmov++; #else vb = malloc(128+ATL_sizeof*ldb*nB); assert(vb); b = B = (void*) ( 128 + ((((size_t)vb)>>7)<<7) ); stB = B + (ldb*nB SHIFT); #endif #ifdef MoveC assert(ldc == MB); incC = ldc*NB; nmov++; #else vc = malloc(128+ATL_sizeof*ldc*NB); assert(vc); c = C = (void*) ( 128 + ((((size_t)vc)>>7)<<7) ); stC = C + (ldc * NB SHIFT); #endif/* sprintf(fnam, "res/%s", Mstr(NBmm)); */ if (fnam0) strcpy(fnam, fnam0); else sprintf(fnam, "res/%c%smm%s%s%d_%dx%dx%d_%dx%dx%d_%dx%dx%d%s%s_%dx%d_%d_pf%d", PRE, Mstr(LOOPO), Mstr(TA), Mstr(TB), NB, MB0, NB0, KB0, LDA, LDB, LDC, MU, NU, KU, Mstr(ALPHAnam), Mstr(BETAnam), MULADD, LAT, CLEANUP, PREFA); fpout = fopen(fnam, "w"); if (nmov != 0) /* need to allocate space */ { len = (1.2*L2SIZE) / ATL_sizeof; /* total moving length */ i = Mmin(incA, MB*KB) + Mmin(incB, NB*KB) + Mmin(incC, MB*NB); j = (len+i-1) / i; /* number of reps to cause flush */ len = (incA+incB+incC) * j; vp = malloc(128+len*ATL_sizeof); assert(vp); #ifdef TCPLX inca = incA; incb = incB; incc = incC; incA *= 2; incB *= 2; incC *= 2; #endif a = (void*) ( 128 + ((((size_t)vp)>>7)<<7) ); if (incA) { A = a; stA = a + incA * j; } if (incB) { b = B = a + j * incA; stB = b + j * incB; if (incC) { c = C = stB; stC = C + j * incC; } } else if (incC) { c = C = a + j * incA; stC = C + j * incC; } a = A; } #ifdef TCPLX else { inca = incA; incb = incB; incc = incC; incA *= 2; incB *= 2; incC *= 2; } #endif while (a != stA) *a++ = dumb_rand(); a = A; while (b != stB) *b++ = dumb_rand(); b = B; while (c != stC) *c++ = 0.0; c = C; for (i=0; i != NSAMPLE; i++) { t0 = time00(); for (k=reps; k; k--) { NBmm(MB, NB, KB, alpha, a, lda, b, ldb, beta, c, ldc); #ifdef MoveA a += incA; if (a == stA) a = A; #endif #ifdef MoveB b += incB; if (b == stB) b = B; #endif #ifdef MoveC c += incC; if (c == stC) { c = C; #ifdef TREAL if (beta != 0.0) beta = 1.0 / beta; if (alpha != 0.0) alpha = -alpha; #else if (*beta != 0.0) *beta = 1.0 / (*beta); if (*alpha != 0.0) *alpha = -(*alpha); #endif } #else #ifdef TREAL if (beta != 0.0) beta = 1.0 / beta; if (alpha != 0.0) alpha = -alpha; #else if (*beta != 0.0) *beta = 1.0 / (*beta); if (*alpha != 0.0) *alpha = -(*alpha); #endif #endif } t1 = time00() - t0;/* * Workaround for mystery prob Windows/icc, where first return val is * always 0 */ if (t1 >= 0.005) { mflop = ( (((2.0*MB)*NB)*KB)*reps ) / (t1 * 1000000.0); #ifdef TCPLX mflop *= 4.0; #endif fprintf(stderr, "%cNB=%d, ld=%d,%d,%d, mu=%d, nu=%d, ku=%d, lat=%d, pf=%d: time=%.3f, mflop=%.2f\n", PRE, NB, lda, ldb, ldc, MU, NU, KU, LAT, PREFA, t1, mflop); if (fpout) fprintf(fpout, "%lf\n",mflop); times[i] = t1; } else { if (++restarts > 5) { fclose(fpout); remove(fnam); fprintf(stderr, "Too many zero-time values, dying\n"); exit(-1); } fprintf(stderr, "Near-zero time %e rejected\n", t0); i--; } } SortDoubles(NSAMPLE, times); #ifdef WALL t1 = times[0]; #else t1 = times[NSAMPLE/2]; #endif mflop = ( (((2.0*MB)*NB)*KB)*reps ) / (t1 * 1000000.0); #ifdef TCPLX mflop *= 4.0; #endif fprintf(stdout, "%cNB=%d, time=%.3f, mflop=%.2f\n", PRE, NB, t1, mflop); if (fpout) fclose(fpout); if (vp) free(vp); if (va) free(va); if (vb) free(vb); if (vc) free(vc);}main(int nargs, char **args){ char *fnam; if (nargs > 1) fnam = args[1]; else fnam = NULL; time_mm(fnam); exit(0);}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?