⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 looptest.cpp

📁 A C++ class library for scientific computing
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include <blitz/timer.h>BZ_USING_NAMESPACE(blitz)void initialize(double& c, double& d, double* a, double* b, int& N);template<class T>void sink(T&){ }void benchmarkLoops(int, long);int main(){    cout << "This program measures the performance of DAXPY operations"          << endl << "using various C loop structures." << endl << endl;    cout << endl << "In-cache:" << endl;    benchmarkLoops(400,50000);    cout << endl << "Out of cache:" << endl;    benchmarkLoops(1000000,50);    return 0;}void benchmarkLoops(int N, long iterations){    double* _bz_restrict a = new double[N];    double* _bz_restrict b = new double[N];    double c, d;    double t1, t2;    initialize(c, d, a, b, N);    double mflops = iterations * 4.0 * N / (1024.0 * 1024.0);    Timer timer;    cout << "Mflops/s Description" << endl;    long iter;    int i;    /*********************************************************************/    timer.start();    for (iter=0; iter < iterations; ++iter)    {        for (i=0; i < N; ++i)            a[i] += c * b[i];        for (i=0; i < N; ++i)            a[i] += d * b[i];    }    timer.stop();    cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds())          << "   for, indirection, unit stride" << endl;    /*********************************************************************/    timer.start();    for (iter=0; iter < iterations; ++iter)    {        for (i=0; i < N; ++i)            a[i] = a[i] + c * b[i];        for (i=0; i < N; ++i)            a[i] = a[i] + d * b[i];    }    timer.stop();    cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds())         << "   for, indirection, unit stride, no +=" << endl;    /*********************************************************************/    timer.start();    for (iter=0; iter < iterations; ++iter)    {        for (i=N-1; i >= 0; --i)            a[i] += c * b[i];        for (i=N-1; i >= 0; --i)            a[i] += d * b[i];    }    timer.stop();    cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds())         << "   for, indirection, unit stride, backwards loops" << endl;    /*********************************************************************/    timer.start();    for (iter=0; iter < iterations; ++iter)    {        double c2 = c;        int n1 = N & 3;        for (i=0; i < n1; ++i)            a[i] += c2 * b[i];        for (; i < N; i += 4)        {            a[i] += c2 * b[i];            a[i+1] += c2 * b[i+1];            a[i+2] += c2 * b[i+2];            a[i+3] += c2 * b[i+3];        }        double d2 = d;        int n2 = N & 3;        for (i=0; i < n2; ++i)            a[i] += d2 * b[i];        for (; i < N; i += 4)        {            a[i] += d2 * b[i];            a[i+1] += d2 * b[i+1];            a[i+2] += d2 * b[i+2];            a[i+3] += d2 * b[i+3];        }     }    timer.stop();    cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds())         << "    for, unroll=4, unit stride, constants loaded into temps"         << endl;    /*********************************************************************/    timer.start();    for (iter=0; iter < iterations; ++iter)    {        double c2 = c;        int n1 = N & 3;        for (i=0; i < n1; ++i)            a[i] += c2 * b[i];        for (; i < N; i += 4)        {            double t1 = c2 * b[i];            double t2 = c2 * b[i+1];            double t3 = c2 * b[i+2];            double t4 = c2 * b[i+3];            a[i] += t1;            a[i+1] += t2;            a[i+2] += t3;            a[i+3] += t4;        }        double d2 = d;        int n2 = N & 3;        for (i=0; i < n2; ++i)            a[i] += d2 * b[i];        for (; i < N; i += 4)        {            double t1 = d2 * b[i];            double t2 = d2 * b[i+1];            double t3 = d2 * b[i+2];            double t4 = d2 * b[i+3];            a[i] += t1;            a[i+1] += t2;            a[i+2] += t3;            a[i+3] += t4;        }    }    timer.stop();    cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds())         << "    for, unroll=4, unit stride, constants loaded into temps,"         << endl << "\t\t4 read then 4 write"          << endl;    /*********************************************************************/    timer.start();    for (iter=0; iter < iterations; ++iter)    {        double c2 = c;        int n1 = N & 3;        for (i=0; i < n1; ++i)            a[i] += c2 * b[i];        for (; i < N; i += 4)        {            a[i] = a[i] + c2 * b[i];            a[i+1] = a[i+1] + c2 * b[i+1];            a[i+2] = a[i+2] + c2 * b[i+2];            a[i+3] = a[i+3] + c2 * b[i+3];        }        double d2 = d;        int n2 = N & 3;        for (i=0; i < n2; ++i)            a[i] += d2 * b[i];        for (; i < N; i += 4)        {            a[i] = a[i] + d2 * b[i];            a[i+1] = a[i+1] + d2 * b[i+1];            a[i+2] = a[i+2] + d2 * b[i+2];            a[i+3] = a[i+3] + d2 * b[i+3];        }    }    timer.stop();    cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds())         << "    for, unroll=4, unit stride, constants loaded into temps,"         << endl << "            no += "         << endl;    /*********************************************************************/    timer.start();    for (iter=0; iter < iterations; ++iter)    {        double c2 = c;        int n1 = N & 3;        for (i=0; i < n1; ++i)            a[i] += c2 * b[i];        for (; i < N; i += 4)        {            int i1 = i + 1;            a[i] += c2 * b[i];            int i2 = i + 2;            a[i1] += c2 * b[i1];            int i3 = i + 3;            a[i2] += c2 * b[i2];            a[i3] += c2 * b[i3];        }        double d2 = d;        int n2 = N & 3;        for (i=0; i < n2; ++i)            a[i] += d2 * b[i];        for (; i < N; i += 4)        {            int i1 = i + 1;            a[i] += d2 * b[i];            int i2 = i + 2;            a[i1] += d2 * b[i1];            int i3 = i + 3;            a[i2] += d2 * b[i2];            a[i3] += d2 * b[i3];        }    }    timer.stop();    cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds())         << "    for, unroll=4, unit stride, constants loaded into temps,"         << endl << "        CSE for index offsets"         << endl;    /*********************************************************************/    timer.start();    for (iter=0; iter < iterations; ++iter)    {        double c2 = c;        int n1 = N & 3;        for (i=0; i < n1; ++i)            a[i] += c2 * b[i];        double* pa = a+n1;        double* pb = b+n1;         int top = N - n1 - 4;        for (i=top; i >= 0; i -= 4)        {            pa[i] += c2 * pb[i];            pa[i+1] += c2 * pb[i+1];            pa[i+2] += c2 * pb[i+2];            pa[i+3] += c2 * pb[i+3];        }        double d2 = d;        int n2 = N & 3;        for (i=0; i < n2; ++i)            a[i] += d2 * b[i];        pa = a+n2;        pb = b+n2;        top = N - n2 - 4;        for (i=top; i >= 0; i -= 4)        {            pa[i] += d2 * pb[i];            pa[i+1] += d2 * pb[i+1];            pa[i+2] += d2 * pb[i+2];            pa[i+3] += d2 * pb[i+3];        }    }    timer.stop();    cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds())         << "    for, unroll=4, unit stride, constants loaded into temps,"         << "            backwards"         << endl;    /*********************************************************************/    timer.start();    for (iter=0; iter < iterations; ++iter)    {        double c2 = c;        int n1 = N & 7;        for (i=0; i < n1; ++i)            a[i] += c2 * b[i];        for (; i < N; i += 8)        {            a[i] += c2 * b[i];            a[i+1] += c2 * b[i+1];            a[i+2] += c2 * b[i+2];            a[i+3] += c2 * b[i+3];            a[i+4] += c2 * b[i+4];            a[i+5] += c2 * b[i+5];            a[i+6] += c2 * b[i+6];            a[i+7] += c2 * b[i+7];        }        double d2 = d;        int n2 = N & 7;        for (i=0; i < n2; ++i)            a[i] += d2 * b[i];        for (; i < N; i += 8)        {            a[i] += d2 * b[i];            a[i+1] += d2 * b[i+1];            a[i+2] += d2 * b[i+2];            a[i+3] += d2 * b[i+3];            a[i+4] += d2 * b[i+4];            a[i+5] += d2 * b[i+5];            a[i+6] += d2 * b[i+6];            a[i+7] += d2 * b[i+7];        }    }    timer.stop();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -