⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 looptest.cpp

📁 数值计算工具库，使用 C++ 语言编写，可以直接调用。
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include <blitz/timer.h>

BZ_USING_NAMESPACE(blitz)

// Forward declaration; the definition is not in this part of the file.
// benchmarkLoops calls this once, before any timing starts, passing the two
// scalar multipliers (c, d), the two arrays (a, b) and the array length N.
// NOTE(review): c and d are uninitialized at the call site, so this function
// presumably assigns them (and fills a and b) -- confirm against the definition.
void initialize(double& c, double& d, double* a, double* b, int& N);

// Intentionally empty function template: takes any lvalue by reference and
// does nothing with it.
// NOTE(review): presumably a helper to keep computed values "used" -- no call
// is visible in this part of the file; confirm.
template<typename T>
void sink(T& /* unused */)
{
}

// Forward declaration so main() can call the benchmark driver before its
// definition further down. First argument: array length (N); second argument:
// number of timed repetitions of each loop variant (iterations).
void benchmarkLoops(int, long);

// Program entry point: prints a short banner, then runs the loop benchmark
// once per scenario, each preceded by the scenario's heading.
int main()
{
    // One benchmark run: heading to print, array length, repetition count.
    struct Scenario
    {
        const char* heading;
        int         length;
        long        passes;
    };

    static const Scenario scenarios[] =
    {
        { "In-cache:",     400,     50000 },
        { "Out of cache:", 1000000, 50    }
    };
    const unsigned scenarioCount = sizeof(scenarios) / sizeof(scenarios[0]);

    cout << "This program measures the performance of DAXPY operations";
    cout << endl;
    cout << "using various C loop structures.";
    cout << endl << endl;

    for (unsigned k = 0; k < scenarioCount; ++k)
    {
        const Scenario& s = scenarios[k];

        cout << endl << s.heading << endl;

        benchmarkLoops(s.length, s.passes);
    }

    return 0;
}

void benchmarkLoops(int N, long iterations)
{
    double* _bz_restrict a = new double[N];
    double* _bz_restrict b = new double[N];
    double c, d;
    double t1, t2;

    initialize(c, d, a, b, N);

    double mflops = iterations * 4.0 * N / (1024.0 * 1024.0);

    Timer timer;

    cout << "Mflops/s Description" << endl;

    long iter;
    int i;

    /*********************************************************************/

    timer.start();
    for (iter=0; iter < iterations; ++iter)
    {
        for (i=0; i < N; ++i)
            a[i] += c * b[i];

        for (i=0; i < N; ++i)
            a[i] += d * b[i];
    }
    timer.stop();

    cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) 
         << "   for, indirection, unit stride" << endl;

    /*********************************************************************/

    timer.start();
    for (iter=0; iter < iterations; ++iter)
    {
        for (i=0; i < N; ++i)
            a[i] = a[i] + c * b[i];

        for (i=0; i < N; ++i)
            a[i] = a[i] + d * b[i];
    }
    timer.stop();

    cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds())
         << "   for, indirection, unit stride, no +=" << endl;

    /*********************************************************************/

    timer.start();
    for (iter=0; iter < iterations; ++iter)
    {
        for (i=N-1; i >= 0; --i)
            a[i] += c * b[i];

        for (i=N-1; i >= 0; --i)
            a[i] += d * b[i];
    }
    timer.stop();

    cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds())
         << "   for, indirection, unit stride, backwards loops" << endl;

    /*********************************************************************/

    timer.start();
    for (iter=0; iter < iterations; ++iter)
    {
        double c2 = c;

        int n1 = N & 3;
        for (i=0; i < n1; ++i)
            a[i] += c2 * b[i];

        for (; i < N; i += 4)
        {
            a[i] += c2 * b[i];
            a[i+1] += c2 * b[i+1];
            a[i+2] += c2 * b[i+2];
            a[i+3] += c2 * b[i+3];
        }

        double d2 = d;
        int n2 = N & 3;
        for (i=0; i < n2; ++i)
            a[i] += d2 * b[i];

        for (; i < N; i += 4)
        {
            a[i] += d2 * b[i];
            a[i+1] += d2 * b[i+1];
            a[i+2] += d2 * b[i+2];
            a[i+3] += d2 * b[i+3];
        } 
    }
    timer.stop();

    cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds())
         << "    for, unroll=4, unit stride, constants loaded into temps"
         << endl;

    /*********************************************************************/

    timer.start();
    for (iter=0; iter < iterations; ++iter)
    {
        double c2 = c;

        int n1 = N & 3;
        for (i=0; i < n1; ++i)
            a[i] += c2 * b[i];

        for (; i < N; i += 4)
        {
            double t1 = c2 * b[i];
            double t2 = c2 * b[i+1];
            double t3 = c2 * b[i+2];
            double t4 = c2 * b[i+3];

            a[i] += t1;
            a[i+1] += t2;
            a[i+2] += t3;
            a[i+3] += t4;
        }

        double d2 = d;
        int n2 = N & 3;
        for (i=0; i < n2; ++i)
            a[i] += d2 * b[i];

        for (; i < N; i += 4)
        {
            double t1 = d2 * b[i];
            double t2 = d2 * b[i+1];
            double t3 = d2 * b[i+2];
            double t4 = d2 * b[i+3];

            a[i] += t1;
            a[i+1] += t2;
            a[i+2] += t3;
            a[i+3] += t4;
        }
    }
    timer.stop();

    cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds())
         << "    for, unroll=4, unit stride, constants loaded into temps,"
         << endl << "\t\t4 read then 4 write" 
         << endl;

    /*********************************************************************/

    timer.start();
    for (iter=0; iter < iterations; ++iter)
    {
        double c2 = c;

        int n1 = N & 3;
        for (i=0; i < n1; ++i)
            a[i] += c2 * b[i];

        for (; i < N; i += 4)
        {
            a[i] = a[i] + c2 * b[i];
            a[i+1] = a[i+1] + c2 * b[i+1];
            a[i+2] = a[i+2] + c2 * b[i+2];
            a[i+3] = a[i+3] + c2 * b[i+3];
        }

        double d2 = d;
        int n2 = N & 3;
        for (i=0; i < n2; ++i)
            a[i] += d2 * b[i];

        for (; i < N; i += 4)
        {
            a[i] = a[i] + d2 * b[i];
            a[i+1] = a[i+1] + d2 * b[i+1];
            a[i+2] = a[i+2] + d2 * b[i+2];
            a[i+3] = a[i+3] + d2 * b[i+3];
        }
    }
    timer.stop();

    cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds())
         << "    for, unroll=4, unit stride, constants loaded into temps,"
         << endl << "            no += "
         << endl;

    /*********************************************************************/

    timer.start();
    for (iter=0; iter < iterations; ++iter)
    {
        double c2 = c;

        int n1 = N & 3;
        for (i=0; i < n1; ++i)
            a[i] += c2 * b[i];

        for (; i < N; i += 4)
        {
            int i1 = i + 1;
            a[i] += c2 * b[i];
            int i2 = i + 2;
            a[i1] += c2 * b[i1];
            int i3 = i + 3;
            a[i2] += c2 * b[i2];
            a[i3] += c2 * b[i3];
        }

        double d2 = d;
        int n2 = N & 3;
        for (i=0; i < n2; ++i)
            a[i] += d2 * b[i];

        for (; i < N; i += 4)
        {
            int i1 = i + 1;
            a[i] += d2 * b[i];
            int i2 = i + 2;
            a[i1] += d2 * b[i1];
            int i3 = i + 3;
            a[i2] += d2 * b[i2];
            a[i3] += d2 * b[i3];
        }
    }
    timer.stop();

    cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds())
         << "    for, unroll=4, unit stride, constants loaded into temps,"
         << endl << "        CSE for index offsets"
         << endl;

    /*********************************************************************/

    timer.start();
    for (iter=0; iter < iterations; ++iter)
    {
        double c2 = c;

        int n1 = N & 3;
        for (i=0; i < n1; ++i)
            a[i] += c2 * b[i];

        double* pa = a+n1;
        double* pb = b+n1;
 
        int top = N - n1 - 4;

        for (i=top; i >= 0; i -= 4)
        {
            pa[i] += c2 * pb[i];
            pa[i+1] += c2 * pb[i+1];
            pa[i+2] += c2 * pb[i+2];
            pa[i+3] += c2 * pb[i+3];
        }

        double d2 = d;
        int n2 = N & 3;
        for (i=0; i < n2; ++i)
            a[i] += d2 * b[i];

        pa = a+n2;
        pb = b+n2;

        top = N - n2 - 4;
        for (i=top; i >= 0; i -= 4)
        {
            pa[i] += d2 * pb[i];
            pa[i+1] += d2 * pb[i+1];
            pa[i+2] += d2 * pb[i+2];
            pa[i+3] += d2 * pb[i+3];
        }
    }
    timer.stop();

    cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds())
         << "    for, unroll=4, unit stride, constants loaded into temps,"
         << "            backwards"
         << endl;

    /*********************************************************************/

    timer.start();
    for (iter=0; iter < iterations; ++iter)
    {
        double c2 = c;

        int n1 = N & 7;
        for (i=0; i < n1; ++i)
            a[i] += c2 * b[i];

        for (; i < N; i += 8)
        {
            a[i] += c2 * b[i];
            a[i+1] += c2 * b[i+1];
            a[i+2] += c2 * b[i+2];
            a[i+3] += c2 * b[i+3];
            a[i+4] += c2 * b[i+4];
            a[i+5] += c2 * b[i+5];
            a[i+6] += c2 * b[i+6];
            a[i+7] += c2 * b[i+7];
        }

        double d2 = d;
        int n2 = N & 7;
        for (i=0; i < n2; ++i)
            a[i] += d2 * b[i];

        for (; i < N; i += 8)
        {
            a[i] += d2 * b[i];
            a[i+1] += d2 * b[i+1];
            a[i+2] += d2 * b[i+2];
            a[i+3] += d2 * b[i+3];
            a[i+4] += d2 * b[i+4];
            a[i+5] += d2 * b[i+5];
            a[i+6] += d2 * b[i+6];
            a[i+7] += d2 * b[i+7];
        }
    }
    timer.stop();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -