⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 chunky.cpp

📁 A C++ class library for scientific computing
💻 CPP
字号:
#include <blitz/timer.h>using namespace blitz;void report(const char* name, Timer& timer, int N,    long int iterations);void unfused(int N);void fused(int N);void chunky(int N);float* _bz_restrict a;float* _bz_restrict b;float* _bz_restrict c;float* _bz_restrict d;float* _bz_restrict e;float* _bz_restrict f;/* * Code to support the "fuse" macro */int _chunk;bool _done_chunks;int _chunk_size = 512;#define  fuse    _chunk = 0; _done_chunks = false;    \                 for (; !_done_chunks; ++_chunk)int main(int argc, char** argv){    if (argc == 2)        _chunk_size = atoi(argv[1]);    cout << "Using chunk size " << _chunk_size << endl;    const int N = 100000;    a = new float[N];    b = new float[N];    c = new float[N];    d = new float[N];    e = new float[N];    f = new float[N];    for (int i=0; i < N; ++i)    {        a[i] = i;        b[i] = i;        c[i] = i;        d[i] = i;    }    Timer timer;    long int iterations = 100;    timer.start();    for (long i=0; i < iterations; ++i)        unfused(N);    timer.stop();    report("Unfused", timer, N, iterations);    timer.start();    for (long i=0; i < iterations; ++i)        fused(N);    timer.stop();    report("Fused", timer, N, iterations);    timer.start();    for (long i=0; i < iterations; ++i)        chunky(N);    timer.stop();    report("Chunky", timer, N, iterations);    return 0;}void report(const char* name, Timer& timer, int N,    long int iterations){    float flops = float(N) * iterations * 2;    float Mflops = flops / timer.elapsedSeconds() / 1e+6;    cout << setw(20) << name << " " << Mflops << " Mflops/s" << endl;}void __sink() { }void unfused(int N){    for (int i=0; i < N; ++i)        e[i] = a[i] * b[i] + c[i] * d[i];    __sink();    for (int i=0; i < N; ++i)        f[i] = c[i] * b[i] + a[i] * d[i];}void fused(int N){    for (int i=0; i < N; ++i)    {        e[i] = a[i] * b[i] + c[i] * d[i];        f[i] = c[i] * b[i] + a[i] * d[i];    }}// This "chunky" routine is a simulated implementation of// expression templates with tiling across multiple statements// (the "chunky fusion" approach).  This code would be// generated by:////     fuse {//         E = A*B + C*D;//         F = C*B + A*D;//     }void chunky(int N){    fuse {        {   // Code generated by E = A*B + C*D;            int lbound = _chunk * _chunk_size;            int uboundp1 = lbound + _chunk_size;            if (uboundp1 > N)            {                _done_chunks = true;                uboundp1 = N;            }            for (int i=lbound; i < uboundp1; ++i)                e[i] = a[i] * b[i] + c[i] * d[i];        }        __sink();        {   // Code generated by F = C*B + A*D;            int lbound = _chunk * _chunk_size;            int uboundp1 = lbound + _chunk_size;             if (uboundp1 > N)            {                _done_chunks = true;                uboundp1 = N;            }            for (int i=lbound; i < uboundp1; ++i)                f[i] = c[i] * b[i] + a[i] * d[i];        }    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -