efficiency.cpp

来自「Boost provides free peer-reviewed portab」· C++ 代码 · 共 196 行

CPP
196
字号
// Copyright David Abrahams, Matthias Troyer, Michael Gauckler// 2005. Distributed under the Boost Software License, Version// 1.0. (See accompanying file LICENSE_1_0.txt or copy at// http://www.boost.org/LICENSE_1_0.txt)#include <boost/parameter.hpp>#include <boost/timer.hpp>#include <iostream>namespace test{  //  // This test measures the abstraction overhead of using the named  // parameter interface.  Some actual test results have been recorded  // in timings.txt in this source file's directory, or  // http://www.boost.org/libs/parameter/test/timings.txt.  //  // Caveats:  //  //   1. This test penalizes the named parameter library slightly, by  //      passing two arguments through the named interface, while  //      only passing one through the plain C++ interface.  //  //   2. This test does not measure the case where an ArgumentPack is  //      so large that it doesn't fit in the L1 cache.  //  //   3. Although we've tried to make this test as general as  //      possible, we are targeting it at a specific application.  //      Where that affects design decisions, we've noted it below in  //      ***...***.  //  //   4. The first time you run this program, the time may not be  //      representative because of disk and memory cache effects, so  //      always run it multiple times and ignore the first  //      measurement.  This approach will also allow you to estimate  //      the statistical error of your test by observing the  //      variation in the valid times.  //  //   5. Try to run this program on a machine that's otherwise idle,  //      or other processes and even device hardware interrupts may  //      interfere by causing caches to be flushed.    // Accumulator function object with plain C++ interface   template <class T>  struct plain_weight_running_total  {      plain_weight_running_total()#if BOOST_WORKAROUND(BOOST_MSVC, < 1300)        : sum(T())#else        : sum()#endif       {}            void operator()(T w)      {          this->sum += w;      }      T sum;  };  BOOST_PARAMETER_KEYWORD(tag, weight)  BOOST_PARAMETER_KEYWORD(tag, value)        // Accumulator function object with named parameter interface  template <class T>  struct named_param_weight_running_total  {      named_param_weight_running_total()#if BOOST_WORKAROUND(BOOST_MSVC, < 1300)        : sum(T())#else        : sum()#endif       {}      template <class ArgumentPack>      void operator()(ArgumentPack const& variates)      {          this->sum += variates[weight];      }      T sum;  };  // This value is required to ensure that a smart compiler's dead  // code elimination doesn't optimize away anything we're testing.  // We'll use it to compute the return code of the executable to make  // sure it's needed.  double live_code;  // Call objects of the given Accumulator type repeatedly with x as  // an argument.  template <class Accumulator, class Arg>  void hammer(Arg const& x, long const repeats)  {      // Strategy: because the sum in an accumulator after each call      // depends on the previous value of the sum, the CPU's pipeline      // might be stalled while waiting for the previous addition to      // complete.  Therefore, we allocate an array of accumulators,      // and update them in sequence, so that there's no dependency      // between adjacent addition operations.      //      // Additionally, if there were only one accumulator, the      // compiler or CPU might decide to update the value in a      // register rather that writing it back to memory.  we want each      // operation to at least update the L1 cache.  *** Note: This      // concern is specific to the particular application at which      // we're targeting the test. ***      // This has to be at least as large as the number of      // simultaneous accumulations that can be executing in the      // compiler pipeline.  A safe number here is larger than the      // machine's maximum pipeline depth. If you want to test the L2      // or L3 cache, or main memory, you can increase the size of      // this array.  1024 is an upper limit on the pipeline depth of      // current vector machines.      const std::size_t number_of_accumulators = 1024;            Accumulator a[number_of_accumulators];            for (long iteration = 0; iteration < repeats; ++iteration)      {          for (Accumulator* ap = a;  ap < a + number_of_accumulators; ++ap)          {              (*ap)(x);          }      }      // Accumulate all the partial sums to avoid dead code      // elimination.      for (Accumulator* ap = a;  ap < a + number_of_accumulators; ++ap)      {          live_code += ap->sum;      }  }  // Measure the time required to hammer accumulators of the given  // type with the argument x.  template <class Accumulator, class T>  double measure(T const& x, long const repeats)  {      // Hammer accumulators a couple of times to ensure the      // instruction cache is full of our test code, and that we don't      // measure the cost of a page fault for accessing the data page      // containing the memory where the accumulators will be      // allocated      hammer<Accumulator>(x, repeats);      hammer<Accumulator>(x, repeats);      // Now start a timer      boost::timer time;      hammer<Accumulator>(x, repeats);  // This time, we'll measure      return time.elapsed();  }}int main(){    using namespace test;    // first decide how many repetitions to measure    long repeats = 100;    double measured = 0;    while (measured < 1.0 && repeats <= 10000000)    {        repeats *= 10;                boost::timer time;        hammer<plain_weight_running_total<double> >(.1, repeats);        hammer<named_param_weight_running_total<double> >(            (weight = .1, value = .2), repeats);        measured = time.elapsed();    }        std::cout        << "plain time:           "        << measure<plain_weight_running_total<double> >(.1, repeats)        << std::endl;        std::cout        << "named parameter time: "        << measure<named_param_weight_running_total<double> >(            (weight = .1, value = .2), repeats        )        << std::endl;    // This is ultimately responsible for preventing all the test code    // from being optimized away.  Change this to return 0 and you    // unplug the whole test's life support system.    return live_code < 0.;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?