⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bfgssolver.h

📁 Amis - A maximum entropy estimator 一个最大熵模型统计工具
💻 H
字号:
////////////////////////////////////////////////////////////////////////////  Copyright (c) 2000, Yusuke Miyao///  You may distribute under the terms of the Artistic License.//////  <id>$Id: BFGSSolver.h,v 1.10 2003/05/16 13:03:46 yusuke Exp $</id>///  <collection>Maximum Entropy Estimator</collection>///  <name>BFGSSolver.h</name>///  <overview>Implementation of limited-memory BFGS method</overview>/////////////////////////////////////////////////////////////////////////#ifndef Amis_BFGSSolver_h_#define Amis_BFGSSolver_h_#include <amis/configure.h>#include <amis/Real.h>#include <amis/ErrorBase.h>//#include "BFGSMatrix.h"#include <vector>#include <valarray>#include <string>#include <numeric>AMIS_NAMESPACE_BEGIN//////////////////////////////////////////////////////////////////////class BFGSError : public ErrorBase {public:  BFGSError( const std::string& s ) : ErrorBase( s ) {}  BFGSError( const char* s ) : ErrorBase( s ) {}};///////////////////////////////////////////////////////////////////////// <classdef>/// <name>BFGSSolver</name>/// <overview>Limited-memory BFGS method</overview>/// <desc>/// An implementation of limited-memory BFGS method (general-purpose function/// optimizer)./// </desc>/// <body>template < class Function >class BFGSSolver {public:  static const Real LINE_SEARCH_TOLERANCE = 0.0001;  /// Tolerance of decrease of function value in line search  static const Real GRADIENT_TOLERANCE = 0.9;  //static const Real GRADIENT_TOLERANCE = 0.00001;  /// Tolerance of decrease of gradient in line search  static const int LINE_SEARCH_ITERATIONS = 100;  /// Maximum number of iterations of line searchprivate:  Function* function;  /// Objective function to minimize  int vector_size;  /// Size of vector of the function  int memory_size;  /// Number of previous vectors to memorize  std::valarray< Real > grad;  /// Gradient of the function  std::valarray< Real > old_grad;  /// Previous gradient of the function  std::valarray< Real > direction;  /// Direction to minimize  std::vector< std::valarray< Real > > S;  /// Set of differences of input vectors  std::vector< std::valarray< Real > > Y;  /// Set of differences of gradients  std::vector< Real > rho;  /// Set of product of s and y  std::valarray< Real > tmp_factor;  /// Temporal storage  bool is_beginning;  /// Is the beginning of optimization?  bool is_converged;  /// Is the vector converged?  Real func_val;  /// Current functionvalue  Real machine_epsilon;  /// Machine epsilonpublic:  BFGSSolver( Function* f, int v, int m, Real e )    : grad( v ), old_grad( v ), direction( v ),      S(), Y(), rho(), tmp_factor( m ) {    function = f;    vector_size = v;    memory_size = m;    S.reserve( m );    Y.reserve( m );    rho.reserve( m );    is_beginning = true;    is_converged = true;    func_val = 0.0;    machine_epsilon = e;  }  /// Constructor  ~BFGSSolver() {}  /// Destructorprotected:  template < class Val >  void push( std::vector< Val >& vec, const Val& v ) const {    if ( vec.size() == vec.capacity() ) {      for ( typename std::vector< Val >::iterator it = vec.begin();            it + 1 != vec.end();            ++it ) {        std::swap( *it, *( it + 1 ) );      }      vec.pop_back();    }    vec.push_back( v );  }  Real norm( std::valarray< Real >& vec ) const {    Real sum = 0.0;    for ( size_t i = 0; i < vec.size(); ++i ) {      sum += fabs( vec[ i ] );    }    return sum;  }  Real inner_product( std::valarray< Real >& x, std::valarray< Real >& y, Real init = 0.0 ) const {    //return ::inner_product( &x[ 0 ], &x[ x.size() ], &y[ 0 ], init );    return std::inner_product( &x[ 0 ], &x[ x.size() ], &y[ 0 ], init );  }  void lineSearch() {    AMIS_DEBUG_MESSAGE( 3, "BFGSSolver::lineSearch" );    //cerr << "    dir " << direction << std::endl;    Real slope_base = inner_product( grad, direction );    if ( slope_base >= 0.0 ) throw BFGSError( "direction is not descendent" );    Real FUNC_TOLERANCE = LINE_SEARCH_TOLERANCE * slope_base;    Real GRAD_TOLERANCE = fabs( GRADIENT_TOLERANCE * slope_base );    AMIS_DEBUG_MESSAGE( 4, "\tslope_base=" << slope_base << "  FUNC_TOLERANCE=" << FUNC_TOLERANCE << "  GRAD_TOLERANCE=" << GRAD_TOLERANCE << "\n" );    Real scale = 1.0;    Real old_scale = 0.0;    Real old_func = func_val;    function->updateVector( direction, 1.0 );    Real func = function->computeFunctionDerivative( grad );    Real old_slope = slope_base;    Real slope = inner_product( grad, direction );    // Bracketing the minimum    AMIS_DEBUG_MESSAGE( 4, "\tbracketing start\n" );    for ( int i = 0; i < LINE_SEARCH_ITERATIONS; ++i ) {      //cerr << "  direction=" << direction << std::endl;      AMIS_DEBUG_MESSAGE( 4, "\t\tscale=" << scale << "\n" );      AMIS_DEBUG_MESSAGE( 4, "\t\tfunc=" << func << "  orig=" << func_val << "\n" );      AMIS_DEBUG_MESSAGE( 4, "\t\tslope=" << slope << "  slope_base=" << slope_base << "\n" );      if ( func > func_val + scale * FUNC_TOLERANCE ) {        AMIS_DEBUG_MESSAGE( 4, "\tCondition 1 violated\n" );        break;      }      // Condition 1 holds      if ( fabs( slope ) <= GRAD_TOLERANCE ) {        AMIS_DEBUG_MESSAGE( 4, "\tCondition 1 & 2 hold\n" );        if ( fabs( 1.0 - fabs( func / func_val ) ) <= machine_epsilon ) {          AMIS_DEBUG_MESSAGE( 3, "\tFunction value converged  " << func << "  " << func_val << "\n" );          is_converged = true;        }        func_val = func;        if ( scale != 1.0 ) direction *= scale;        return;      }      AMIS_DEBUG_MESSAGE( 4, "\tCondition 2 violated\n" );      if ( slope >= 0.0 ) break;      old_scale = scale;      scale *= 2.0;      old_func = func;      function->updateVector( direction, scale - old_scale );      func = function->computeFunctionDerivative( grad );      old_slope = slope;      slope = inner_product( grad, direction );    }    // Quadratic minimization    AMIS_DEBUG_MESSAGE( 4, "\tSwitch to quadratic minimization\n" );    Real min_scale = old_scale;    Real max_scale = scale;    Real min_func = old_func;    Real max_func = func;    Real min_slope = old_slope;    Real max_slope = slope;    Real new_scale = scale;    for ( int i = 0; i < LINE_SEARCH_ITERATIONS; ++i ) {      // Compute the next trial point      old_scale = new_scale;      Real scale_step = max_scale - min_scale;      Real constant = ( max_func - min_func - scale_step * min_slope )        / ( scale_step * scale_step );      if ( constant <= 0.0 ) {        // Bisection        new_scale = ( min_scale + max_scale ) * 0.5;      } else {        // Quadratic minimization        new_scale = min_scale - 0.5 * min_slope / constant;      }      //cerr << "scale-step=" << new_scale - old_scale << std::endl;      function->updateVector( direction, new_scale - old_scale );      Real new_func = function->computeFunctionDerivative( grad );      // Check!      Real new_slope = inner_product( grad, direction );      AMIS_DEBUG_MESSAGE( 4, "\t\tscale=" << new_scale << "\n" );      AMIS_DEBUG_MESSAGE( 4, "\t\tfunc=" << new_func << "  orig=" << func_val << "\n" );      AMIS_DEBUG_MESSAGE( 4, "\t\tslope=" << new_slope << "  slope_base=" << slope_base << "\n" );      if ( new_func > func_val + new_scale * FUNC_TOLERANCE ) {        // Condition 1 does not hold        AMIS_DEBUG_MESSAGE( 4, "\tCondition 1 violated\n" );        max_scale = new_scale;        max_func = new_func;        max_slope = new_slope;        continue;      }      // Condition 1 holds      if ( fabs( new_slope ) <= GRAD_TOLERANCE ) {        AMIS_DEBUG_MESSAGE( 4, "\tCondition 1 & 2 hold\n" );        if ( fabs( 1.0 - fabs( new_func / func_val ) ) <= machine_epsilon ) {          AMIS_DEBUG_MESSAGE( 3, "\tFunction value converged  " << new_func << "  " << func_val << "\n" );          is_converged = true;        }        func_val = new_func;        direction *= new_scale;        //cerr << "    step " << direction << std::endl;        return;      }      AMIS_DEBUG_MESSAGE( 4, "\tCondition 2 violated\n" );      if ( new_slope * scale_step >= 0.0 ) {        // new point is ascendent -> new values are replaced        AMIS_DEBUG_MESSAGE( 4, "\t\tNew point it ascendent\n" );        max_scale = min_scale;        max_func = min_func;        max_slope = min_slope;        min_scale = new_scale;        min_func = new_func;        min_slope = new_slope;      } else {        // new point is descendent -> old values are replaced        AMIS_DEBUG_MESSAGE( 4, "\t\tNew point is descendent\n" );        min_scale = new_scale;        min_func = new_func;        min_slope = new_slope;      }    }    AMIS_ERROR_MESSAGE( "Line search in BFGS algorithm not terminated\n" );    is_converged = true;    return;  }  /// Routine for line minimization  bool checkConvergence() {    for ( size_t i = 0; i < grad.size(); ++i ) {      if ( fabs( grad[ i ] ) > machine_epsilon ) return false;    }    AMIS_DEBUG_MESSAGE( 3, "\tGradient converged!\n" );    is_converged = true;    return true;  }  /// Check whether the gradient is sufficiently close to a zero vectorpublic:  bool isConverged() {    return is_converged;  }  /// Whether the vector is converged  void initialize() {    AMIS_DEBUG_MESSAGE( 3, "start BFGSSolver::initialize\n" );    is_beginning = true;    is_converged = false;    AMIS_DEBUG_MESSAGE( 3, "end BFGSSolver::initialize\n" );  }  /// Initialize the optimizer  void iteration() {    AMIS_DEBUG_MESSAGE( 3, "start BFGSSolver::iteration\n" );    if ( is_beginning ) {      func_val = function->computeFunctionDerivative( grad );      //direction = grad * ( -vector_size / norm( grad ) );      direction = grad * ( -norm( grad ) );      is_beginning = false;    } else {      if ( checkConvergence() ) return;      //cerr << "grad=" << grad << std::endl;      push( S, direction );      push( Y, grad );      Y.back() -= old_grad;      Real sy = inner_product( S.back(), Y.back() );      Real gamma = sy / inner_product( Y.back(), Y.back() );      push( rho, 1.0 / sy );      //cerr << "  gamma=" << gamma << std::endl;      direction = -grad;      for ( int i = S.size() - 1; i >= 0; --i ) {        tmp_factor[ i ] = rho[ i ] * ( inner_product( S[ i ], direction ) );        direction -= Y[ i ] * tmp_factor[ i ];      }      direction *= gamma;      for ( size_t i = 0; i < S.size(); ++i ) {        Real tmp = rho[ i ] * ( inner_product( Y[ i ], direction ) );        direction += S[ i ] * ( tmp_factor[ i ] - tmp );      }    }    //cerr << "direction=" << direction << std::endl;    old_grad = grad;    lineSearch();    AMIS_DEBUG_MESSAGE( 3, "end BFGSSolver::iteration\n" );  }  /// An iteration of the limited-memory BFGS method  Real functionValue() const {    return func_val;  }  /// Get the function value  //const std::valarray< Real >& updateVector( void ) const {  //  return direction;  //}  /// Get the direction of update};/// </body>/// </classdef>AMIS_NAMESPACE_END#endif // BFGSSolver_h_// end of BFGSSolver.h

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -