📄 bfgssolver.h
字号:
//////////////////////////////////////////////////////////////////////////// Copyright (c) 2000, Yusuke Miyao/// You may distribute under the terms of the Artistic License.////// <id>$Id: BFGSSolver.h,v 1.10 2003/05/16 13:03:46 yusuke Exp $</id>/// <collection>Maximum Entropy Estimator</collection>/// <name>BFGSSolver.h</name>/// <overview>Implementation of limited-memory BFGS method</overview>/////////////////////////////////////////////////////////////////////////#ifndef Amis_BFGSSolver_h_#define Amis_BFGSSolver_h_#include <amis/configure.h>#include <amis/Real.h>#include <amis/ErrorBase.h>//#include "BFGSMatrix.h"#include <vector>#include <valarray>#include <string>#include <numeric>AMIS_NAMESPACE_BEGIN//////////////////////////////////////////////////////////////////////class BFGSError : public ErrorBase {public: BFGSError( const std::string& s ) : ErrorBase( s ) {} BFGSError( const char* s ) : ErrorBase( s ) {}};///////////////////////////////////////////////////////////////////////// <classdef>/// <name>BFGSSolver</name>/// <overview>Limited-memory BFGS method</overview>/// <desc>/// An implementation of limited-memory BFGS method (general-purpose function/// optimizer)./// </desc>/// <body>template < class Function >class BFGSSolver {public: static const Real LINE_SEARCH_TOLERANCE = 0.0001; /// Tolerance of decrease of function value in line search static const Real GRADIENT_TOLERANCE = 0.9; //static const Real GRADIENT_TOLERANCE = 0.00001; /// Tolerance of decrease of gradient in line search static const int LINE_SEARCH_ITERATIONS = 100; /// Maximum number of iterations of line searchprivate: Function* function; /// Objective function to minimize int vector_size; /// Size of vector of the function int memory_size; /// Number of previous vectors to memorize std::valarray< Real > grad; /// Gradient of the function std::valarray< Real > old_grad; /// Previous gradient of the function std::valarray< Real > direction; /// Direction to minimize std::vector< std::valarray< Real > > S; /// Set of differences of input vectors std::vector< std::valarray< Real > > Y; /// Set of differences of gradients std::vector< Real > rho; /// Set of product of s and y std::valarray< Real > tmp_factor; /// Temporal storage bool is_beginning; /// Is the beginning of optimization? bool is_converged; /// Is the vector converged? Real func_val; /// Current functionvalue Real machine_epsilon; /// Machine epsilonpublic: BFGSSolver( Function* f, int v, int m, Real e ) : grad( v ), old_grad( v ), direction( v ), S(), Y(), rho(), tmp_factor( m ) { function = f; vector_size = v; memory_size = m; S.reserve( m ); Y.reserve( m ); rho.reserve( m ); is_beginning = true; is_converged = true; func_val = 0.0; machine_epsilon = e; } /// Constructor ~BFGSSolver() {} /// Destructorprotected: template < class Val > void push( std::vector< Val >& vec, const Val& v ) const { if ( vec.size() == vec.capacity() ) { for ( typename std::vector< Val >::iterator it = vec.begin(); it + 1 != vec.end(); ++it ) { std::swap( *it, *( it + 1 ) ); } vec.pop_back(); } vec.push_back( v ); } Real norm( std::valarray< Real >& vec ) const { Real sum = 0.0; for ( size_t i = 0; i < vec.size(); ++i ) { sum += fabs( vec[ i ] ); } return sum; } Real inner_product( std::valarray< Real >& x, std::valarray< Real >& y, Real init = 0.0 ) const { //return ::inner_product( &x[ 0 ], &x[ x.size() ], &y[ 0 ], init ); return std::inner_product( &x[ 0 ], &x[ x.size() ], &y[ 0 ], init ); } void lineSearch() { AMIS_DEBUG_MESSAGE( 3, "BFGSSolver::lineSearch" ); //cerr << " dir " << direction << std::endl; Real slope_base = inner_product( grad, direction ); if ( slope_base >= 0.0 ) throw BFGSError( "direction is not descendent" ); Real FUNC_TOLERANCE = LINE_SEARCH_TOLERANCE * slope_base; Real GRAD_TOLERANCE = fabs( GRADIENT_TOLERANCE * slope_base ); AMIS_DEBUG_MESSAGE( 4, "\tslope_base=" << slope_base << " FUNC_TOLERANCE=" << FUNC_TOLERANCE << " GRAD_TOLERANCE=" << GRAD_TOLERANCE << "\n" ); Real scale = 1.0; Real old_scale = 0.0; Real old_func = func_val; function->updateVector( direction, 1.0 ); Real func = function->computeFunctionDerivative( grad ); Real old_slope = slope_base; Real slope = inner_product( grad, direction ); // Bracketing the minimum AMIS_DEBUG_MESSAGE( 4, "\tbracketing start\n" ); for ( int i = 0; i < LINE_SEARCH_ITERATIONS; ++i ) { //cerr << " direction=" << direction << std::endl; AMIS_DEBUG_MESSAGE( 4, "\t\tscale=" << scale << "\n" ); AMIS_DEBUG_MESSAGE( 4, "\t\tfunc=" << func << " orig=" << func_val << "\n" ); AMIS_DEBUG_MESSAGE( 4, "\t\tslope=" << slope << " slope_base=" << slope_base << "\n" ); if ( func > func_val + scale * FUNC_TOLERANCE ) { AMIS_DEBUG_MESSAGE( 4, "\tCondition 1 violated\n" ); break; } // Condition 1 holds if ( fabs( slope ) <= GRAD_TOLERANCE ) { AMIS_DEBUG_MESSAGE( 4, "\tCondition 1 & 2 hold\n" ); if ( fabs( 1.0 - fabs( func / func_val ) ) <= machine_epsilon ) { AMIS_DEBUG_MESSAGE( 3, "\tFunction value converged " << func << " " << func_val << "\n" ); is_converged = true; } func_val = func; if ( scale != 1.0 ) direction *= scale; return; } AMIS_DEBUG_MESSAGE( 4, "\tCondition 2 violated\n" ); if ( slope >= 0.0 ) break; old_scale = scale; scale *= 2.0; old_func = func; function->updateVector( direction, scale - old_scale ); func = function->computeFunctionDerivative( grad ); old_slope = slope; slope = inner_product( grad, direction ); } // Quadratic minimization AMIS_DEBUG_MESSAGE( 4, "\tSwitch to quadratic minimization\n" ); Real min_scale = old_scale; Real max_scale = scale; Real min_func = old_func; Real max_func = func; Real min_slope = old_slope; Real max_slope = slope; Real new_scale = scale; for ( int i = 0; i < LINE_SEARCH_ITERATIONS; ++i ) { // Compute the next trial point old_scale = new_scale; Real scale_step = max_scale - min_scale; Real constant = ( max_func - min_func - scale_step * min_slope ) / ( scale_step * scale_step ); if ( constant <= 0.0 ) { // Bisection new_scale = ( min_scale + max_scale ) * 0.5; } else { // Quadratic minimization new_scale = min_scale - 0.5 * min_slope / constant; } //cerr << "scale-step=" << new_scale - old_scale << std::endl; function->updateVector( direction, new_scale - old_scale ); Real new_func = function->computeFunctionDerivative( grad ); // Check! Real new_slope = inner_product( grad, direction ); AMIS_DEBUG_MESSAGE( 4, "\t\tscale=" << new_scale << "\n" ); AMIS_DEBUG_MESSAGE( 4, "\t\tfunc=" << new_func << " orig=" << func_val << "\n" ); AMIS_DEBUG_MESSAGE( 4, "\t\tslope=" << new_slope << " slope_base=" << slope_base << "\n" ); if ( new_func > func_val + new_scale * FUNC_TOLERANCE ) { // Condition 1 does not hold AMIS_DEBUG_MESSAGE( 4, "\tCondition 1 violated\n" ); max_scale = new_scale; max_func = new_func; max_slope = new_slope; continue; } // Condition 1 holds if ( fabs( new_slope ) <= GRAD_TOLERANCE ) { AMIS_DEBUG_MESSAGE( 4, "\tCondition 1 & 2 hold\n" ); if ( fabs( 1.0 - fabs( new_func / func_val ) ) <= machine_epsilon ) { AMIS_DEBUG_MESSAGE( 3, "\tFunction value converged " << new_func << " " << func_val << "\n" ); is_converged = true; } func_val = new_func; direction *= new_scale; //cerr << " step " << direction << std::endl; return; } AMIS_DEBUG_MESSAGE( 4, "\tCondition 2 violated\n" ); if ( new_slope * scale_step >= 0.0 ) { // new point is ascendent -> new values are replaced AMIS_DEBUG_MESSAGE( 4, "\t\tNew point it ascendent\n" ); max_scale = min_scale; max_func = min_func; max_slope = min_slope; min_scale = new_scale; min_func = new_func; min_slope = new_slope; } else { // new point is descendent -> old values are replaced AMIS_DEBUG_MESSAGE( 4, "\t\tNew point is descendent\n" ); min_scale = new_scale; min_func = new_func; min_slope = new_slope; } } AMIS_ERROR_MESSAGE( "Line search in BFGS algorithm not terminated\n" ); is_converged = true; return; } /// Routine for line minimization bool checkConvergence() { for ( size_t i = 0; i < grad.size(); ++i ) { if ( fabs( grad[ i ] ) > machine_epsilon ) return false; } AMIS_DEBUG_MESSAGE( 3, "\tGradient converged!\n" ); is_converged = true; return true; } /// Check whether the gradient is sufficiently close to a zero vectorpublic: bool isConverged() { return is_converged; } /// Whether the vector is converged void initialize() { AMIS_DEBUG_MESSAGE( 3, "start BFGSSolver::initialize\n" ); is_beginning = true; is_converged = false; AMIS_DEBUG_MESSAGE( 3, "end BFGSSolver::initialize\n" ); } /// Initialize the optimizer void iteration() { AMIS_DEBUG_MESSAGE( 3, "start BFGSSolver::iteration\n" ); if ( is_beginning ) { func_val = function->computeFunctionDerivative( grad ); //direction = grad * ( -vector_size / norm( grad ) ); direction = grad * ( -norm( grad ) ); is_beginning = false; } else { if ( checkConvergence() ) return; //cerr << "grad=" << grad << std::endl; push( S, direction ); push( Y, grad ); Y.back() -= old_grad; Real sy = inner_product( S.back(), Y.back() ); Real gamma = sy / inner_product( Y.back(), Y.back() ); push( rho, 1.0 / sy ); //cerr << " gamma=" << gamma << std::endl; direction = -grad; for ( int i = S.size() - 1; i >= 0; --i ) { tmp_factor[ i ] = rho[ i ] * ( inner_product( S[ i ], direction ) ); direction -= Y[ i ] * tmp_factor[ i ]; } direction *= gamma; for ( size_t i = 0; i < S.size(); ++i ) { Real tmp = rho[ i ] * ( inner_product( Y[ i ], direction ) ); direction += S[ i ] * ( tmp_factor[ i ] - tmp ); } } //cerr << "direction=" << direction << std::endl; old_grad = grad; lineSearch(); AMIS_DEBUG_MESSAGE( 3, "end BFGSSolver::iteration\n" ); } /// An iteration of the limited-memory BFGS method Real functionValue() const { return func_val; } /// Get the function value //const std::valarray< Real >& updateVector( void ) const { // return direction; //} /// Get the direction of update};/// </body>/// </classdef>AMIS_NAMESPACE_END#endif // BFGSSolver_h_// end of BFGSSolver.h
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -