📄 lbfgs.h
字号:
if (sgnd < FloatType(0)) { sty = stx; fy = fx; dy = dx; } stx = stp; fx = fp; dx = dp; } // Compute the new step and safeguard it. stpf = std::min(stpmax, stpf); stpf = std::max(stpmin, stpf); stp = stpf; if (brackt && bound) { if (sty > stx) { stp = std::min(stx + FloatType(0.66) * (sty - stx), stp); } else { stp = std::max(stx + FloatType(0.66) * (sty - stx), stp); } } return info; } /* Compute the sum of a vector times a scalar plus another vector. Adapted from the subroutine <code>daxpy</code> in <code>lbfgs.f</code>. dy=dy+lamad*dx concretely speaking, it computes relative weights at one time. dy[iy0+incy*k]=dy[iy0+incy*k]+lamad*dx[ix0+incx*k] */ template <typename FloatType, typename SizeType> void daxpy( SizeType n, FloatType da,//累加系数 const FloatType* dx, SizeType ix0,//x向量起始位置 SizeType incx,//x向量移动步长 FloatType* dy, SizeType iy0,//y向量起始位置 SizeType incy//y向量移动步长 ) { SizeType i, ix, iy, m; if (n == 0) return; if (da == FloatType(0)) return; if (!(incx == 1 && incy == 1)) { ix = 0; iy = 0; for (i = 0; i < n; i++) { dy[iy0+iy] += da * dx[ix0+ix]; ix += incx; iy += incy; } return; } m = n % 4; for (i = 0; i < m; i++) { dy[iy0+i] += da * dx[ix0+i]; } for (; i < n;) { dy[iy0+i] += da * dx[ix0+i]; i++; dy[iy0+i] += da * dx[ix0+i]; i++; dy[iy0+i] += da * dx[ix0+i]; i++; dy[iy0+i] += da * dx[ix0+i]; i++; } } template <typename FloatType, typename SizeType> inline void daxpy( SizeType n, FloatType da, const FloatType* dx, SizeType ix0, FloatType* dy) { daxpy(n, da, dx, ix0, SizeType(1), dy, SizeType(0), SizeType(1)); } /* Compute the dot product of two vectors. Adapted from the subroutine <code>ddot</code> in <code>lbfgs.f</code>. ddot function realizes inner production of two input vectors x and y. Concretely speacking, it computes relative weights at one time. sum(x[ix0+incx*k]*x[iy0+incy*k]) for all k=0,...,n-1 */ template <typename FloatType, typename SizeType> FloatType ddot( SizeType n, const FloatType* dx, SizeType ix0,//x向量起始位置 SizeType incx,//x向量移动步长 const FloatType* dy, SizeType iy0,//y向量起始位置 SizeType incy//y向量移动步长 ) { SizeType i, ix, iy, m; FloatType dtemp(0); if (n == 0) return FloatType(0); if (!(incx == 1 && incy == 1)) { ix = 0; iy = 0; for (i = 0; i < n; i++) { dtemp += dx[ix0+ix] * dy[iy0+iy]; ix += incx; iy += incy; } return dtemp; } m = n % 5; for (i = 0; i < m; i++) { dtemp += dx[ix0+i] * dy[iy0+i]; } for (; i < n;) { dtemp += dx[ix0+i] * dy[iy0+i]; i++; dtemp += dx[ix0+i] * dy[iy0+i]; i++; dtemp += dx[ix0+i] * dy[iy0+i]; i++; dtemp += dx[ix0+i] * dy[iy0+i]; i++; dtemp += dx[ix0+i] * dy[iy0+i]; i++; } return dtemp; } template <typename FloatType, typename SizeType> inline FloatType ddot( SizeType n, const FloatType* dx, const FloatType* dy) { return ddot( n, dx, SizeType(0), SizeType(1), dy, SizeType(0), SizeType(1)); } } // namespace detail //! Interface to the LBFGS %minimizer. /*! This class solves the unconstrained minimization problem <pre> min f(x), x = (x1,x2,...,x_n), </pre> using the limited-memory BFGS method. The routine is especially effective on problems involving a large number of variables. In a typical iteration of this method an approximation Hk to the inverse of the Hessian is obtained by applying <code>m</code> BFGS updates to a diagonal matrix Hk0, using information from the previous <code>m</code> steps. The user specifies the number <code>m</code>, which determines the amount of storage required by the routine. The user may also provide the diagonal matrices Hk0 (parameter <code>diag</code> in the run() function) if not satisfied with the default choice. The algorithm is described in "On the limited memory BFGS method for large scale optimization", by D. Liu and J. Nocedal, Mathematical Programming B 45 (1989) 503-528. The user is required to calculate the function value <code>f</code> and its gradient <code>g</code>. In order to allow the user complete control over these computations, reverse communication is used. The routine must be called repeatedly under the control of the member functions <code>requests_f_and_g()</code>, <code>requests_diag()</code>. If neither requests_f_and_g() nor requests_diag() is <code>true</code> the user should check for convergence (using class traditional_convergence_test or any other custom test). If the convergence test is negative, the minimizer may be called again for the next iteration. The steplength (stp()) is determined at each iteration by means of the line search routine <code>mcsrch</code>, which is a slight modification of the routine <code>CSRCH</code> written by More' and Thuente. The only variables that are machine-dependent are <code>xtol</code>, <code>stpmin</code> and <code>stpmax</code>. Fatal errors cause <code>error</code> exceptions to be thrown. The generic class <code>error</code> is sub-classed (e.g. class <code>error_line_search_failed</code>) to facilitate granular %error handling. A note on performance: Using Compaq Fortran V5.4 and Compaq C++ V6.5, the C++ implementation is about 15% slower than the Fortran implementation. */ template <typename FloatType, typename SizeType = std::size_t> class minimizer { public: //! Default constructor. Some members are not initialized! minimizer() : n_(0), m_(0), maxfev_(0), gtol_(0), xtol_(0), stpmin_(0), stpmax_(0), ispt(0), iypt(0) {} //! Constructor. /*! @param n The number of variables in the minimization problem. Restriction: <code>n > 0</code>. @param m The number of corrections used in the BFGS update. Values of <code>m</code> less than 3 are not recommended; large values of <code>m</code> will result in excessive computing time. <code>3 <= m <= 7</code> is recommended. Restriction: <code>m > 0</code>. @param maxfev Termination occurs when the number of evaluations of the objective function is at least <code>maxfev</code> by the end of an iteration. @param gtol Controls the accuracy of the line search. If the function and gradient evaluations are inexpensive with respect to the cost of the iteration (which is sometimes the case when solving very large problems) it may be advantageous to set <code>gtol</code> to a small value. A typical small value is 0.1. Restriction: <code>gtol</code> should be greater than 1e-4. @param xtol An estimate of the machine precision (e.g. 10e-16 on a SUN station 3/60). The line search routine will terminate if the relative width of the interval of uncertainty is less than <code>xtol</code>. @param stpmin Specifies the lower bound for the step in the line search. The default value is 1e-20. This value need not be modified unless the exponent is too large for the machine being used, or unless the problem is extremely badly scaled (in which case the exponent should be increased). @param stpmax specifies the upper bound for the step in the line search. The default value is 1e20. This value need not be modified unless the exponent is too large for the machine being used, or unless the problem is extremely badly scaled (in which case the exponent should be increased). */ explicit minimizer( SizeType n, SizeType m = 5, SizeType maxfev = 20, FloatType gtol = FloatType(0.9), FloatType xtol = FloatType(1.e-16), FloatType stpmin = FloatType(1.e-20), FloatType stpmax = FloatType(1.e20)) : n_(n), m_(m), maxfev_(maxfev), gtol_(gtol), xtol_(xtol), stpmin_(stpmin), stpmax_(stpmax), iflag_(0), requests_f_and_g_(false), requests_diag_(false), iter_(0), nfun_(0), stp_(0), stp1(0), ftol(0.0001), ys(0), point(0), npt(0), ispt(n+2*m), iypt((n+2*m)+n*m), info(0), bound(0), nfev(0) { if (n_ == 0) { throw error_improper_input_parameter("n = 0."); } if (m_ == 0) { throw error_improper_input_parameter("m = 0."); } if (maxfev_ == 0) { throw error_improper_input_parameter("maxfev = 0."); } if (gtol_ <= FloatType(1.e-4)) { throw error_improper_input_parameter("gtol <= 1.e-4."); } if (xtol_ < FloatType(0)) { throw error_improper_input_parameter("xtol < 0."); } if (stpmin_ < FloatType(0)) { throw error_improper_input_parameter("stpmin < 0."); } if (stpmax_ < stpmin) { throw error_improper_input_parameter("stpmax < stpmin"); } w_.resize(n_*(2*m_+1)+2*m_); scratch_array_.resize(n_); } //! Number of free parameters (as passed to the constructor). SizeType n() const { return n_; } //! Number of corrections kept (as passed to the constructor). SizeType m() const { return m_; } /*! \brief Maximum number of evaluations of the objective function (as passed to the constructor). */ SizeType maxfev() const { return maxfev_; } /*! \brief Control of the accuracy of the line search. (as passed to the constructor). */ FloatType gtol() const { return gtol_; } //! Estimate of the machine precision (as passed to the constructor). FloatType xtol() const { return xtol_; } /*! \brief Lower bound for the step in the line search. (as passed to the constructor). */ FloatType stpmin() const { return stpmin_; } /*! \brief Upper bound for the step in the line search. (as passed to the constructor). */ FloatType stpmax() const { return stpmax_; } //! Status indicator for reverse communication. /*! <code>true</code> if the run() function returns to request evaluation of the objective function (<code>f</code>) and gradients (<code>g</code>) for the current point (<code>x</code>). To continue the minimization the run() function is called again with the updated values for <code>f</code> and <code>g</code>. <p> See also: requests_diag() */ bool requests_f_and_g() const { return requests_f_and_g_; } //! Status indicator for reverse communication. /*! <code>true</code> if the run() function returns to request evaluation of the diagonal matrix (<code>diag</code>) for the current point (<code>x</code>). To continue the minimization the run() function is called again with the updated values for <code>diag</code>. <p> See also: requests_f_and_g() */ bool requests_diag() const { return requests_diag_; } //! Number of iterations so far. /*! Note that one iteration may involve multiple evaluations of the objective function. <p> See also: nfun() */ SizeType iter() const { return iter_; } //! Total number of evaluations of the objective function so far. /*! The total number of function evaluations increases by the number of evaluations required for the line search. The total is only increased after a successful line search. <p> See also: iter()
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -