📄 lbfgs.h

📁 最大熵等模型使用的 lbfgs 训练源代码。
💻 H
📖 第 1 页 / 共 4 页
字号:
        if (sgnd < FloatType(0)) {          sty = stx;          fy = fx;          dy = dx;        }        stx = stp;        fx = fp;        dx = dp;      }      // Compute the new step and safeguard it.      stpf = std::min(stpmax, stpf);      stpf = std::max(stpmin, stpf);      stp = stpf;      if (brackt && bound) {        if (sty > stx) {          stp = std::min(stx + FloatType(0.66) * (sty - stx), stp);        }        else {          stp = std::max(stx + FloatType(0.66) * (sty - stx), stp);        }      }      return info;    }    /* Compute the sum of a vector times a scalar plus another vector.       Adapted from the subroutine <code>daxpy</code> in       <code>lbfgs.f</code>.					dy=dy+lamad*dx	  concretely speaking, it computes relative weights at one time.					dy[iy0+incy*k]=dy[iy0+incy*k]+lamad*dx[ix0+incx*k]     */    template <typename FloatType, typename SizeType>    void daxpy(      SizeType n,      FloatType da,//累加系数      const FloatType* dx,      SizeType ix0,//x向量起始位置      SizeType incx,//x向量移动步长      FloatType* dy,      SizeType iy0,//y向量起始位置      SizeType incy//y向量移动步长	  )    {      SizeType i, ix, iy, m;      if (n == 0) return;      if (da == FloatType(0)) return;      if  (!(incx == 1 && incy == 1)) {        ix = 0;        iy = 0;        for (i = 0; i < n; i++) {          dy[iy0+iy] += da * dx[ix0+ix];          ix += incx;          iy += incy;        }        return;      }      m = n % 4;      for (i = 0; i < m; i++) {        dy[iy0+i] += da * dx[ix0+i];      }      for (; i < n;) {        dy[iy0+i] += da * dx[ix0+i]; i++;        dy[iy0+i] += da * dx[ix0+i]; i++;        dy[iy0+i] += da * dx[ix0+i]; i++;        dy[iy0+i] += da * dx[ix0+i]; i++;      }    }    template <typename FloatType, typename SizeType>    inline    void daxpy(      SizeType n,      FloatType da,      const FloatType* dx,      SizeType ix0,      FloatType* dy)    {      daxpy(n, da, dx, ix0, SizeType(1), dy, SizeType(0), SizeType(1));    }    /* Compute the dot product of two vectors.       Adapted from the subroutine <code>ddot</code>       in <code>lbfgs.f</code>.	   ddot function realizes inner production of two input vectors x and y.	   Concretely speacking, it computes relative weights at one time.				sum(x[ix0+incx*k]*x[iy0+incy*k]) for all k=0,...,n-1     */    template <typename FloatType, typename SizeType>    FloatType ddot(      SizeType n,      const FloatType* dx,      SizeType ix0,//x向量起始位置      SizeType incx,//x向量移动步长      const FloatType* dy,      SizeType iy0,//y向量起始位置      SizeType incy//y向量移动步长	  )    {      SizeType i, ix, iy, m;      FloatType dtemp(0);      if (n == 0) return FloatType(0);      if (!(incx == 1 && incy == 1)) {        ix = 0;        iy = 0;        for (i = 0; i < n; i++) {          dtemp += dx[ix0+ix] * dy[iy0+iy];          ix += incx;          iy += incy;        }        return dtemp;      }      m = n % 5;      for (i = 0; i < m; i++) {        dtemp += dx[ix0+i] * dy[iy0+i];      }      for (; i < n;) {        dtemp += dx[ix0+i] * dy[iy0+i]; i++;        dtemp += dx[ix0+i] * dy[iy0+i]; i++;        dtemp += dx[ix0+i] * dy[iy0+i]; i++;        dtemp += dx[ix0+i] * dy[iy0+i]; i++;        dtemp += dx[ix0+i] * dy[iy0+i]; i++;      }      return dtemp;    }    template <typename FloatType, typename SizeType>    inline    FloatType ddot(      SizeType n,      const FloatType* dx,      const FloatType* dy)    {      return ddot(        n, dx, SizeType(0), SizeType(1), dy, SizeType(0), SizeType(1));    }  } // namespace detail  //! Interface to the LBFGS %minimizer.  /*! This class solves the unconstrained minimization problem      <pre>          min f(x),  x = (x1,x2,...,x_n),      </pre>      using the limited-memory BFGS method. The routine is      especially effective on problems involving a large number of      variables. In a typical iteration of this method an      approximation Hk to the inverse of the Hessian      is obtained by applying <code>m</code> BFGS updates to a      diagonal matrix Hk0, using information from the      previous <code>m</code> steps.  The user specifies the number      <code>m</code>, which determines the amount of storage      required by the routine. The user may also provide the      diagonal matrices Hk0 (parameter <code>diag</code> in the run()      function) if not satisfied with the default choice. The      algorithm is described in "On the limited memory BFGS method for      large scale optimization", by D. Liu and J. Nocedal, Mathematical      Programming B 45 (1989) 503-528.      The user is required to calculate the function value      <code>f</code> and its gradient <code>g</code>. In order to      allow the user complete control over these computations,      reverse communication is used. The routine must be called      repeatedly under the control of the member functions      <code>requests_f_and_g()</code>,      <code>requests_diag()</code>.      If neither requests_f_and_g() nor requests_diag() is      <code>true</code> the user should check for convergence      (using class traditional_convergence_test or any      other custom test). If the convergence test is negative,      the minimizer may be called again for the next iteration.      The steplength (stp()) is determined at each iteration      by means of the line search routine <code>mcsrch</code>, which is      a slight modification of the routine <code>CSRCH</code> written      by More' and Thuente.      The only variables that are machine-dependent are      <code>xtol</code>,      <code>stpmin</code> and      <code>stpmax</code>.      Fatal errors cause <code>error</code> exceptions to be thrown.      The generic class <code>error</code> is sub-classed (e.g.      class <code>error_line_search_failed</code>) to facilitate      granular %error handling.      A note on performance: Using Compaq Fortran V5.4 and      Compaq C++ V6.5, the C++ implementation is about 15% slower      than the Fortran implementation.   */  template <typename FloatType, typename SizeType = std::size_t>  class minimizer  {    public:      //! Default constructor. Some members are not initialized!      minimizer()      : n_(0), m_(0), maxfev_(0),        gtol_(0), xtol_(0),        stpmin_(0), stpmax_(0),        ispt(0), iypt(0)      {}      //! Constructor.      /*! @param n The number of variables in the minimization problem.             Restriction: <code>n &gt; 0</code>.          @param m The number of corrections used in the BFGS update.             Values of <code>m</code> less than 3 are not recommended;             large values of <code>m</code> will result in excessive             computing time. <code>3 &lt;= m &lt;= 7</code> is             recommended.             Restriction: <code>m &gt; 0</code>.          @param maxfev Termination occurs when the number of evaluations             of the objective function is at least <code>maxfev</code> by             the end of an iteration.          @param gtol Controls the accuracy of the line search.            If the function and gradient evaluations are inexpensive with            respect to the cost of the iteration (which is sometimes the            case when solving very large problems) it may be advantageous            to set <code>gtol</code> to a small value. A typical small            value is 0.1.            Restriction: <code>gtol</code> should be greater than 1e-4.          @param xtol An estimate of the machine precision (e.g. 10e-16            on a SUN station 3/60). The line search routine will            terminate if the relative width of the interval of            uncertainty is less than <code>xtol</code>.          @param stpmin Specifies the lower bound for the step            in the line search.            The default value is 1e-20. This value need not be modified            unless the exponent is too large for the machine being used,            or unless the problem is extremely badly scaled (in which            case the exponent should be increased).          @param stpmax specifies the upper bound for the step            in the line search.            The default value is 1e20. This value need not be modified            unless the exponent is too large for the machine being used,            or unless the problem is extremely badly scaled (in which            case the exponent should be increased).       */      explicit      minimizer(        SizeType n,        SizeType m = 5,        SizeType maxfev = 20,        FloatType gtol = FloatType(0.9),        FloatType xtol = FloatType(1.e-16),        FloatType stpmin = FloatType(1.e-20),        FloatType stpmax = FloatType(1.e20))        : n_(n), m_(m), maxfev_(maxfev),          gtol_(gtol), xtol_(xtol),          stpmin_(stpmin), stpmax_(stpmax),          iflag_(0), requests_f_and_g_(false), requests_diag_(false),          iter_(0), nfun_(0), stp_(0),          stp1(0), ftol(0.0001), ys(0), point(0), npt(0),          ispt(n+2*m), iypt((n+2*m)+n*m),          info(0), bound(0), nfev(0)      {        if (n_ == 0) {          throw error_improper_input_parameter("n = 0.");        }        if (m_ == 0) {          throw error_improper_input_parameter("m = 0.");        }        if (maxfev_ == 0) {         throw error_improper_input_parameter("maxfev = 0.");        }        if (gtol_ <= FloatType(1.e-4)) {          throw error_improper_input_parameter("gtol <= 1.e-4.");        }        if (xtol_ < FloatType(0)) {          throw error_improper_input_parameter("xtol < 0.");        }        if (stpmin_ < FloatType(0)) {          throw error_improper_input_parameter("stpmin < 0.");        }        if (stpmax_ < stpmin) {          throw error_improper_input_parameter("stpmax < stpmin");        }        w_.resize(n_*(2*m_+1)+2*m_);        scratch_array_.resize(n_);      }      //! Number of free parameters (as passed to the constructor).      SizeType n() const { return n_; }      //! Number of corrections kept (as passed to the constructor).      SizeType m() const { return m_; }      /*! \brief Maximum number of evaluations of the objective function          (as passed to the constructor).       */      SizeType maxfev() const { return maxfev_; }      /*! \brief Control of the accuracy of the line search.          (as passed to the constructor).       */      FloatType gtol() const { return gtol_; }      //! Estimate of the machine precision (as passed to the constructor).      FloatType xtol() const { return xtol_; }      /*! \brief Lower bound for the step in the line search.          (as passed to the constructor).       */      FloatType stpmin() const { return stpmin_; }      /*! \brief Upper bound for the step in the line search.          (as passed to the constructor).       */      FloatType stpmax() const { return stpmax_; }      //! Status indicator for reverse communication.      /*! <code>true</code> if the run() function returns to request          evaluation of the objective function (<code>f</code>) and          gradients (<code>g</code>) for the current point          (<code>x</code>). To continue the minimization the          run() function is called again with the updated values for          <code>f</code> and <code>g</code>.          <p>          See also: requests_diag()       */      bool requests_f_and_g() const { return requests_f_and_g_; }      //! Status indicator for reverse communication.      /*! <code>true</code> if the run() function returns to request          evaluation of the diagonal matrix (<code>diag</code>)          for the current point (<code>x</code>).          To continue the minimization the run() function is called          again with the updated values for <code>diag</code>.          <p>          See also: requests_f_and_g()       */      bool requests_diag() const { return requests_diag_; }      //! Number of iterations so far.      /*! Note that one iteration may involve multiple evaluations          of the objective function.          <p>          See also: nfun()       */      SizeType iter() const { return iter_; }      //! Total number of evaluations of the objective function so far.      /*! The total number of function evaluations increases by the          number of evaluations required for the line search. The total          is only increased after a successful line search.          <p>          See also: iter()
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -