lbfgs.h
      */
      SizeType nfun() const { return nfun_; }

      //! Norm of gradient given gradient array of length n().
      FloatType euclidean_norm(const FloatType* a) const {
        return std::sqrt(detail::ddot(n_, a, a));
      }

      //! Current stepsize.
      FloatType stp() const { return stp_; }

      //! Execution of one step of the minimization.
      /*! @param x On initial entry this must be set by the user to the
            values of the initial estimate of the solution vector.

          @param f Before initial entry or on re-entry under the control
            of requests_f_and_g(), <code>f</code> must be set by the user
            to contain the value of the objective function at the current
            point <code>x</code>.

          @param g Before initial entry or on re-entry under the control
            of requests_f_and_g(), <code>g</code> must be set by the user
            to contain the components of the gradient at the current
            point <code>x</code>.

          The return value is <code>true</code> if either
          requests_f_and_g() or requests_diag() is <code>true</code>.
          Otherwise the user should check for convergence
          (e.g. using class traditional_convergence_test) and call the
          run() function again to continue the minimization.
          If the return value is <code>false</code> the user should
          <b>not</b> update <code>f</code>, <code>g</code> or
          <code>diag</code> (other overload) before calling the run()
          function again. (A minimal driver loop illustrating this
          protocol is sketched after the end of this header.)

          Note that <code>x</code> is always modified by the run()
          function. Depending on the situation it can therefore be
          necessary to evaluate the objective function one more time
          after the minimization is terminated.
       */
      bool run(
        FloatType* x,
        FloatType f,
        const FloatType* g)
      {
        return generic_run(x, f, g, false, 0);
      }

      //! Execution of one step of the minimization.
      /*! @param x See other overload.

          @param f See other overload.

          @param g See other overload.

          @param diag On initial entry or on re-entry under the control
            of requests_diag(), <code>diag</code> must be set by the user
            to contain the values of the diagonal matrix Hk0. The routine
            will return at each iteration of the algorithm with
            requests_diag() set to <code>true</code>.
            <p>
            Restriction: all elements of <code>diag</code> must be
            positive.
       */
      bool run(
        FloatType* x,
        FloatType f,
        const FloatType* g,
        const FloatType* diag)
      {
        return generic_run(x, f, g, true, diag);
      }

      //.........//
      // Debugging helper: appends the current iteration count, step size
      // and the work arrays to "args.txt" (requires <fstream>).
      void write()
      {
        std::ofstream out;
        out.open("args.txt", std::ios::app);
        out << "iteration:" << iter_ << " stp_:" << stp_ << std::endl;
        //out << "n_:" << n_ << " m_:" << m_ << " gtol_:" << gtol_
        //    << " ispt:" << ispt << " iypt:" << iypt << " ftol:" << ftol
        //    << " stp_:" << stp_ << " stp1:" << stp1 << std::endl;
        typename std::vector<FloatType>::const_iterator wi = w_.begin();
        for (int i = 0; wi != w_.end(); ++i, ++wi) {
          out << "w_[" << i << "]:" << *wi << " ";
        }
        out << std::endl;
        typename std::vector<FloatType>::const_iterator sai = scratch_array_.begin();
        for (int i = 0; sai != scratch_array_.end(); ++i, ++sai) {
          out << "scratch_array_[" << i << "]:" << *sai << " ";
        }
        out << std::endl;
        out.close();
      }

    protected:
      static void throw_diagonal_element_not_positive(SizeType i)
      {
        throw error_improper_input_data(
          "The " + error::itoa(i) + ". diagonal element of the"
          " inverse Hessian approximation is not positive.");
      }

      bool generic_run(
        FloatType* x,
        FloatType f,
        const FloatType* g,
        bool diagco,
        const FloatType* diag);

      detail::mcsrch<FloatType, SizeType> mcsrch_instance;
      const SizeType n_;
      const SizeType m_;
      const SizeType maxfev_;
      const FloatType gtol_;
      const FloatType xtol_;
      const FloatType stpmin_;
      const FloatType stpmax_;
      int iflag_;
      bool requests_f_and_g_;
      bool requests_diag_;
      SizeType iter_;
      SizeType nfun_;
      FloatType stp_;
      FloatType stp1;
      FloatType ftol;
      FloatType ys;
      SizeType point;
      SizeType npt;
      const SizeType ispt;
      const SizeType iypt;
      int info;
      SizeType bound;
      SizeType nfev;
      std::vector<FloatType> w_;
      std::vector<FloatType> scratch_array_;
  };

  template <typename FloatType, typename SizeType>
  bool minimizer<FloatType, SizeType>::generic_run(
    FloatType* x,
    FloatType f,
    const FloatType* g,
    bool diagco,
    const FloatType* diag)
  {
    bool execute_entire_while_loop = false;
    if (!(requests_f_and_g_ || requests_diag_)) {
      execute_entire_while_loop = true;
    }
    requests_f_and_g_ = false;
    requests_diag_ = false;
    FloatType* w = &(*(w_.begin()));
    if (iflag_ == 0) { // Initialize.
      nfun_ = 1;
      if (diagco) {
        for (SizeType i = 0; i < n_; i++) {
          if (diag[i] <= FloatType(0)) {
            throw_diagonal_element_not_positive(i);
          }
        }
      }
      else {
        // Unit matrix as H(0): H(0) = I.
        std::fill_n(scratch_array_.begin(), n_, FloatType(1));
        diag = &(*(scratch_array_.begin()));
      }
      // Initial search direction d(0) = -H(0) g(0).
      for (SizeType i = 0; i < n_; i++) {
        w[ispt + i] = -g[i] * diag[i];
      }
      FloatType gnorm = std::sqrt(detail::ddot(n_, g, g));
      if (gnorm == FloatType(0)) return false;
      stp1 = FloatType(1) / gnorm;
      execute_entire_while_loop = true;
    }
    if (execute_entire_while_loop) {
      bound = iter_;
      iter_++;
      info = 0;
      if (iter_ != 1) {
        if (iter_ > m_) bound = m_;
        ys = detail::ddot(
          n_, w, iypt + npt, SizeType(1), w, ispt + npt, SizeType(1));
        if (!diagco) {
          FloatType yy = detail::ddot(
            n_, w, iypt + npt, SizeType(1), w, iypt + npt, SizeType(1));
          std::fill_n(scratch_array_.begin(), n_, ys / yy);
          diag = &(*(scratch_array_.begin()));
        }
        else {
          iflag_ = 2;
          requests_diag_ = true;
          return true;
        }
      }
    }
    if (execute_entire_while_loop || iflag_ == 2) {
      if (iter_ != 1) {
        if (diag == 0) {
          throw error_internal_error(__FILE__, __LINE__);
        }
        if (diagco) {
          for (SizeType i = 0; i < n_; i++) {
            if (diag[i] <= FloatType(0)) {
              throw_diagonal_element_not_positive(i);
            }
          }
        }
        SizeType cp = point;
        if (point == 0) cp = m_;
        w[n_ + cp - 1] = 1 / ys;
        SizeType i;
        for (i = 0; i < n_; i++) {
          w[i] = -g[i];
        }
        cp = point;
        for (i = 0; i < bound; i++) {
          if (cp == 0) cp = m_;
          cp--;
          FloatType sq = detail::ddot(
            n_, w, ispt + cp * n_, SizeType(1), w, SizeType(0), SizeType(1));
          SizeType inmc = n_ + m_ + cp;
          SizeType iycn = iypt + cp * n_;
          w[inmc] = w[n_ + cp] * sq;
          detail::daxpy(n_, -w[inmc], w, iycn, w);
        }
        for (i = 0; i < n_; i++) {
          w[i] *= diag[i];
        }
        for (i = 0; i < bound; i++) {
          FloatType yr = detail::ddot(
            n_, w, iypt + cp * n_, SizeType(1), w, SizeType(0), SizeType(1));
          FloatType beta = w[n_ + cp] * yr;
          SizeType inmc = n_ + m_ + cp;
          beta = w[inmc] - beta;
          SizeType iscn = ispt + cp * n_;
          detail::daxpy(n_, beta, w, iscn, w);
          cp++;
          if (cp == m_) cp = 0;
        }
        std::copy(w, w+n_, w+(ispt + point * n_));
      }
      stp_ = FloatType(1);
      if (iter_ == 1) stp_ = stp1;
      std::copy(g, g+n_, w);
    }
    //.......
    // Debug output: first few components of x, then the work arrays via write().
    std::ofstream out;
    out.open("args.txt", std::ios::app);
    for (SizeType i = 0; i < n_ && i < 4; ++i) {
      out << "x[" << i << "]:" << x[i] << " ";
    }
    out << std::endl;
    out.close();
    write();
    //......
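    // Added commentary (not in the original source): at this point w[0..n_)
    // holds a copy of the current gradient and w[ispt + point*n_ ..) holds
    // the search direction produced by the two-loop recursion above. The
    // mcsrch call below carries out the line search along that direction;
    // it sets info to -1 whenever it needs the objective value and gradient
    // at a trial point, which generic_run propagates to the caller via
    // requests_f_and_g().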
    mcsrch_instance.run(
      gtol_, stpmin_, stpmax_, n_, x, f, g, w, ispt + point * n_,
      stp_, ftol, xtol_, maxfev_, info, nfev,
      &(*(scratch_array_.begin())));
    if (info == -1) {
      iflag_ = 1;
      requests_f_and_g_ = true;
      return true;
    }
    if (info != 1) {
      throw error_internal_error(__FILE__, __LINE__);
    }
    nfun_ += nfev;
    npt = point * n_;
    for (SizeType i = 0; i < n_; i++) {
      w[ispt + npt + i] = stp_ * w[ispt + npt + i];
      w[iypt + npt + i] = g[i] - w[i];
    }
    point++;
    if (point == m_) point = 0;
    return false;
  }

  //! Traditional LBFGS convergence test.
  /*! This convergence test is equivalent to the test embedded in the
      <code>lbfgs.f</code> Fortran code. The test assumes that there is
      a meaningful relation between the Euclidean norm of the parameter
      vector <code>x</code> and the norm of the gradient vector
      <code>g</code>. Therefore this test should not be used if this
      assumption is not correct for a given problem.
   */
  template <typename FloatType, typename SizeType = std::size_t>
  class traditional_convergence_test
  {
    public:
      //! Default constructor.
      traditional_convergence_test()
      : n_(0), eps_(0)
      {}

      //! Constructor.
      /*! @param n The number of variables in the minimization problem.
            Restriction: <code>n > 0</code>.

          @param eps Determines the accuracy with which the solution
            is to be found.
       */
      explicit
      traditional_convergence_test(
        SizeType n,
        FloatType eps = FloatType(1.e-5))
      : n_(n), eps_(eps)
      {
        if (n_ == 0) {
          throw error_improper_input_parameter("n = 0.");
        }
        if (eps_ < FloatType(0)) {
          throw error_improper_input_parameter("eps < 0.");
        }
      }

      //! Number of free parameters (as passed to the constructor).
      SizeType n() const { return n_; }

      /*! \brief Accuracy with which the solution is to be found
          (as passed to the constructor).
       */
      FloatType eps() const { return eps_; }

      //! Execution of the convergence test for the given parameters.
      /*! Returns <code>true</code> if
          <pre>
          ||g|| < eps * max(1,||x||),
          </pre>
          where <code>||.||</code> denotes the Euclidean norm.

          @param x Current solution vector.

          @param g Components of the gradient at the current
            point <code>x</code>.
       */
      bool operator()(const FloatType* x, const FloatType* g) const
      {
        FloatType xnorm = std::sqrt(detail::ddot(n_, x, x));
        FloatType gnorm = std::sqrt(detail::ddot(n_, g, g));
        if (gnorm <= eps_ * std::max(FloatType(1), xnorm)) return true;
        return false;
      }

    protected:
      const SizeType n_;
      const FloatType eps_;
  };

}} // namespace scitbx::lbfgs

#endif // SCITBX_LBFGS_H
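Below is a minimal driver sketch (an editorial illustration, not part of lbfgs.h) showing the reverse-communication protocol documented for run() together with traditional_convergence_test. It assumes the minimizer<FloatType> constructor taking the number of variables, declared in the earlier part of this header, and that the header is available as <scitbx/lbfgs.h>; the quadratic objective is invented for the example.

#include <scitbx/lbfgs.h>
#include <vector>
#include <cstdio>

// Hypothetical objective for illustration: f(x) = sum_i (x[i] - 1)^2.
// Fills g with the gradient and returns f at x.
static double eval_f_and_g(const std::vector<double>& x, std::vector<double>& g)
{
  double f = 0;
  for (std::size_t i = 0; i < x.size(); i++) {
    double d = x[i] - 1;
    f += d * d;
    g[i] = 2 * d;
  }
  return f;
}

int main()
{
  const std::size_t n = 3;
  std::vector<double> x(n, 0.0);   // initial estimate of the solution vector
  std::vector<double> g(n, 0.0);   // gradient buffer
  scitbx::lbfgs::minimizer<double> opt(n);
  scitbx::lbfgs::traditional_convergence_test<double> is_converged(n);
  double f = eval_f_and_g(x, g);   // values at the initial x
  for (;;) {
    if (opt.run(&x[0], f, &g[0])) {
      // requests_f_and_g(): supply f and g at the (modified) current x.
      f = eval_f_and_g(x, g);
      continue;
    }
    // One iteration completed; x, f and g are mutually consistent here.
    if (is_converged(&x[0], &g[0])) break;
    // Per the run() documentation, do not update f or g before re-entering.
  }
  std::printf("nfun=%lu f=%g\n", (unsigned long) opt.nfun(), f);
  return 0;
}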