📄 basic_regex_parser.hpp

📁 C++的一个好库。。。现在很流行
💻 HPP
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
/*
 *
 * Copyright (c) 2004
 * John Maddock
 *
 * Use, modification and distribution are subject to the
 * Boost Software License, Version 1.0. (See accompanying file
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 *
 */

 /*
  *   LOCATION:    see http://www.boost.org for most recent version.
  *   FILE         basic_regex_parser.cpp
  *   VERSION      see <boost/version.hpp>
  *   DESCRIPTION: Declares template class basic_regex_parser.
  */

#ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
#define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP

#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_PREFIX
#endif

namespace boost{
namespace re_detail{

#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable:4244)
#endif

template <class charT, class traits>
class basic_regex_parser : public basic_regex_creator<charT, traits>
{
public:
   basic_regex_parser(regex_data<charT, traits>* data);
   void parse(const charT* p1, const charT* p2, unsigned flags);
   void fail(regex_constants::error_type error_code, std::ptrdiff_t position);

   bool parse_all();
   bool parse_basic();
   bool parse_extended();
   bool parse_literal();
   bool parse_open_paren();
   bool parse_basic_escape();
   bool parse_extended_escape();
   bool parse_match_any();
   bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
   bool parse_repeat_range(bool isbasic);
   bool parse_alt();
   bool parse_set();
   bool parse_backref();
   void parse_set_literal(basic_char_set<charT, traits>& char_set);
   bool parse_inner_set(basic_char_set<charT, traits>& char_set);
   bool parse_QE();
   bool parse_perl_extension();
   bool add_emacs_code(bool negate);
   bool unwind_alts(std::ptrdiff_t last_paren_start);
   digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
   charT unescape_character();
   regex_constants::syntax_option_type parse_options();

private:
   typedef bool (basic_regex_parser::*parser_proc_type)();
   typedef typename traits::string_type string_type;
   typedef typename traits::char_class_type char_class_type;
   parser_proc_type           m_parser_proc;    // the main parser to use
   const charT*               m_base;           // the start of the string being parsed
   const charT*               m_end;            // the end of the string being parsed
   const charT*               m_position;       // our current parser position
   unsigned                   m_mark_count;     // how many sub-expressions we have
   std::ptrdiff_t             m_paren_start;    // where the last seen ')' began (where repeats are inserted).
   std::ptrdiff_t             m_alt_insert_point; // where to insert the next alternative
   bool                       m_has_case_change; // true if somewhere in the current block the case has changed
#if defined(BOOST_MSVC) && defined(_M_IX86)
   // This is an ugly warning suppression workaround (for warnings *inside* std::vector
   // that can not otherwise be suppressed)...
   BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
   std::vector<long>           m_alt_jumps;      // list of alternative in the current scope.
#else
   std::vector<std::ptrdiff_t> m_alt_jumps;      // list of alternative in the current scope.
#endif

   basic_regex_parser& operator=(const basic_regex_parser&);
   basic_regex_parser(const basic_regex_parser&);
};

template <class charT, class traits>
basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
   : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false)
{
}

template <class charT, class traits>
void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned flags)
{
   // pass flags on to base class:
   this->init(flags);
   // set up pointers:
   m_position = m_base = p1;
   m_end = p2;
   // empty strings are errors:
   if(p1 == p2)
   {
      fail(regex_constants::error_empty, 0);
      return;
   }
   // select which parser to use:
   switch(flags & regbase::main_option_type)
   {
   case regbase::perl_syntax_group:
      m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
      break;
   case regbase::basic_syntax_group:
      m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
      break;
   case regbase::literal:
      m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
      break;
   }

   // parse all our characters:
   bool result = parse_all();
   //
   // Unwind our alternatives:
   //
   unwind_alts(-1);
   // reset flags as a global scope (?imsx) may have altered them:
   this->flags(flags);
   // if we haven't gobbled up all the characters then we must
   // have had an unexpected ')' :
   if(!result)
   {
      fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_position));
      return;
   }
   // if an error has been set then give up now:
   if(this->m_pdata->m_status)
      return;
   // fill in our sub-expression count:
   this->m_pdata->m_mark_count = 1 + m_mark_count;
   this->finalize(p1, p2);
}

template <class charT, class traits>
void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
{
   if(0 == this->m_pdata->m_status) // update the error code if not already set
      this->m_pdata->m_status = error_code;
   m_position = m_end; // don't bother parsing anything else
   // get the error message:
   std::string message = this->m_pdata->m_ptraits->error_string(error_code);
   // and raise the exception, this will do nothing if exceptions are disabled:
#ifndef BOOST_NO_EXCEPTIONS
   if(0 == (this->flags() & regex_constants::no_except))
   {
      boost::regex_error e(message, error_code, position);
      e.raise();
   }
#else
   (void)position; // suppress warnings.
#endif
}

template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_all()
{
   bool result = true;
   while(result && (m_position != m_end))
   {
      result = (this->*m_parser_proc)();
   }
   return result;
}

#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable:4702)
#endif
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_basic()
{
   switch(this->m_traits.syntax_type(*m_position))
   {
   case regex_constants::syntax_escape:
      return parse_basic_escape();
   case regex_constants::syntax_dot:
      return parse_match_any();
   case regex_constants::syntax_caret:
      ++m_position;
      this->append_state(syntax_element_start_line);
      break;
   case regex_constants::syntax_dollar:
      ++m_position;
      this->append_state(syntax_element_end_line);
      break;
   case regex_constants::syntax_star:
      if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
         return parse_literal();
      else
      {
         ++m_position;
         return parse_repeat();
      }
   case regex_constants::syntax_plus:
      if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
         return parse_literal();
      else
      {
         ++m_position;
         return parse_repeat(1);
      }
   case regex_constants::syntax_question:
      if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
         return parse_literal();
      else
      {
         ++m_position;
         return parse_repeat(0, 1);
      }
   case regex_constants::syntax_open_set:
      return parse_set();
   default:
      return parse_literal();
   }
   return true;
}

template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_extended()
{
   bool result = true;
   switch(this->m_traits.syntax_type(*m_position))
   {
   case regex_constants::syntax_open_mark:
      return parse_open_paren();
   case regex_constants::syntax_close_mark:
      return false;
   case regex_constants::syntax_escape:
      return parse_extended_escape();
   case regex_constants::syntax_dot:
      return parse_match_any();
   case regex_constants::syntax_caret:
      ++m_position;
      this->append_state(
         (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
      break;
   case regex_constants::syntax_dollar:
      ++m_position;
      this->append_state(
         (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
      break;
   case regex_constants::syntax_star:
      if(m_position == this->m_base)
      {
         fail(regex_constants::error_badrepeat, 0);
         return false;
      }
      ++m_position;
      return parse_repeat();
   case regex_constants::syntax_question:
      if(m_position == this->m_base)
      {
         fail(regex_constants::error_badrepeat, 0);
         return false;
      }
      ++m_position;
      return parse_repeat(0,1);
   case regex_constants::syntax_plus:
      if(m_position == this->m_base)
      {
         fail(regex_constants::error_badrepeat, 0);
         return false;
      }
      ++m_position;
      return parse_repeat(1);
   case regex_constants::syntax_open_brace:
      ++m_position;
      return parse_repeat_range(false);
   case regex_constants::syntax_close_brace:
      fail(regex_constants::error_brace, this->m_position - this->m_end);
      return false;
   case regex_constants::syntax_or:
      return parse_alt();
   case regex_constants::syntax_open_set:
      return parse_set();
   case regex_constants::syntax_hash:
      //
      // If we have a mod_x flag set, then skip until
      // we get to a newline character:
      //
      if((this->flags()
         & (regbase::no_perl_ex|regbase::mod_x))
         == regbase::mod_x)
      {
         while((m_position != m_end) && !is_separator(*m_position++)){}
         return true;
      }
      // Otherwise fall through:
   default:
      result = parse_literal();
      break;
   }
   return result;
}
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif

template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_literal()
{
   // append this as a literal provided it's not a space character
   // or the perl option regbase::mod_x is not set:
   if(
      ((this->flags()
         & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
            != regbase::mod_x)
      || !this->m_traits.isctype(*m_position, this->m_mask_space))
         this->append_literal(*m_position);
   ++m_position;
   return true;
}

template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_open_paren()
{
   //
   // skip the '(' and error check:
   //
   if(++m_position == m_end)
   {
      fail(regex_constants::error_paren, m_position - m_base);
      return false;
   }
   //
   // begin by checking for a perl-style (?...) extension:
   //
   if(
         ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
         || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
     )
   {
      if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
         return parse_perl_extension();
   }
   //
   // update our mark count, and append the required state:
   //
   unsigned markid = 0;
   if(0 == (this->flags() & regbase::nosubs))
      markid = ++m_mark_count;
   re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
   pb->index = markid;
   std::ptrdiff_t last_paren_start = this->getoffset(pb);
   // back up insertion point for alternations, and set new point:
   std::ptrdiff_t last_alt_point = m_alt_insert_point;
   this->m_pdata->m_data.align();
   m_alt_insert_point = this->m_pdata->m_data.size();
   //
   // back up the current flags in case we have a nested (?imsx) group:
   //
   regex_constants::syntax_option_type opts = this->flags();
   bool old_case_change = m_has_case_change;
   m_has_case_change = false; // no changes to this scope as yet...
   //
   // now recursively add more states, this will terminate when we get to a
   // matching ')' :
   //
   parse_all();
   //
   // Unwind pushed alternatives:
   //
   if(0 == unwind_alts(last_paren_start))
      return false;
   //
   // restore flags:
   //
   if(m_has_case_change)
   {
      // the case has changed in one or more of the alternatives
      // within the scoped (...) block: we have to add a state
      // to reset the case sensitivity:
      static_cast<re_case*>(
         this->append_state(syntax_element_toggle_case, sizeof(re_case))
         )->icase = opts & regbase::icase;
   }
   this->flags(opts);
   m_has_case_change = old_case_change;
   //
   // we either have a ')' or we have run out of characters prematurely:
   //
   if(m_position == m_end)
   {
      this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
      return false;
   }
   BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
   ++m_position;
   //
12 3 4 5 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -