basic_regex_parser.hpp

来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 2,075 行 · 第 1/5 页

HPP
2,075
字号
/* * * Copyright (c) 2004 * John Maddock * * Use, modification and distribution are subject to the  * Boost Software License, Version 1.0. (See accompanying file  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) * */ /*  *   LOCATION:    see http://www.boost.org for most recent version.  *   FILE         basic_regex_parser.cpp  *   VERSION      see <boost/version.hpp>  *   DESCRIPTION: Declares template class basic_regex_parser.  */#ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP#define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP#ifdef BOOST_MSVC#pragma warning(push)#pragma warning(disable: 4103)#endif#ifdef BOOST_HAS_ABI_HEADERS#  include BOOST_ABI_PREFIX#endif#ifdef BOOST_MSVC#pragma warning(pop)#endifnamespace boost{namespace re_detail{#ifdef BOOST_MSVC#pragma warning(push)#pragma warning(disable:4244 4800)#endiftemplate <class charT, class traits>class basic_regex_parser : public basic_regex_creator<charT, traits>{public:   basic_regex_parser(regex_data<charT, traits>* data);   void parse(const charT* p1, const charT* p2, unsigned flags);   void fail(regex_constants::error_type error_code, std::ptrdiff_t position);   bool parse_all();   bool parse_basic();   bool parse_extended();   bool parse_literal();   bool parse_open_paren();   bool parse_basic_escape();   bool parse_extended_escape();   bool parse_match_any();   bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());   bool parse_repeat_range(bool isbasic);   bool parse_alt();   bool parse_set();   bool parse_backref();   void parse_set_literal(basic_char_set<charT, traits>& char_set);   bool parse_inner_set(basic_char_set<charT, traits>& char_set);   bool parse_QE();   bool parse_perl_extension();   bool add_emacs_code(bool negate);   bool unwind_alts(std::ptrdiff_t last_paren_start);   digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);   charT unescape_character();   regex_constants::syntax_option_type parse_options();private:   typedef bool (basic_regex_parser::*parser_proc_type)();   typedef typename traits::string_type string_type;   typedef typename traits::char_class_type char_class_type;   parser_proc_type           m_parser_proc;    // the main parser to use   const charT*               m_base;           // the start of the string being parsed   const charT*               m_end;            // the end of the string being parsed   const charT*               m_position;       // our current parser position   unsigned                   m_mark_count;     // how many sub-expressions we have   std::ptrdiff_t             m_paren_start;    // where the last seen ')' began (where repeats are inserted).   std::ptrdiff_t             m_alt_insert_point; // where to insert the next alternative   bool                       m_has_case_change; // true if somewhere in the current block the case has changed#if defined(BOOST_MSVC) && defined(_M_IX86)   // This is an ugly warning suppression workaround (for warnings *inside* std::vector   // that can not otherwise be suppressed)...   BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));   std::vector<long>           m_alt_jumps;      // list of alternative in the current scope.#else   std::vector<std::ptrdiff_t> m_alt_jumps;      // list of alternative in the current scope.#endif   basic_regex_parser& operator=(const basic_regex_parser&);   basic_regex_parser(const basic_regex_parser&);};template <class charT, class traits>basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)   : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false){}template <class charT, class traits>void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags){   // pass l_flags on to base class:   this->init(l_flags);   // set up pointers:   m_position = m_base = p1;   m_end = p2;   // empty strings are errors:   if(p1 == p2)   {      fail(regex_constants::error_empty, 0);      return;   }   // select which parser to use:   switch(l_flags & regbase::main_option_type)   {   case regbase::perl_syntax_group:      m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;      break;   case regbase::basic_syntax_group:      m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;      break;   case regbase::literal:      m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;      break;   }   // parse all our characters:   bool result = parse_all();   //   // Unwind our alternatives:   //   unwind_alts(-1);   // reset l_flags as a global scope (?imsx) may have altered them:   this->flags(l_flags);   // if we haven't gobbled up all the characters then we must   // have had an unexpected ')' :   if(!result)   {      fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_position));      return;   }   // if an error has been set then give up now:   if(this->m_pdata->m_status)      return;   // fill in our sub-expression count:   this->m_pdata->m_mark_count = 1 + m_mark_count;   this->finalize(p1, p2);}template <class charT, class traits>void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position){   if(0 == this->m_pdata->m_status) // update the error code if not already set      this->m_pdata->m_status = error_code;   m_position = m_end; // don't bother parsing anything else   // get the error message:   std::string message = this->m_pdata->m_ptraits->error_string(error_code);   // and raise the exception, this will do nothing if exceptions are disabled:#ifndef BOOST_NO_EXCEPTIONS   if(0 == (this->flags() & regex_constants::no_except))   {      boost::regex_error e(message, error_code, position);      e.raise();   }#else   (void)position; // suppress warnings.#endif}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_all(){   bool result = true;   while(result && (m_position != m_end))   {      result = (this->*m_parser_proc)();   }   return result;}#ifdef BOOST_MSVC#pragma warning(push)#pragma warning(disable:4702)#endiftemplate <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_basic(){   switch(this->m_traits.syntax_type(*m_position))   {   case regex_constants::syntax_escape:      return parse_basic_escape();   case regex_constants::syntax_dot:      return parse_match_any();   case regex_constants::syntax_caret:      ++m_position;      this->append_state(syntax_element_start_line);      break;   case regex_constants::syntax_dollar:      ++m_position;      this->append_state(syntax_element_end_line);      break;   case regex_constants::syntax_star:      if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))         return parse_literal();      else      {         ++m_position;         return parse_repeat();      }   case regex_constants::syntax_plus:      if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))         return parse_literal();      else      {         ++m_position;         return parse_repeat(1);      }   case regex_constants::syntax_question:      if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))         return parse_literal();      else      {         ++m_position;         return parse_repeat(0, 1);      }   case regex_constants::syntax_open_set:      return parse_set();   case regex_constants::syntax_newline:      if(this->flags() & regbase::newline_alt)         return parse_alt();      else         return parse_literal();   default:      return parse_literal();   }   return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_extended(){   bool result = true;   switch(this->m_traits.syntax_type(*m_position))   {   case regex_constants::syntax_open_mark:      return parse_open_paren();   case regex_constants::syntax_close_mark:      return false;   case regex_constants::syntax_escape:      return parse_extended_escape();   case regex_constants::syntax_dot:      return parse_match_any();   case regex_constants::syntax_caret:      ++m_position;      this->append_state(         (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));      break;   case regex_constants::syntax_dollar:      ++m_position;      this->append_state(         (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));      break;   case regex_constants::syntax_star:      if(m_position == this->m_base)      {         fail(regex_constants::error_badrepeat, 0);         return false;      }      ++m_position;      return parse_repeat();   case regex_constants::syntax_question:      if(m_position == this->m_base)      {         fail(regex_constants::error_badrepeat, 0);         return false;      }      ++m_position;      return parse_repeat(0,1);   case regex_constants::syntax_plus:      if(m_position == this->m_base)      {         fail(regex_constants::error_badrepeat, 0);         return false;      }      ++m_position;      return parse_repeat(1);   case regex_constants::syntax_open_brace:      ++m_position;      return parse_repeat_range(false);   case regex_constants::syntax_close_brace:      fail(regex_constants::error_brace, this->m_position - this->m_end);      return false;   case regex_constants::syntax_or:      return parse_alt();   case regex_constants::syntax_open_set:      return parse_set();   case regex_constants::syntax_newline:      if(this->flags() & regbase::newline_alt)         return parse_alt();      else         return parse_literal();   case regex_constants::syntax_hash:      //      // If we have a mod_x flag set, then skip until      // we get to a newline character:      //      if((this->flags()          & (regbase::no_perl_ex|regbase::mod_x))         == regbase::mod_x)      {         while((m_position != m_end) && !is_separator(*m_position++)){}         return true;      }      // Otherwise fall through:   default:      result = parse_literal();      break;   }   return result;}#ifdef BOOST_MSVC#pragma warning(pop)#endiftemplate <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_literal(){   // append this as a literal provided it's not a space character   // or the perl option regbase::mod_x is not set:   if(      ((this->flags()          & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))             != regbase::mod_x)      || !this->m_traits.isctype(*m_position, this->m_mask_space))         this->append_literal(*m_position);   ++m_position;   return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_open_paren(){   //   // skip the '(' and error check:   //   if(++m_position == m_end)   {      fail(regex_constants::error_paren, m_position - m_base);      return false;   }   //   // begin by checking for a perl-style (?...) extension:   //   if(         ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)         || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))     )   {      if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)         return parse_perl_extension();   }   //   // update our mark count, and append the required state:   //   unsigned markid = 0;   if(0 == (this->flags() & regbase::nosubs))      markid = ++m_mark_count;   re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));   pb->index = markid;   std::ptrdiff_t last_paren_start = this->getoffset(pb);   // back up insertion point for alternations, and set new point:   std::ptrdiff_t last_alt_point = m_alt_insert_point;   this->m_pdata->m_data.align();   m_alt_insert_point = this->m_pdata->m_data.size();   //   // back up the current flags in case we have a nested (?imsx) group:   //   regex_constants::syntax_option_type opts = this->flags();   bool old_case_change = m_has_case_change;   m_has_case_change = false; // no changes to this scope as yet...   //   // now recursively add more states, this will terminate when we get to a   // matching ')' :   //   parse_all();   //   // Unwind pushed alternatives:   //   if(0 == unwind_alts(last_paren_start))      return false;   //   // restore flags:   //   if(m_has_case_change)   {      // the case has changed in one or more of the alternatives      // within the scoped (...) block: we have to add a state      // to reset the case sensitivity:      static_cast<re_case*>(         this->append_state(syntax_element_toggle_case, sizeof(re_case))         )->icase = opts & regbase::icase;   }   this->flags(opts);   m_has_case_change = old_case_change;   //   // we either have a ')' or we have run out of characters prematurely:   //   if(m_position == m_end)   {      this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));      return false;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?