basic_regex_parser.hpp

来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 2,075 行 · 第 1/5 页

HPP
2,075
字号
   }   BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);   ++m_position;   //   // append closing parenthesis state:   //   pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));   pb->index = markid;   this->m_paren_start = last_paren_start;   //   // restore the alternate insertion point:   //   this->m_alt_insert_point = last_alt_point;   //   // allow backrefs to this mark:   //   if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))      this->m_backrefs |= 1u << (markid - 1);   return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_basic_escape(){   ++m_position;   bool result = true;   switch(this->m_traits.escape_syntax_type(*m_position))   {   case regex_constants::syntax_open_mark:      return parse_open_paren();   case regex_constants::syntax_close_mark:      return false;   case regex_constants::syntax_plus:      if(this->flags() & regex_constants::bk_plus_qm)      {         ++m_position;         return parse_repeat(1);      }      else         return parse_literal();   case regex_constants::syntax_question:      if(this->flags() & regex_constants::bk_plus_qm)      {         ++m_position;         return parse_repeat(0, 1);      }      else         return parse_literal();   case regex_constants::syntax_open_brace:      if(this->flags() & regbase::no_intervals)         return parse_literal();      ++m_position;      return parse_repeat_range(true);   case regex_constants::syntax_close_brace:      if(this->flags() & regbase::no_intervals)         return parse_literal();      fail(regex_constants::error_brace, this->m_position - this->m_base);      return false;   case regex_constants::syntax_or:      if(this->flags() & regbase::bk_vbar)         return parse_alt();      else         result = parse_literal();      break;   case regex_constants::syntax_digit:      return parse_backref();   case regex_constants::escape_type_start_buffer:      if(this->flags() & regbase::emacs_ex)      {         ++m_position;         this->append_state(syntax_element_buffer_start);      }      else         result = parse_literal();      break;   case regex_constants::escape_type_end_buffer:      if(this->flags() & regbase::emacs_ex)      {         ++m_position;         this->append_state(syntax_element_buffer_end);      }      else         result = parse_literal();      break;   case regex_constants::escape_type_word_assert:      if(this->flags() & regbase::emacs_ex)      {         ++m_position;         this->append_state(syntax_element_word_boundary);      }      else         result = parse_literal();      break;   case regex_constants::escape_type_not_word_assert:      if(this->flags() & regbase::emacs_ex)      {         ++m_position;         this->append_state(syntax_element_within_word);      }      else         result = parse_literal();      break;   case regex_constants::escape_type_left_word:      if(this->flags() & regbase::emacs_ex)      {         ++m_position;         this->append_state(syntax_element_word_start);      }      else         result = parse_literal();      break;   case regex_constants::escape_type_right_word:      if(this->flags() & regbase::emacs_ex)      {         ++m_position;         this->append_state(syntax_element_word_end);      }      else         result = parse_literal();      break;   default:      if(this->flags() & regbase::emacs_ex)      {         bool negate = true;         switch(*m_position)         {         case 'w':            negate = false;            // fall through:         case 'W':            {            basic_char_set<charT, traits> char_set;            if(negate)               char_set.negate();            char_set.add_class(this->m_word_mask);            if(0 == this->append_set(char_set))            {               fail(regex_constants::error_ctype, m_position - m_base);               return false;            }            ++m_position;            return true;            }         case 's':            negate = false;            // fall through:         case 'S':            return add_emacs_code(negate);         case 'c':         case 'C':            // not supported yet:            fail(regex_constants::error_escape, m_position - m_base);            return false;         default:            break;         }      }      result = parse_literal();      break;   }   return result;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_extended_escape(){   ++m_position;   bool negate = false; // in case this is a character class escape: \w \d etc   switch(this->m_traits.escape_syntax_type(*m_position))   {   case regex_constants::escape_type_not_class:      negate = true;      // fall through:   case regex_constants::escape_type_class:      {         typedef typename traits::char_class_type mask_type;         mask_type m = this->m_traits.lookup_classname(m_position, m_position+1);         if(m != 0)         {            basic_char_set<charT, traits> char_set;            if(negate)               char_set.negate();            char_set.add_class(m);            if(0 == this->append_set(char_set))            {               fail(regex_constants::error_ctype, m_position - m_base);               return false;            }            ++m_position;            return true;         }         //         // not a class, just a regular unknown escape:         //         this->append_literal(unescape_character());         break;      }   case regex_constants::syntax_digit:      return parse_backref();   case regex_constants::escape_type_left_word:      ++m_position;      this->append_state(syntax_element_word_start);      break;   case regex_constants::escape_type_right_word:      ++m_position;      this->append_state(syntax_element_word_end);      break;   case regex_constants::escape_type_start_buffer:      ++m_position;      this->append_state(syntax_element_buffer_start);      break;   case regex_constants::escape_type_end_buffer:      ++m_position;      this->append_state(syntax_element_buffer_end);      break;   case regex_constants::escape_type_word_assert:      ++m_position;      this->append_state(syntax_element_word_boundary);      break;   case regex_constants::escape_type_not_word_assert:      ++m_position;      this->append_state(syntax_element_within_word);      break;   case regex_constants::escape_type_Z:      ++m_position;      this->append_state(syntax_element_soft_buffer_end);      break;   case regex_constants::escape_type_Q:      return parse_QE();   case regex_constants::escape_type_C:      return parse_match_any();   case regex_constants::escape_type_X:      ++m_position;      this->append_state(syntax_element_combining);      break;   case regex_constants::escape_type_G:      ++m_position;      this->append_state(syntax_element_restart_continue);      break;   case regex_constants::escape_type_not_property:      negate = true;      // fall through:   case regex_constants::escape_type_property:      {         ++m_position;         char_class_type m;         if(m_position == m_end)         {            fail(regex_constants::error_escape, m_position - m_base);            return false;         }         // maybe have \p{ddd}         if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)         {            const charT* base = m_position;            // skip forward until we find enclosing brace:            while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))               ++m_position;            if(m_position == m_end)            {               fail(regex_constants::error_escape, m_position - m_base);               return false;            }            m = this->m_traits.lookup_classname(++base, m_position++);         }         else         {            m = this->m_traits.lookup_classname(m_position, m_position+1);            ++m_position;         }         if(m != 0)         {            basic_char_set<charT, traits> char_set;            if(negate)               char_set.negate();            char_set.add_class(m);            if(0 == this->append_set(char_set))            {               fail(regex_constants::error_ctype, m_position - m_base);               return false;            }            return true;         }         fail(regex_constants::error_ctype, m_position - m_base);      }   default:      this->append_literal(unescape_character());      break;   }   return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_match_any(){   //   // we have a '.' that can match any character:   //   ++m_position;   static_cast<re_dot*>(      this->append_state(syntax_element_wild, sizeof(re_dot))      )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s       ? re_detail::force_not_newline          : this->flags() & regbase::mod_s ?            re_detail::force_newline : re_detail::dont_care);   return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high){   bool greedy = true;   std::size_t insert_point;   //    // when we get to here we may have a non-greedy ? mark still to come:   //   if((m_position != m_end)       && (            (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))            || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))         )      )   {      // OK we have a perl regex, check for a '?':      if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)      {         greedy = false;         ++m_position;      }   }   if(0 == this->m_last_state)   {      fail(regex_constants::error_badrepeat, ::boost::re_detail::distance(m_base, m_position));      return false;   }   if(this->m_last_state->type == syntax_element_endmark)   {      // insert a repeat before the '(' matching the last ')':      insert_point = this->m_paren_start;   }   else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))   {      // the last state was a literal with more than one character, split it in two:      re_literal* lit = static_cast<re_literal*>(this->m_last_state);      charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];      --(lit->length);      // now append new state:      lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));      lit->length = 1;      (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;      insert_point = this->getoffset(this->m_last_state);   }   else   {      // repeat the last state whatever it was, need to add some error checking here:      switch(this->m_last_state->type)      {      case syntax_element_start_line:      case syntax_element_end_line:      case syntax_element_word_boundary:      case syntax_element_within_word:      case syntax_element_word_start:      case syntax_element_word_end:      case syntax_element_buffer_start:      case syntax_element_buffer_end:      case syntax_element_alt:      case syntax_element_soft_buffer_end:      case syntax_element_restart_continue:      case syntax_element_jump:      case syntax_element_startmark:      case syntax_element_backstep:         // can't legally repeat any of the above:         fail(regex_constants::error_badrepeat, m_position - m_base);         return false;      default:         // do nothing...         break;      }      insert_point = this->getoffset(this->m_last_state);   }   //   // OK we now know what to repeat, so insert the repeat around it:   //   re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));   rep->min = low;   rep->max = high;   rep->greedy = greedy;   rep->leading = false;   // store our repeater position for later:   std::ptrdiff_t rep_off = this->getoffset(rep);   // and append a back jump to the repeat:   re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));   jmp->alt.i = rep_off - this->getoffset(jmp);   this->m_pdata->m_data.align();   // now fill in the alt jump for the repeat:   rep = static_cast<re_repeat*>(this->getaddress(rep_off));   rep->alt.i = this->m_pdata->m_data.size() - rep_off;   return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic){   //   // parse a repeat-range:   //   std::size_t min, max;   int v;   // skip whitespace:   while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))      ++m_position;   // fail if at end:   if(this->m_position == this->m_end)   {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?