basic_regex_parser.hpp

来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 2,075 行 · 第 1/5 页

HPP
2,075
字号
   case regex_constants::syntax_equal:      pb->index = markid = -1;      ++m_position;      jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));      this->m_pdata->m_data.align();      m_alt_insert_point = this->m_pdata->m_data.size();      break;   case regex_constants::syntax_not:      pb->index = markid = -2;      ++m_position;      jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));      this->m_pdata->m_data.align();      m_alt_insert_point = this->m_pdata->m_data.size();      break;   case regex_constants::escape_type_left_word:      {         // a lookbehind assertion:         if(++m_position == m_end)         {            fail(regex_constants::error_badrepeat, m_position - m_base);            return false;         }         regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);         if(t == regex_constants::syntax_not)            pb->index = markid = -2;         else if(t == regex_constants::syntax_equal)            pb->index = markid = -1;         else         {            fail(regex_constants::error_badrepeat, m_position - m_base);            return false;         }         ++m_position;         jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));         this->append_state(syntax_element_backstep, sizeof(re_brace));         this->m_pdata->m_data.align();         m_alt_insert_point = this->m_pdata->m_data.size();         break;      }   case regex_constants::escape_type_right_word:      //      // an independent sub-expression:      //      pb->index = markid = -3;      ++m_position;      jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));      this->m_pdata->m_data.align();      m_alt_insert_point = this->m_pdata->m_data.size();      break;   case regex_constants::syntax_open_mark:      {      // a conditional expression:      pb->index = markid = -4;      if(++m_position == m_end)      {         fail(regex_constants::error_badrepeat, m_position - m_base);         return false;      }      int v = this->m_traits.toi(m_position, m_end, 10);      if(v > 0)      {         re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));         br->index = v;         if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)         {            fail(regex_constants::error_badrepeat, m_position - m_base);            return false;         }         if(++m_position == m_end)         {            fail(regex_constants::error_badrepeat, m_position - m_base);            return false;         }      }      else      {         // verify that we have a lookahead or lookbehind assert:         if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)         {            fail(regex_constants::error_badrepeat, m_position - m_base);            return false;         }         if(++m_position == m_end)         {            fail(regex_constants::error_badrepeat, m_position - m_base);            return false;         }         if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)         {            if(++m_position == m_end)            {               fail(regex_constants::error_badrepeat, m_position - m_base);               return false;            }            if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)               && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))            {               fail(regex_constants::error_badrepeat, m_position - m_base);               return false;            }            m_position -= 3;         }         else         {            if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)               && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))            {               fail(regex_constants::error_badrepeat, m_position - m_base);               return false;            }            m_position -= 2;         }      }      break;      }   case regex_constants::syntax_close_mark:      fail(regex_constants::error_badrepeat, m_position - m_base);      return false;   default:      //      // lets assume that we have a (?imsx) group and try and parse it:      //      regex_constants::syntax_option_type opts = parse_options();      if(m_position == m_end)         return false;      // make a note of whether we have a case change:      m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));      pb->index = markid = 0;      if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)      {         // update flags and carry on as normal:         this->flags(opts);         restore_flags = false;         old_case_change |= m_has_case_change; // defer end of scope by one ')'      }      else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)      {         // update flags and carry on until the matching ')' is found:         this->flags(opts);         ++m_position;      }      else      {         fail(regex_constants::error_badrepeat, m_position - m_base);         return false;      }      // finally append a case change state if we need it:      if(m_has_case_change)      {         static_cast<re_case*>(            this->append_state(syntax_element_toggle_case, sizeof(re_case))            )->icase = opts & regbase::icase;      }   }   //   // now recursively add more states, this will terminate when we get to a   // matching ')' :   //   parse_all();   //   // Unwind alternatives:   //   if(0 == unwind_alts(last_paren_start))      return false;   //   // we either have a ')' or we have run out of characters prematurely:   //   if(m_position == m_end)   {      this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));      return false;   }   BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);   ++m_position;   //   // restore the flags:   //   if(restore_flags)   {      // append a case change state if we need it:      if(m_has_case_change)      {         static_cast<re_case*>(            this->append_state(syntax_element_toggle_case, sizeof(re_case))            )->icase = old_flags & regbase::icase;      }      this->flags(old_flags);   }   //   // set up the jump pointer if we have one:   //   if(jump_offset)   {      this->m_pdata->m_data.align();      re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));      jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);      if(this->m_last_state == jmp)      {         // Oops... we didn't have anything inside the assertion:         fail(regex_constants::error_empty, m_position - m_base);         return false;      }   }   //   // verify that if this is conditional expression, that we do have   // an alternative, if not add one:   //   if(markid == -4)   {      re_syntax_base* b = this->getaddress(expected_alt_point);      // Make sure we have exactly one alternative following this state:      if(b->type != syntax_element_alt)      {         re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));         alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);      }      else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)      {         fail(regex_constants::error_bad_pattern, m_position - m_base);         return false;      }      // check for invalid repetition of next state:      b = this->getaddress(expected_alt_point);      b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);      if((b->type != syntax_element_assert_backref)         && (b->type != syntax_element_startmark))      {         fail(regex_constants::error_badrepeat, m_position - m_base);         return false;      }   }   //   // append closing parenthesis state:   //   pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));   pb->index = markid;   this->m_paren_start = last_paren_start;   //   // restore the alternate insertion point:   //   this->m_alt_insert_point = last_alt_point;   //   // and the case change data:   //   m_has_case_change = old_case_change;   return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate){   //   // parses an emacs style \sx or \Sx construct.   //   if(++m_position == m_end)   {      fail(regex_constants::error_escape, m_position - m_base);      return false;   }   basic_char_set<charT, traits> char_set;   if(negate)      char_set.negate();   static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };   switch(*m_position)   {   case 's':   case ' ':      char_set.add_class(this->m_mask_space);      break;   case 'w':      char_set.add_class(this->m_word_mask);      break;   case '_':      char_set.add_single(digraph<charT>(charT('$')));       char_set.add_single(digraph<charT>(charT('&')));       char_set.add_single(digraph<charT>(charT('*')));       char_set.add_single(digraph<charT>(charT('+')));       char_set.add_single(digraph<charT>(charT('-')));       char_set.add_single(digraph<charT>(charT('_')));       char_set.add_single(digraph<charT>(charT('<')));       char_set.add_single(digraph<charT>(charT('>')));       break;   case '.':      char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));      break;   case '(':      char_set.add_single(digraph<charT>(charT('(')));       char_set.add_single(digraph<charT>(charT('[')));       char_set.add_single(digraph<charT>(charT('{')));       break;   case ')':      char_set.add_single(digraph<charT>(charT(')')));       char_set.add_single(digraph<charT>(charT(']')));       char_set.add_single(digraph<charT>(charT('}')));       break;   case '"':      char_set.add_single(digraph<charT>(charT('"')));       char_set.add_single(digraph<charT>(charT('\'')));       char_set.add_single(digraph<charT>(charT('`')));       break;   case '\'':      char_set.add_single(digraph<charT>(charT('\'')));       char_set.add_single(digraph<charT>(charT(',')));       char_set.add_single(digraph<charT>(charT('#')));       break;   case '<':      char_set.add_single(digraph<charT>(charT(';')));       break;   case '>':      char_set.add_single(digraph<charT>(charT('\n')));       char_set.add_single(digraph<charT>(charT('\f')));       break;   default:      fail(regex_constants::error_ctype, m_position - m_base);      return false;   }   if(0 == this->append_set(char_set))   {      fail(regex_constants::error_ctype, m_position - m_base);      return false;   }   ++m_position;   return true;}template <class charT, class traits>regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options(){   // we have a (?imsx-imsx) group, convert it into a set of flags:   regex_constants::syntax_option_type f = this->flags();   bool breakout = false;   do   {      switch(*m_position)      {      case 's':         f |= regex_constants::mod_s;         f &= ~regex_constants::no_mod_s;         break;      case 'm':         f &= ~regex_constants::no_mod_m;         break;      case 'i':         f |= regex_constants::icase;         break;      case 'x':         f |= regex_constants::mod_x;         break;      default:         breakout = true;         continue;      }      if(++m_position == m_end)      {         fail(regex_constants::error_paren, m_position - m_base);         return false;      }   }   while(!breakout);   if(*m_position == static_cast<charT>('-'))   {      if(++m_position == m_end)      {         fail(regex_constants::error_paren, m_position - m_base);         return false;      }      do      {         switch(*m_position)         {         case 's':            f &= ~regex_constants::mod_s;            f |= regex_constants::no_mod_s;            break;         case 'm':            f |= regex_constants::no_mod_m;            break;         case 'i':            f &= ~regex_constants::icase;            break;         case 'x':            f &= ~regex_constants::mod_x;            break;         default:            breakout = true;            continue;         }         if(++m_position == m_end)         {            fail(regex_constants::error_paren, m_position - m_base);            return false;         }      }      while(!breakout);   }   return f;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start){   //   // If we didn't actually add any states after the last    // alternative then that's an error:   //   if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))      && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start))   {      fail(regex_constants::error_empty, this->m_position - this->m_base);      return false

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?