📄 basic_regex_parser.hpp

📁 C++的一个好库。。。现在很流行
💻 HPP
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 4 5
   if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
   {
      while((m_position != m_end)
         && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
      {}
      return true;
   }
   //
   // backup some state, and prepare the way:
   //
   int markid = 0;
   std::ptrdiff_t jump_offset = 0;
   re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
   std::ptrdiff_t last_paren_start = this->getoffset(pb);
   // back up insertion point for alternations, and set new point:
   std::ptrdiff_t last_alt_point = m_alt_insert_point;
   this->m_pdata->m_data.align();
   m_alt_insert_point = this->m_pdata->m_data.size();
   std::ptrdiff_t expected_alt_point = m_alt_insert_point;
   bool restore_flags = true;
   regex_constants::syntax_option_type old_flags = this->flags();
   bool old_case_change = m_has_case_change;
   m_has_case_change = false;
   //
   // select the actual extension used:
   //
   switch(this->m_traits.syntax_type(*m_position))
   {
   case regex_constants::syntax_colon:
      //
      // a non-capturing mark:
      //
      pb->index = markid = 0;
      ++m_position;
      break;
   case regex_constants::syntax_equal:
      pb->index = markid = -1;
      ++m_position;
      jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
      this->m_pdata->m_data.align();
      m_alt_insert_point = this->m_pdata->m_data.size();
      break;
   case regex_constants::syntax_not:
      pb->index = markid = -2;
      ++m_position;
      jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
      this->m_pdata->m_data.align();
      m_alt_insert_point = this->m_pdata->m_data.size();
      break;
   case regex_constants::escape_type_left_word:
      {
         // a lookbehind assertion:
         if(++m_position == m_end)
         {
            fail(regex_constants::error_badrepeat, m_position - m_base);
            return false;
         }
         regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
         if(t == regex_constants::syntax_not)
            pb->index = markid = -2;
         else if(t == regex_constants::syntax_equal)
            pb->index = markid = -1;
         else
         {
            fail(regex_constants::error_badrepeat, m_position - m_base);
            return false;
         }
         ++m_position;
         jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
         this->append_state(syntax_element_backstep, sizeof(re_brace));
         this->m_pdata->m_data.align();
         m_alt_insert_point = this->m_pdata->m_data.size();
         break;
      }
   case regex_constants::escape_type_right_word:
      //
      // an independent sub-expression:
      //
      pb->index = markid = -3;
      ++m_position;
      jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
      this->m_pdata->m_data.align();
      m_alt_insert_point = this->m_pdata->m_data.size();
      break;
   case regex_constants::syntax_open_mark:
      {
      // a conditional expression:
      pb->index = markid = -4;
      if(++m_position == m_end)
      {
         fail(regex_constants::error_badrepeat, m_position - m_base);
         return false;
      }
      int v = this->m_traits.toi(m_position, m_end, 10);
      if(v > 0)
      {
         re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
         br->index = v;
         if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
         {
            fail(regex_constants::error_badrepeat, m_position - m_base);
            return false;
         }
         if(++m_position == m_end)
         {
            fail(regex_constants::error_badrepeat, m_position - m_base);
            return false;
         }
      }
      else
      {
         // verify that we have a lookahead or lookbehind assert:
         if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
         {
            fail(regex_constants::error_badrepeat, m_position - m_base);
            return false;
         }
         if(++m_position == m_end)
         {
            fail(regex_constants::error_badrepeat, m_position - m_base);
            return false;
         }
         if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
         {
            if(++m_position == m_end)
            {
               fail(regex_constants::error_badrepeat, m_position - m_base);
               return false;
            }
            if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
               && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
            {
               fail(regex_constants::error_badrepeat, m_position - m_base);
               return false;
            }
            m_position -= 3;
         }
         else
         {
            if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
               && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
            {
               fail(regex_constants::error_badrepeat, m_position - m_base);
               return false;
            }
            m_position -= 2;
         }
      }
      break;
      }
   case regex_constants::syntax_close_mark:
      fail(regex_constants::error_badrepeat, m_position - m_base);
      return false;
   default:
      //
      // lets assume that we have a (?imsx) group and try and parse it:
      //
      regex_constants::syntax_option_type opts = parse_options();
      if(m_position == m_end)
         return false;
      // make a note of whether we have a case change:
      m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
      pb->index = markid = 0;
      if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
      {
         // update flags and carry on as normal:
         this->flags(opts);
         restore_flags = false;
         old_case_change |= m_has_case_change; // defer end of scope by one ')'
      }
      else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
      {
         // update flags and carry on until the matching ')' is found:
         this->flags(opts);
         ++m_position;
      }
      else
      {
         fail(regex_constants::error_badrepeat, m_position - m_base);
         return false;
      }

      // finally append a case change state if we need it:
      if(m_has_case_change)
      {
         static_cast<re_case*>(
            this->append_state(syntax_element_toggle_case, sizeof(re_case))
            )->icase = opts & regbase::icase;
      }

   }
   //
   // now recursively add more states, this will terminate when we get to a
   // matching ')' :
   //
   parse_all();
   //
   // Unwind alternatives:
   //
   if(0 == unwind_alts(last_paren_start))
      return false;
   //
   // we either have a ')' or we have run out of characters prematurely:
   //
   if(m_position == m_end)
   {
      this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
      return false;
   }
   BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
   ++m_position;
   //
   // restore the flags:
   //
   if(restore_flags)
   {
      // append a case change state if we need it:
      if(m_has_case_change)
      {
         static_cast<re_case*>(
            this->append_state(syntax_element_toggle_case, sizeof(re_case))
            )->icase = old_flags & regbase::icase;
      }
      this->flags(old_flags);
   }
   //
   // set up the jump pointer if we have one:
   //
   if(jump_offset)
   {
      this->m_pdata->m_data.align();
      re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
      jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
      if(this->m_last_state == jmp)
      {
         // Oops... we didn't have anything inside the assertion:
         fail(regex_constants::error_empty, m_position - m_base);
         return false;
      }
   }
   //
   // verify that if this is conditional expression, that we do have
   // an alternative, if not add one:
   //
   if(markid == -4)
   {
      re_syntax_base* b = this->getaddress(expected_alt_point);
      if(b->type != syntax_element_alt)
      {
         re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
         alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
      }
      else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
      {
         fail(regex_constants::error_bad_pattern, m_position - m_base);
         return false;
      }
   }
   //
   // append closing parenthesis state:
   //
   pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
   pb->index = markid;
   this->m_paren_start = last_paren_start;
   //
   // restore the alternate insertion point:
   //
   this->m_alt_insert_point = last_alt_point;
   //
   // and the case change data:
   //
   m_has_case_change = old_case_change;
   return true;
}

template <class charT, class traits>
bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
{
   //
   // Builds the character set selected by an emacs style \sx or \Sx
   // construct and passes it to append_set.  The set is negated first
   // when "negate" is true.  Returns true on success; on any failure
   // fail() is called and false is returned.
   //
   ++m_position;
   if(m_position == m_end)
   {
      // the expression stops before the character that selects the class:
      fail(regex_constants::error_escape, m_position - m_base);
      return false;
   }

   basic_char_set<charT, traits> code_set;
   if(negate)
   {
      code_set.negate();
   }

   // name handed to lookup_classname for the '.' code:
   static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
   //
   // codes that expand to a fixed list of single characters; each table
   // is named after the code character that selects it:
   //
   static const charT s_for_underscore[8] = { '$', '&', '*', '+', '-', '_', '<', '>', };
   static const charT s_for_open[3] = { '(', '[', '{', };
   static const charT s_for_close[3] = { ')', ']', '}', };
   static const charT s_for_dquote[3] = { '"', '\'', '`', };
   static const charT s_for_squote[3] = { '\'', ',', '#', };
   static const charT s_for_less[1] = { ';', };
   static const charT s_for_greater[2] = { '\n', '\f', };

   // range of literal characters still to be added (empty for class based codes):
   const charT* lit = 0;
   const charT* lit_end = 0;

   switch(*m_position)
   {
   case 's':
   case ' ':
      code_set.add_class(this->m_mask_space);
      break;
   case 'w':
      code_set.add_class(this->m_word_mask);
      break;
   case '_':
      lit = s_for_underscore;
      lit_end = s_for_underscore + 8;
      break;
   case '.':
      code_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
      break;
   case '(':
      lit = s_for_open;
      lit_end = s_for_open + 3;
      break;
   case ')':
      lit = s_for_close;
      lit_end = s_for_close + 3;
      break;
   case '"':
      lit = s_for_dquote;
      lit_end = s_for_dquote + 3;
      break;
   case '\'':
      lit = s_for_squote;
      lit_end = s_for_squote + 3;
      break;
   case '<':
      lit = s_for_less;
      lit_end = s_for_less + 1;
      break;
   case '>':
      lit = s_for_greater;
      lit_end = s_for_greater + 2;
      break;
   default:
      // not a code we recognise:
      fail(regex_constants::error_ctype, m_position - m_base);
      return false;
   }

   // add the literal members, in table order:
   while(lit != lit_end)
   {
      code_set.add_single(digraph<charT>(charT(*lit)));
      ++lit;
   }

   if(0 == this->append_set(code_set))
   {
      fail(regex_constants::error_ctype, m_position - m_base);
      return false;
   }

   // step past the code character now that it has been consumed:
   ++m_position;
   return true;
}

template <class charT, class traits>
regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
{
   // we have a (?imsx-imsx) group, convert it into a set of flags:
   regex_constants::syntax_option_type f = this->flags();
   bool breakout = false;
   do
   {
      switch(*m_position)
      {
      case 's':
         f |= regex_constants::mod_s;
         f &= ~regex_constants::no_mod_s;
         break;
      case 'm':
         f &= ~regex_constants::no_mod_m;
         break;
      case 'i':
         f |= regex_constants::icase;
         break;
      case 'x':
         f |= regex_constants::mod_x;
         break;
      default:
         breakout = true;
         continue;
      }
      if(++m_position == m_end)
      {
         fail(regex_constants::error_paren, m_position - m_base);
         return false;
      }
   }
   while(!breakout);

   if(*m_position == static_cast<charT>('-'))
   {
      if(++m_position == m_end)
      {
         fail(regex_constants::error_paren, m_position - m_base);
         return false;
      }
      do
      {
         swi
上一页 1 2 3 4 5
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -