📄 basic_regex_parser.hpp

📁 C++的一个好库。。。现在很流行
💻 HPP
📖 第 1 页 / 共 5 页
字号:
   std::size_t min, max;
   int v;
   // skip whitespace:
   while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
      ++m_position;
   // fail if at end:
   if(this->m_position == this->m_end)
   {
      fail(regex_constants::error_brace, this->m_position - this->m_base);
      return false;
   }
   // get min:
   v = this->m_traits.toi(m_position, m_end, 10);
   // skip whitespace:
   while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
      ++m_position;
   if(v < 0)
   {
      fail(regex_constants::error_badbrace, this->m_position - this->m_base);
      return false;
   }
   else if(this->m_position == this->m_end)
   {
      fail(regex_constants::error_brace, this->m_position - this->m_base);
      return false;
   }
   min = v;
   // see if we have a comma:
   if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
   {
      // move on and error check:
      ++m_position;
      // skip whitespace:
      while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
         ++m_position;
      if(this->m_position == this->m_end)
      {
         fail(regex_constants::error_brace, this->m_position - this->m_base);
         return false;
      }
      // get the value if any:
      v = this->m_traits.toi(m_position, m_end, 10);
      max = (v >= 0) ? v : (std::numeric_limits<std::size_t>::max)();
   }
   else
   {
      // no comma, max = min:
      max = min;
   }
   // skip whitespace:
   while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
      ++m_position;
   // OK now check trailing }:
   if(this->m_position == this->m_end)
   {
      fail(regex_constants::error_brace, this->m_position - this->m_base);
      return false;
   }
   if(isbasic)
   {
      if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
      {
         ++m_position;
         if(this->m_position == this->m_end)
         {
            fail(regex_constants::error_brace, this->m_position - this->m_base);
            return false;
         }
      }
      else
      {
         fail(regex_constants::error_badbrace, this->m_position - this->m_base);
         return false;
      }
   }
   if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
      ++m_position;
   else
   {
      fail(regex_constants::error_badbrace, this->m_position - this->m_base);
      return false;
   }
   //
   // finally go and add the repeat, unless error:
   //
   if(min > max)
   {
      fail(regex_constants::error_range, this->m_position - this->m_base);
      return false;
   }
   return parse_repeat(min, max);
}

template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_alt()
{
   //
   // error check: if there have been no previous states,
   // or if the last state was a '(' then error:
   //
   if((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
   {
      fail(regex_constants::error_empty, this->m_position - this->m_base);
      return false;
   }
   ++m_position;
   //
   // we need to append a trailing jump:
   //
   re_syntax_base* pj = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump));
   std::ptrdiff_t jump_offset = this->getoffset(pj);
   //
   // now insert the alternative:
   //
   re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
   jump_offset += re_alt_size;
   this->m_pdata->m_data.align();
   palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
   //
   // update m_alt_insert_point so that the next alternate gets
   // inserted at the start of the second of the two we've just created:
   //
   this->m_alt_insert_point = this->m_pdata->m_data.size();
   //
   // the start of this alternative must have a case changes state
   // if the current block has messed around with case changes:
   //
   if(m_has_case_change)
   {
      static_cast<re_case*>(
         this->append_state(syntax_element_toggle_case, sizeof(re_case))
         )->icase = this->m_icase;
   }
   //
   // push the alternative onto our stack, a recursive
   // implementation here is easier to understand (and faster
   // as it happens), but causes all kinds of stack overflow problems
   // on programs with small stacks (COM+).
   //
   m_alt_jumps.push_back(jump_offset);
   return true;
}

template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_set()
{
   ++m_position;
   if(m_position == m_end)
   {
      fail(regex_constants::error_brack, m_position - m_base);
      return false;
   }
   basic_char_set<charT, traits> char_set;

   const charT* base = m_position;  // where the '[' was
   const charT* item_base = m_position;  // where the '[' or '^' was

   while(m_position != m_end)
   {
      switch(this->m_traits.syntax_type(*m_position))
      {
      case regex_constants::syntax_caret:
         if(m_position == base)
         {
            char_set.negate();
            ++m_position;
            item_base = m_position;
         }
         else
            parse_set_literal(char_set);
         break;
      case regex_constants::syntax_close_set:
         if(m_position == item_base)
         {
            parse_set_literal(char_set);
            break;
         }
         else
         {
            ++m_position;
            if(0 == this->append_set(char_set))
            {
               fail(regex_constants::error_range, m_position - m_base);
               return false;
            }
         }
         return true;
      case regex_constants::syntax_open_set:
         if(parse_inner_set(char_set))
            break;
         return true;
      case regex_constants::syntax_escape:
         {
            //
            // look ahead and see if this is a character class shortcut
            // \d \w \s etc...
            //
            ++m_position;
            if(this->m_traits.escape_syntax_type(*m_position)
               == regex_constants::escape_type_class)
            {
               char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
               if(m != 0)
               {
                  char_set.add_class(m);
                  ++m_position;
                  break;
               }
            }
            else if(this->m_traits.escape_syntax_type(*m_position)
               == regex_constants::escape_type_not_class)
            {
               // negated character class:
               char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
               if(m != 0)
               {
                  char_set.add_negated_class(m);
                  ++m_position;
                  break;
               }
            }
            // not a character class, just a regular escape:
            --m_position;
            parse_set_literal(char_set);
            break;
         }
      default:
         parse_set_literal(char_set);
         break;
      }
   }
   return m_position != m_end;
}

template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
{
   //
   // we have either a character class [:name:]
   // a collating element [.name.]
   // or an equivalence class [=name=]
   //
   if(m_end == ++m_position)
   {
      fail(regex_constants::error_brack, m_position - m_base);
      return false;
   }
   switch(this->m_traits.syntax_type(*m_position))
   {
   case regex_constants::syntax_dot:
      //
      // a collating element is treated as a literal:
      //
      --m_position;
      parse_set_literal(char_set);
      return true;
   case regex_constants::syntax_colon:
      {
      // check that character classes are actually enabled:
      if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
         == (regbase::basic_syntax_group  | regbase::no_char_classes))
      {
         --m_position;
         parse_set_literal(char_set);
         return true;
      }
      // skip the ':'
      if(m_end == ++m_position)
      {
         fail(regex_constants::error_brack, m_position - m_base);
         return false;
      }
      const charT* name_first = m_position;
      // skip at least one character, then find the matching ':]'
      if(m_end == ++m_position)
      {
         fail(regex_constants::error_brack, m_position - m_base);
         return false;
      }
      while((m_position != m_end)
         && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
         ++m_position;
      const charT* name_last = m_position;
      if(m_end == m_position)
      {
         fail(regex_constants::error_brack, m_position - m_base);
         return false;
      }
      if((m_end == ++m_position)
         || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
      {
         fail(regex_constants::error_brack, m_position - m_base);
         return false;
      }
      //
      // check for negated class:
      //
      bool negated = false;
      if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
      {
         ++name_first;
         negated = true;
      }
      typedef typename traits::char_class_type mask_type;
      mask_type m = this->m_traits.lookup_classname(name_first, name_last);
      if(m == 0)
      {
         if(char_set.empty() && (name_last - name_first == 1))
         {
            // maybe a special case:
            ++m_position;
            if( (m_position != m_end)
               && (this->m_traits.syntax_type(*m_position)
                  == regex_constants::syntax_close_set))
            {
               if(this->m_traits.escape_syntax_type(*name_first)
                  == regex_constants::escape_type_left_word)
               {
                  ++m_position;
                  this->append_state(syntax_element_word_start);
                  return false;
               }
               if(this->m_traits.escape_syntax_type(*name_first)
                  == regex_constants::escape_type_right_word)
               {
                  ++m_position;
                  this->append_state(syntax_element_word_end);
                  return false;
               }
            }
         }
         fail(regex_constants::error_ctype, name_first - m_base);
         return false;
      }
      if(negated == false)
         char_set.add_class(m);
      else
         char_set.add_negated_class(m);
      ++m_position;
      break;
   }
   case regex_constants::syntax_equal:
      {
      // skip the '='
      if(m_end == ++m_position)
      {
         fail(regex_constants::error_brack, m_position - m_base);
         return false;
      }
      const charT* name_first = m_position;
      // skip at least one character, then find the matching '=]'
      if(m_end == ++m_position)
      {
         fail(regex_constants::error_brack, m_position - m_base);
         return false;
      }
      while((m_position != m_end)
         && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
         ++m_position;
      const charT* name_last = m_position;
      if(m_end == m_position)
      {
         fail(regex_constants::error_brack, m_position - m_base);
         return false;
      }
      if((m_end == ++m_position)
         || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
      {
         fail(regex_constants::error_brack, m_position - m_base);
         return false;
      }
      string_type m = this->m_traits.lookup_collatename(name_first, name_last);
      if((0 == m.size()) || (m.size() > 2))
      {
         fail(regex_constants::error_collate, name_first - m_base);
         return false;
      }
      digraph<charT> d;
      d.first = m[0];
      if(m.size() > 1)
         d.second = m[1];
      else
         d.second = 0;
      char_set.add_equivalent(d);
      ++m_position;
      break;
   }
   default:
      --m_position;
      parse_set_literal(char_set);
      break;
   }
   return true;
}

template <class charT, class traits>
void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
{
   digraph<charT> start_range(get_next_set_literal(char_set));
   if(m_end == m_position)
   {
      fail(regex_constants::error_brack, m_position - m_base);
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -