basic_regex_parser.hpp

来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 2,075 行 · 第 1/5 页

HPP
2,075
字号
      fail(regex_constants::error_brace, this->m_position - this->m_base);      return false;   }   // get min:   v = this->m_traits.toi(m_position, m_end, 10);   // skip whitespace:   while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))      ++m_position;   if(v < 0)   {      fail(regex_constants::error_badbrace, this->m_position - this->m_base);      return false;   }   else if(this->m_position == this->m_end)   {      fail(regex_constants::error_brace, this->m_position - this->m_base);      return false;   }   min = v;   // see if we have a comma:   if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)   {      // move on and error check:      ++m_position;      // skip whitespace:      while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))         ++m_position;      if(this->m_position == this->m_end)      {         fail(regex_constants::error_brace, this->m_position - this->m_base);         return false;      }      // get the value if any:      v = this->m_traits.toi(m_position, m_end, 10);      max = (v >= 0) ? v : (std::numeric_limits<std::size_t>::max)();   }   else   {      // no comma, max = min:      max = min;   }   // skip whitespace:   while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))      ++m_position;   // OK now check trailing }:   if(this->m_position == this->m_end)   {      fail(regex_constants::error_brace, this->m_position - this->m_base);      return false;   }   if(isbasic)   {      if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)      {         ++m_position;         if(this->m_position == this->m_end)         {            fail(regex_constants::error_brace, this->m_position - this->m_base);            return false;         }      }      else      {         fail(regex_constants::error_badbrace, this->m_position - this->m_base);         return false;      }   }   if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)      ++m_position;   else   {      fail(regex_constants::error_badbrace, this->m_position - this->m_base);      return false;   }   //   // finally go and add the repeat, unless error:   //   if(min > max)   {      fail(regex_constants::error_badbrace, this->m_position - this->m_base);      return false;   }   return parse_repeat(min, max);}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_alt(){   //   // error check: if there have been no previous states,   // or if the last state was a '(' then error:   //   if((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))   {      fail(regex_constants::error_empty, this->m_position - this->m_base);      return false;   }   ++m_position;   //   // we need to append a trailing jump:    //   re_syntax_base* pj = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump));   std::ptrdiff_t jump_offset = this->getoffset(pj);   //   // now insert the alternative:   //   re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));   jump_offset += re_alt_size;   this->m_pdata->m_data.align();   palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);   //   // update m_alt_insert_point so that the next alternate gets   // inserted at the start of the second of the two we've just created:   //   this->m_alt_insert_point = this->m_pdata->m_data.size();   //   // the start of this alternative must have a case changes state   // if the current block has messed around with case changes:   //   if(m_has_case_change)   {      static_cast<re_case*>(         this->append_state(syntax_element_toggle_case, sizeof(re_case))         )->icase = this->m_icase;   }   //   // push the alternative onto our stack, a recursive   // implementation here is easier to understand (and faster   // as it happens), but causes all kinds of stack overflow problems   // on programs with small stacks (COM+).   //   m_alt_jumps.push_back(jump_offset);   return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_set(){   ++m_position;   if(m_position == m_end)   {      fail(regex_constants::error_brack, m_position - m_base);      return false;   }   basic_char_set<charT, traits> char_set;   const charT* base = m_position;  // where the '[' was   const charT* item_base = m_position;  // where the '[' or '^' was   while(m_position != m_end)   {      switch(this->m_traits.syntax_type(*m_position))      {      case regex_constants::syntax_caret:         if(m_position == base)         {            char_set.negate();            ++m_position;            item_base = m_position;         }         else            parse_set_literal(char_set);         break;      case regex_constants::syntax_close_set:         if(m_position == item_base)         {            parse_set_literal(char_set);            break;         }         else         {            ++m_position;            if(0 == this->append_set(char_set))            {               fail(regex_constants::error_range, m_position - m_base);               return false;            }         }         return true;      case regex_constants::syntax_open_set:         if(parse_inner_set(char_set))            break;         return true;      case regex_constants::syntax_escape:         {            //             // look ahead and see if this is a character class shortcut            // \d \w \s etc...            //            ++m_position;            if(this->m_traits.escape_syntax_type(*m_position)               == regex_constants::escape_type_class)            {               char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);               if(m != 0)               {                  char_set.add_class(m);                  ++m_position;                  break;               }            }            else if(this->m_traits.escape_syntax_type(*m_position)               == regex_constants::escape_type_not_class)            {               // negated character class:               char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);               if(m != 0)               {                  char_set.add_negated_class(m);                  ++m_position;                  break;               }            }            // not a character class, just a regular escape:            --m_position;            parse_set_literal(char_set);            break;         }      default:         parse_set_literal(char_set);         break;      }   }   return m_position != m_end;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set){   //   // we have either a character class [:name:]   // a collating element [.name.]   // or an equivalence class [=name=]   //   if(m_end == ++m_position)   {      fail(regex_constants::error_brack, m_position - m_base);      return false;   }   switch(this->m_traits.syntax_type(*m_position))   {   case regex_constants::syntax_dot:      //      // a collating element is treated as a literal:      //      --m_position;      parse_set_literal(char_set);      return true;   case regex_constants::syntax_colon:      {      // check that character classes are actually enabled:      if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))          == (regbase::basic_syntax_group  | regbase::no_char_classes))      {         --m_position;         parse_set_literal(char_set);         return true;      }      // skip the ':'      if(m_end == ++m_position)      {         fail(regex_constants::error_brack, m_position - m_base);         return false;      }      const charT* name_first = m_position;      // skip at least one character, then find the matching ':]'      if(m_end == ++m_position)      {         fail(regex_constants::error_brack, m_position - m_base);         return false;      }      while((m_position != m_end)          && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))          ++m_position;      const charT* name_last = m_position;      if(m_end == m_position)      {         fail(regex_constants::error_brack, m_position - m_base);         return false;      }      if((m_end == ++m_position)          || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))      {         fail(regex_constants::error_brack, m_position - m_base);         return false;      }      //      // check for negated class:      //      bool negated = false;      if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)      {         ++name_first;         negated = true;      }      typedef typename traits::char_class_type mask_type;      mask_type m = this->m_traits.lookup_classname(name_first, name_last);      if(m == 0)      {         if(char_set.empty() && (name_last - name_first == 1))         {            // maybe a special case:            ++m_position;            if( (m_position != m_end)                && (this->m_traits.syntax_type(*m_position)                   == regex_constants::syntax_close_set))            {               if(this->m_traits.escape_syntax_type(*name_first)                   == regex_constants::escape_type_left_word)               {                  ++m_position;                  this->append_state(syntax_element_word_start);                  return false;               }               if(this->m_traits.escape_syntax_type(*name_first)                   == regex_constants::escape_type_right_word)               {                  ++m_position;                  this->append_state(syntax_element_word_end);                  return false;               }            }         }         fail(regex_constants::error_ctype, name_first - m_base);         return false;      }      if(negated == false)         char_set.add_class(m);      else         char_set.add_negated_class(m);      ++m_position;      break;   }   case regex_constants::syntax_equal:      {      // skip the '='      if(m_end == ++m_position)      {         fail(regex_constants::error_brack, m_position - m_base);         return false;      }      const charT* name_first = m_position;      // skip at least one character, then find the matching '=]'      if(m_end == ++m_position)      {         fail(regex_constants::error_brack, m_position - m_base);         return false;      }      while((m_position != m_end)          && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))          ++m_position;      const charT* name_last = m_position;      if(m_end == m_position)      {         fail(regex_constants::error_brack, m_position - m_base);         return false;      }      if((m_end == ++m_position)          || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))      {         fail(regex_constants::error_brack, m_position - m_base);         return false;      }      string_type m = this->m_traits.lookup_collatename(name_first, name_last);      if((0 == m.size()) || (m.size() > 2))      {         fail(regex_constants::error_collate, name_first - m_base);         return false;      }      digraph<charT> d;      d.first = m[0];      if(m.size() > 1)         d.second = m[1];      else         d.second = 0;      char_set.add_equivalent(d);      ++m_position;      break;   }   default:      --m_position;      parse_set_literal(char_set);      break;   }   return true;}template <class charT, class traits>void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set){   digraph<charT> start_range(get_next_set_literal(char_set));   if(m_end == m_position)   {      fail(regex_constants::error_brack, m_position - m_base);      return;   }   if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)   {      // we have a range:      if(m_end == ++m_position)      {         fail(regex_constants::error_brack, m_position - m_base);         return;      }      if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)      {         digraph<charT> end_range = get_next_set_literal(char_set);         char_set.add_range(start_range, end_range);         if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)         {            if(m_end == ++m_position)            {               fail(regex_constants::error_brack, m_position - m_base);               return;            }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?