basic_regex_parser.hpp

来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 2,075 行 · 第 1/5 页

HPP
2,075
字号
            if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)            {               // trailing - :               --m_position;               return;            }            fail(regex_constants::error_range, m_position - m_base);            return;         }         return;      }      --m_position;   }   char_set.add_single(start_range);}template <class charT, class traits>digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set){   digraph<charT> result;   switch(this->m_traits.syntax_type(*m_position))   {   case regex_constants::syntax_dash:      if(!char_set.empty())      {         // see if we are at the end of the set:         if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))         {            fail(regex_constants::error_range, m_position - m_base);            return result;         }         --m_position;      }      result.first = *m_position++;      return result;   case regex_constants::syntax_escape:      // check to see if escapes are supported first:      if(this->flags() & regex_constants::no_escape_in_lists)      {         result = *m_position++;         break;      }      ++m_position;      result = unescape_character();      break;   case regex_constants::syntax_open_set:   {      if(m_end == ++m_position)      {         fail(regex_constants::error_collate, m_position - m_base);         return result;      }      if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)      {         --m_position;         result.first = *m_position;         ++m_position;         return result;      }      if(m_end == ++m_position)      {         fail(regex_constants::error_collate, m_position - m_base);         return result;      }      const charT* name_first = m_position;      // skip at least one character, then find the matching ':]'      if(m_end == ++m_position)      {         fail(regex_constants::error_collate, name_first - m_base);         return result;      }      while((m_position != m_end)          && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))          ++m_position;      const charT* name_last = m_position;      if(m_end == m_position)      {         fail(regex_constants::error_collate, name_first - m_base);         return result;      }      if((m_end == ++m_position)          || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))      {         fail(regex_constants::error_collate, name_first - m_base);         return result;      }      ++m_position;      string_type s = this->m_traits.lookup_collatename(name_first, name_last);      if(s.empty() || (s.size() > 2))      {         fail(regex_constants::error_collate, name_first - m_base);         return result;      }      result.first = s[0];      if(s.size() > 1)         result.second = s[1];      else         result.second = 0;      return result;   }   default:      result = *m_position++;   }   return result;}//// does a value fit in the specified charT type?//template <class charT>bool valid_value(charT, int v, const mpl::true_&){   return (v >> (sizeof(charT) * CHAR_BIT)) == 0;}template <class charT>bool valid_value(charT, int, const mpl::false_&){   return true; // v will alsways fit in a charT}template <class charT>bool valid_value(charT c, int v){   return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>());}template <class charT, class traits>charT basic_regex_parser<charT, traits>::unescape_character(){#ifdef BOOST_MSVC#pragma warning(push)#pragma warning(disable:4127)#endif   charT result(0);   if(m_position == m_end)   {      fail(regex_constants::error_escape, m_position - m_base);      return false;   }   switch(this->m_traits.escape_syntax_type(*m_position))   {   case regex_constants::escape_type_control_a:      result = charT('\a');      break;   case regex_constants::escape_type_e:      result = charT(27);      break;   case regex_constants::escape_type_control_f:      result = charT('\f');      break;   case regex_constants::escape_type_control_n:      result = charT('\n');      break;   case regex_constants::escape_type_control_r:      result = charT('\r');      break;   case regex_constants::escape_type_control_t:      result = charT('\t');      break;   case regex_constants::escape_type_control_v:      result = charT('\v');      break;   case regex_constants::escape_type_word_assert:      result = charT('\b');      break;   case regex_constants::escape_type_ascii_control:      ++m_position;      if(m_position == m_end)      {         fail(regex_constants::error_escape, m_position - m_base);         return result;      }      /*      if((*m_position < charT('@'))            || (*m_position > charT(125)) )      {         fail(regex_constants::error_escape, m_position - m_base);         return result;      }      */      result = static_cast<charT>(*m_position % 32);      break;   case regex_constants::escape_type_hex:      ++m_position;      if(m_position == m_end)      {         fail(regex_constants::error_escape, m_position - m_base);         return result;      }      // maybe have \x{ddd}      if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)      {         ++m_position;         if(m_position == m_end)         {            fail(regex_constants::error_escape, m_position - m_base);            return result;         }         int i = this->m_traits.toi(m_position, m_end, 16);         if((m_position == m_end)            || (i < 0)            || ((std::numeric_limits<charT>::is_specialized) && (charT(i) > (std::numeric_limits<charT>::max)()))            || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))         {            fail(regex_constants::error_badbrace, m_position - m_base);            return result;         }         ++m_position;         result = charT(i);      }      else      {         std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), m_end - m_position);         int i = this->m_traits.toi(m_position, m_position + len, 16);         if((i < 0)            || !valid_value(charT(0), i))         {            fail(regex_constants::error_escape, m_position - m_base);            return result;         }         result = charT(i);      }      return result;   case regex_constants::syntax_digit:      {      // an octal escape sequence, the first character must be a zero      // followed by up to 3 octal digits:      std::ptrdiff_t len = (std::min)(::boost::re_detail::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));      const charT* bp = m_position;      int val = this->m_traits.toi(bp, bp + 1, 8);      if(val != 0)      {         // Oops not an octal escape after all:         fail(regex_constants::error_escape, m_position - m_base);         return result;      }      val = this->m_traits.toi(m_position, m_position + len, 8);      if(val < 0)       {         fail(regex_constants::error_escape, m_position - m_base);         return result;      }      return static_cast<charT>(val);      }   case regex_constants::escape_type_named_char:      {         ++m_position;         if(m_position == m_end)         {            fail(regex_constants::error_escape, m_position - m_base);            return false;         }         // maybe have \N{name}         if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)         {            const charT* base = m_position;            // skip forward until we find enclosing brace:            while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))               ++m_position;            if(m_position == m_end)            {               fail(regex_constants::error_escape, m_position - m_base);               return false;            }            string_type s = this->m_traits.lookup_collatename(++base, m_position++);            if(s.empty())            {               fail(regex_constants::error_collate, m_position - m_base);               return false;            }            if(s.size() == 1)            {               return s[0];            }         }         // fall through is a failure:         fail(regex_constants::error_escape, m_position - m_base);         return false;      }   default:      result = *m_position;      break;   }   ++m_position;   return result;#ifdef BOOST_MSVC#pragma warning(pop)#endif}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_backref(){   BOOST_ASSERT(m_position != m_end);   const charT* pc = m_position;   int i = this->m_traits.toi(pc, pc + 1, 10);   if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))   {      // not a backref at all but an octal escape sequence:      charT c = unescape_character();      this->append_literal(c);   }   else if((i > 0) && (this->m_backrefs & (1u << (i-1))))   {      m_position = pc;      re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));      pb->index = i;   }   else   {      fail(regex_constants::error_backref, m_position - m_end);      return false;   }   return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_QE(){#ifdef BOOST_MSVC#pragma warning(push)#pragma warning(disable:4127)#endif   //   // parse a \Q...\E sequence:   //   ++m_position; // skip the Q   const charT* start = m_position;   const charT* end;   do   {      while((m_position != m_end)          && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))         ++m_position;      if(m_position == m_end)      {         //  a \Q...\E sequence may terminate with the end of the expression:         end = m_position;         break;        }      if(++m_position == m_end) // skip the escape      {         fail(regex_constants::error_escape, m_position - m_base);         return false;      }      // check to see if it's a \E:      if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)      {         ++m_position;         end = m_position - 2;         break;      }      // otherwise go round again:   }while(true);   //   // now add all the character between the two escapes as literals:   //   while(start != end)   {      this->append_literal(*start);      ++start;   }   return true;#ifdef BOOST_MSVC#pragma warning(pop)#endif}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_perl_extension(){   if(++m_position == m_end)   {      fail(regex_constants::error_badrepeat, m_position - m_base);      return false;   }   //   // treat comments as a special case, as these   // are the only ones that don't start with a leading   // startmark state:   //   if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)   {      while((m_position != m_end)          && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))      {}            return true;   }   //   // backup some state, and prepare the way:   //   int markid = 0;   std::ptrdiff_t jump_offset = 0;   re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));   std::ptrdiff_t last_paren_start = this->getoffset(pb);   // back up insertion point for alternations, and set new point:   std::ptrdiff_t last_alt_point = m_alt_insert_point;   this->m_pdata->m_data.align();   m_alt_insert_point = this->m_pdata->m_data.size();   std::ptrdiff_t expected_alt_point = m_alt_insert_point;   bool restore_flags = true;   regex_constants::syntax_option_type old_flags = this->flags();   bool old_case_change = m_has_case_change;   m_has_case_change = false;   //   // select the actual extension used:   //   switch(this->m_traits.syntax_type(*m_position))   {   case regex_constants::syntax_colon:      //      // a non-capturing mark:      //      pb->index = markid = 0;      ++m_position;      break;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?