⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 basic_regex_parser.hpp

📁 C++的一个好库。。。现在很流行
💻 HPP
📖 第 1 页 / 共 5 页
字号:
      return;
   }
   if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
   {
      // we have a range:
      if(m_end == ++m_position)
      {
         fail(regex_constants::error_brack, m_position - m_base);
         return;
      }
      if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
      {
         digraph<charT> end_range = get_next_set_literal(char_set);
         char_set.add_range(start_range, end_range);
         if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
         {
                           if(m_end == ++m_position)
                           {
                                   fail(regex_constants::error_brack, m_position - m_base);
                                   return;
                           }
                           if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
                           {
               // trailing - :
               --m_position;
               return;
                           }
            fail(regex_constants::error_range, m_position - m_base);
            return;
         }
         return;
      }
      --m_position;
   }
   char_set.add_single(start_range);
}

template <class charT, class traits>
digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
{
   typedef typename traits::string_type string_type;
   digraph<charT> result;
   switch(this->m_traits.syntax_type(*m_position))
   {
   case regex_constants::syntax_dash:
      if(!char_set.empty())
      {
         // see if we are at the end of the set:
         if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
         {
            fail(regex_constants::error_range, m_position - m_base);
            return result;
         }
         --m_position;
      }
      result.first = *m_position++;
      return result;
   case regex_constants::syntax_escape:
      // check to see if escapes are supported first:
      if(this->flags() & regex_constants::no_escape_in_lists)
      {
         result = *m_position++;
         break;
      }
      ++m_position;
      result = unescape_character();
      break;
   case regex_constants::syntax_open_set:
   {
      if(m_end == ++m_position)
      {
         fail(regex_constants::error_collate, m_position - m_base);
         return result;
      }
      if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
      {
         --m_position;
         result.first = *m_position;
         ++m_position;
         return result;
      }
      if(m_end == ++m_position)
      {
         fail(regex_constants::error_collate, m_position - m_base);
         return result;
      }
      const charT* name_first = m_position;
      // skip at least one character, then find the matching ':]'
      if(m_end == ++m_position)
      {
         fail(regex_constants::error_collate, name_first - m_base);
         return result;
      }
      while((m_position != m_end)
         && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
         ++m_position;
      const charT* name_last = m_position;
      if(m_end == m_position)
      {
         fail(regex_constants::error_collate, name_first - m_base);
         return result;
      }
      if((m_end == ++m_position)
         || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
      {
         fail(regex_constants::error_collate, name_first - m_base);
         return result;
      }
      ++m_position;
      string_type s = this->m_traits.lookup_collatename(name_first, name_last);
      if(s.empty() || (s.size() > 2))
      {
         fail(regex_constants::error_collate, name_first - m_base);
         return result;
      }
      result.first = s[0];
      if(s.size() > 1)
         result.second = s[1];
      else
         result.second = 0;
      return result;
   }
   default:
      result = *m_position++;
   }
   return result;
}

//
// does a value fit in the specified charT type?
//
template <class charT>
bool valid_value(charT, int v, const mpl::true_&)
{
   return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
}
template <class charT>
bool valid_value(charT, int, const mpl::false_&)
{
   return true; // v will alsways fit in a charT
}
template <class charT>
bool valid_value(charT c, int v)
{
   return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>());
}

template <class charT, class traits>
charT basic_regex_parser<charT, traits>::unescape_character()
{
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable:4127)
#endif
   charT result(0);
   if(m_position == m_end)
   {
      fail(regex_constants::error_escape, m_position - m_base);
      return false;
   }
   switch(this->m_traits.escape_syntax_type(*m_position))
   {
   case regex_constants::escape_type_control_a:
      result = charT('\a');
      break;
   case regex_constants::escape_type_e:
      result = charT(27);
      break;
   case regex_constants::escape_type_control_f:
      result = charT('\f');
      break;
   case regex_constants::escape_type_control_n:
      result = charT('\n');
      break;
   case regex_constants::escape_type_control_r:
      result = charT('\r');
      break;
   case regex_constants::escape_type_control_t:
      result = charT('\t');
      break;
   case regex_constants::escape_type_control_v:
      result = charT('\v');
      break;
   case regex_constants::escape_type_word_assert:
      result = charT('\b');
      break;
   case regex_constants::escape_type_ascii_control:
      ++m_position;
      if(m_position == m_end)
      {
         fail(regex_constants::error_escape, m_position - m_base);
         return result;
      }
      /*
      if((*m_position < charT('@'))
            || (*m_position > charT(125)) )
      {
         fail(regex_constants::error_escape, m_position - m_base);
         return result;
      }
      */
      result = static_cast<charT>(*m_position % 32);
      break;
   case regex_constants::escape_type_hex:
      ++m_position;
      if(m_position == m_end)
      {
         fail(regex_constants::error_escape, m_position - m_base);
         return result;
      }
      // maybe have \x{ddd}
      if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
      {
         ++m_position;
         if(m_position == m_end)
         {
            fail(regex_constants::error_escape, m_position - m_base);
            return result;
         }
         int i = this->m_traits.toi(m_position, m_end, 16);
         if((m_position == m_end)
            || (i < 0)
            || ((std::numeric_limits<charT>::is_specialized) && (charT(i) > (std::numeric_limits<charT>::max)()))
            || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
         {
            fail(regex_constants::error_badbrace, m_position - m_base);
            return result;
         }
         ++m_position;
         result = charT(i);
      }
      else
      {
         std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), m_end - m_position);
         int i = this->m_traits.toi(m_position, m_position + len, 16);
         if((i < 0)
            || !valid_value(charT(0), i))
         {
            fail(regex_constants::error_escape, m_position - m_base);
            return result;
         }
         result = charT(i);
      }
      return result;
   case regex_constants::syntax_digit:
      {
      // an octal escape sequence, the first character must be a zero
      // followed by up to 3 octal digits:
      std::ptrdiff_t len = (std::min)(::boost::re_detail::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
      const charT* bp = m_position;
      int val = this->m_traits.toi(bp, bp + 1, 8);
      if(val != 0)
      {
         // Oops not an octal escape after all:
         fail(regex_constants::error_escape, m_position - m_base);
         return result;
      }
      val = this->m_traits.toi(m_position, m_position + len, 8);
      if(val < 0)
      {
         fail(regex_constants::error_escape, m_position - m_base);
         return result;
      }
      return static_cast<charT>(val);
      }
   case regex_constants::escape_type_named_char:
      {
         ++m_position;
         if(m_position == m_end)
         {
            fail(regex_constants::error_escape, m_position - m_base);
            return false;
         }
         // maybe have \N{name}
         if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
         {
            const charT* base = m_position;
            // skip forward until we find enclosing brace:
            while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
               ++m_position;
            if(m_position == m_end)
            {
               fail(regex_constants::error_escape, m_position - m_base);
               return false;
            }
            string_type s = this->m_traits.lookup_collatename(++base, m_position++);
            if(s.empty())
            {
               fail(regex_constants::error_collate, m_position - m_base);
               return false;
            }
            if(s.size() == 1)
            {
               return s[0];
            }
         }
         // fall through is a failure:
         fail(regex_constants::error_escape, m_position - m_base);
         return false;
      }
   default:
      result = *m_position;
      break;
   }
   ++m_position;
   return result;
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif
}

template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_backref()
{
   BOOST_ASSERT(m_position != m_end);
   const charT* pc = m_position;
   int i = this->m_traits.toi(pc, pc + 1, 10);
   if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
   {
      // not a backref at all but an octal escape sequence:
      charT c = unescape_character();
      this->append_literal(c);
   }
   else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
   {
      m_position = pc;
      re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
      pb->index = i;
   }
   else
   {
      fail(regex_constants::error_backref, m_position - m_end);
      return false;
   }
   return true;
}

template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_QE()
{
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable:4127)
#endif
   //
   // parse a \Q...\E sequence:
   //
   ++m_position; // skip the Q
   const charT* start = m_position;
   const charT* end;
   do
   {
      while((m_position != m_end)
         && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
         ++m_position;
      if(m_position == m_end)
      {
         //  a \Q...\E sequence may terminate with the end of the expression:
         end = m_position;
         break;
      }
      if(++m_position == m_end) // skip the escape
      {
         fail(regex_constants::error_escape, m_position - m_base);
         return false;
      }
      // check to see if it's a \E:
      if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
      {
         ++m_position;
         end = m_position - 2;
         break;
      }
      // otherwise go round again:
   }while(true);
   //
   // now add all the character between the two escapes as literals:
   //
   while(start != end)
   {
      this->append_literal(*start);
      ++start;
   }
   return true;
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif
}

template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_perl_extension()
{
   if(++m_position == m_end)
   {
      fail(regex_constants::error_badrepeat, m_position - m_base);
      return false;
   }
   //
   // treat comments as a special case, as these
   // are the only ones that don't start with a leading
   // startmark state:
   //

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -