📄 basic_regex_parser.hpp
字号:
return;
}
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
{
// we have a range:
if(m_end == ++m_position)
{
fail(regex_constants::error_brack, m_position - m_base);
return;
}
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
{
digraph<charT> end_range = get_next_set_literal(char_set);
char_set.add_range(start_range, end_range);
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
{
if(m_end == ++m_position)
{
fail(regex_constants::error_brack, m_position - m_base);
return;
}
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
{
// trailing - :
--m_position;
return;
}
fail(regex_constants::error_range, m_position - m_base);
return;
}
return;
}
--m_position;
}
char_set.add_single(start_range);
}
//
// Reads the next literal member of a bracket expression starting at
// m_position and returns it as a digraph.
//
// Handles four forms:
//   - a dash, which is only accepted as a literal when the set is still
//     empty or when it is immediately followed by the close-set character;
//   - an escape sequence (decoded by unescape_character() unless the
//     no_escape_in_lists flag is set, in which case the escape character
//     itself is the literal);
//   - an open-set character, either on its own (a literal) or introducing
//     a collating element of the form "[.name.]";
//   - any other character, taken as-is.
//
// On error fail() is called and the digraph built so far is returned.
//
template <class charT, class traits>
digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
{
   typedef typename traits::string_type string_type;
   digraph<charT> literal;
   switch(this->m_traits.syntax_type(*m_position))
   {
   case regex_constants::syntax_dash:
      {
         if(!char_set.empty())
         {
            // The set already has members, so a dash here is only a
            // literal when the very next character closes the set:
            ++m_position;
            const bool closes_set = (m_position != m_end)
               && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set);
            if(!closes_set)
            {
               fail(regex_constants::error_range, m_position - m_base);
               return literal;
            }
            // step back onto the dash:
            --m_position;
         }
         literal.first = *m_position;
         ++m_position;
         return literal;
      }
   case regex_constants::syntax_escape:
      {
         if(this->flags() & regex_constants::no_escape_in_lists)
         {
            // escapes are not recognised inside sets: the escape
            // character stands for itself.
            literal = *m_position;
            ++m_position;
         }
         else
         {
            // skip the escape character and decode what follows:
            ++m_position;
            literal = unescape_character();
         }
         break;
      }
   case regex_constants::syntax_open_set:
      {
         ++m_position;
         if(m_position == m_end)
         {
            fail(regex_constants::error_collate, m_position - m_base);
            return literal;
         }
         if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
         {
            // Not a collating element: the open-set character we just
            // stepped over is the literal, and m_position stays where it is.
            literal.first = *(m_position - 1);
            return literal;
         }
         // step over the opening dot:
         ++m_position;
         if(m_position == m_end)
         {
            fail(regex_constants::error_collate, m_position - m_base);
            return literal;
         }
         const charT* const name_begin = m_position;
         // The name has at least one character; consume it, then scan
         // forward for the dot that begins the closing ".]":
         ++m_position;
         if(m_position == m_end)
         {
            fail(regex_constants::error_collate, name_begin - m_base);
            return literal;
         }
         while(m_position != m_end)
         {
            if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dot)
               break;
            ++m_position;
         }
         const charT* const name_end = m_position;
         if(m_position == m_end)
         {
            fail(regex_constants::error_collate, name_begin - m_base);
            return literal;
         }
         // the closing dot must be followed directly by the close-set character:
         ++m_position;
         if((m_position == m_end)
            || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
         {
            fail(regex_constants::error_collate, name_begin - m_base);
            return literal;
         }
         ++m_position;
         string_type expansion = this->m_traits.lookup_collatename(name_begin, name_end);
         // only one- or two-character expansions can be held in a digraph:
         if(expansion.empty() || (expansion.size() > 2))
         {
            fail(regex_constants::error_collate, name_begin - m_base);
            return literal;
         }
         literal.first = expansion[0];
         if(expansion.size() > 1)
         {
            literal.second = expansion[1];
         }
         else
         {
            literal.second = 0;
         }
         return literal;
      }
   default:
      // an ordinary character:
      literal = *m_position;
      ++m_position;
   }
   return literal;
}
//
// does a value fit in the specified charT type?
//
template <class charT>
bool valid_value(charT, int v, const mpl::true_&)
{
return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
}
template <class charT>
bool valid_value(charT, int, const mpl::false_&)
{
return true; // v will alsways fit in a charT
}
template <class charT>
bool valid_value(charT c, int v)
{
return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>());
}
//
// Decodes the escape sequence at m_position (the character that follows
// the escape character) and returns the single character it denotes.
//
// Recognised forms, selected by traits::escape_syntax_type():
//   - the single-letter control escapes (alert, ESC (27), form feed,
//     newline, carriage return, tab, vertical tab, and backspace for the
//     word-assert letter);
//   - an ASCII control escape: the following character modulo 32;
//   - a hex escape: either "{digits}" or up to two hex digits;
//   - an octal escape: a leading zero followed by up to three octal digits;
//   - a named character: "{name}" looked up with lookup_collatename(), which
//     must expand to exactly one character;
//   - anything else: the character itself.
//
// On a malformed sequence fail() is called and charT(0) is returned.
// Numeric escapes whose value does not fit in charT (as judged by
// valid_value()) are rejected rather than silently truncated.
//
template <class charT, class traits>
charT basic_regex_parser<charT, traits>::unescape_character()
{
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable:4127)
#endif
   charT result(0);
   if(m_position == m_end)
   {
      fail(regex_constants::error_escape, m_position - m_base);
      return result;
   }
   switch(this->m_traits.escape_syntax_type(*m_position))
   {
   case regex_constants::escape_type_control_a:
      result = charT('\a');
      break;
   case regex_constants::escape_type_e:
      result = charT(27);
      break;
   case regex_constants::escape_type_control_f:
      result = charT('\f');
      break;
   case regex_constants::escape_type_control_n:
      result = charT('\n');
      break;
   case regex_constants::escape_type_control_r:
      result = charT('\r');
      break;
   case regex_constants::escape_type_control_t:
      result = charT('\t');
      break;
   case regex_constants::escape_type_control_v:
      result = charT('\v');
      break;
   case regex_constants::escape_type_word_assert:
      result = charT('\b');
      break;
   case regex_constants::escape_type_ascii_control:
      ++m_position;
      if(m_position == m_end)
      {
         fail(regex_constants::error_escape, m_position - m_base);
         return result;
      }
      /*
      if((*m_position < charT('@'))
            || (*m_position > charT(125)) )
      {
         fail(regex_constants::error_escape, m_position - m_base);
         return result;
      }
      */
      result = static_cast<charT>(*m_position % 32);
      break;
   case regex_constants::escape_type_hex:
      ++m_position;
      if(m_position == m_end)
      {
         fail(regex_constants::error_escape, m_position - m_base);
         return result;
      }
      // maybe have \x{ddd}
      if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
      {
         ++m_position;
         if(m_position == m_end)
         {
            fail(regex_constants::error_escape, m_position - m_base);
            return result;
         }
         int i = this->m_traits.toi(m_position, m_end, 16);
         // The range test must look at the untruncated int: converting to
         // charT first and then comparing against the charT maximum can
         // never fail, so oversized values used to wrap around silently.
         if((m_position == m_end)
            || (i < 0)
            || !valid_value(charT(0), i)
            || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
         {
            fail(regex_constants::error_badbrace, m_position - m_base);
            return result;
         }
         ++m_position;
         result = charT(i);
      }
      else
      {
         // no brace: at most two hex digits
         std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), m_end - m_position);
         int i = this->m_traits.toi(m_position, m_position + len, 16);
         if((i < 0)
            || !valid_value(charT(0), i))
         {
            fail(regex_constants::error_escape, m_position - m_base);
            return result;
         }
         result = charT(i);
      }
      // m_position has already been moved past the digits, so skip the
      // common increment at the end of the function:
      return result;
   case regex_constants::syntax_digit:
      {
      // an octal escape sequence, the first character must be a zero
      // followed by up to 3 octal digits:
      std::ptrdiff_t len = (std::min)(::boost::re_detail::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
      // probe the first digit through a copy so m_position is not moved yet:
      const charT* bp = m_position;
      int val = this->m_traits.toi(bp, bp + 1, 8);
      if(val != 0)
      {
         // Oops not an octal escape after all:
         fail(regex_constants::error_escape, m_position - m_base);
         return result;
      }
      val = this->m_traits.toi(m_position, m_position + len, 8);
      // reject values that cannot be represented in charT instead of
      // letting the cast below truncate them:
      if((val < 0)
         || !valid_value(charT(0), val))
      {
         fail(regex_constants::error_escape, m_position - m_base);
         return result;
      }
      return static_cast<charT>(val);
      }
   case regex_constants::escape_type_named_char:
      {
         ++m_position;
         if(m_position == m_end)
         {
            fail(regex_constants::error_escape, m_position - m_base);
            return result;
         }
         // maybe have \N{name}
         if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
         {
            const charT* base = m_position;
            // skip forward until we find enclosing brace:
            while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
               ++m_position;
            if(m_position == m_end)
            {
               fail(regex_constants::error_escape, m_position - m_base);
               return result;
            }
            // the name lies between the braces; m_position ends up just
            // past the closing brace:
            string_type s = this->m_traits.lookup_collatename(++base, m_position++);
            if(s.empty())
            {
               fail(regex_constants::error_collate, m_position - m_base);
               return result;
            }
            if(s.size() == 1)
            {
               return s[0];
            }
         }
         // fall through is a failure:
         fail(regex_constants::error_escape, m_position - m_base);
         return result;
      }
   default:
      // not a special escape: the character stands for itself
      result = *m_position;
      break;
   }
   // step over the last character of the escape sequence:
   ++m_position;
   return result;
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif
}
//
// Parses the digit at m_position as either a back-reference or an
// octal escape.
//
// - If the digit is zero, or the expression uses Perl syntax with
//   back-references disabled (no_bk_refs), the text is decoded with
//   unescape_character() and appended as a literal.
// - Otherwise, if the digit names a sub-expression recorded in
//   m_backrefs, a syntax_element_backref state is appended.
// - Anything else is reported as error_backref.
//
// Returns true on success, false after calling fail().
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_backref()
{
   BOOST_ASSERT(m_position != m_end);
   // convert through a copy so that m_position is only moved once we know
   // which interpretation applies:
   const charT* pc = m_position;
   int i = this->m_traits.toi(pc, pc + 1, 10);
   if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
   {
      // not a backref at all but an octal escape sequence:
      charT c = unescape_character();
      this->append_literal(c);
   }
   else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
   {
      // a reference to a sub-expression that has already been seen:
      m_position = pc;
      re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
      pb->index = i;
   }
   else
   {
      // The error offset is measured from the start of the expression
      // (m_base), like every other fail() call; measuring from m_end
      // produced a non-positive, meaningless position.
      fail(regex_constants::error_backref, m_position - m_base);
      return false;
   }
   return true;
}
//
// Parses a \Q...\E quoted sequence. On entry m_position is at the 'Q';
// every character up to the terminating \E (or up to the end of the
// expression when there is no \E) is appended as a literal.
//
// Returns false, after calling fail(), when the expression ends
// immediately after an escape character; true otherwise.
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_QE()
{
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable:4127)
#endif
   // step over the Q; the quoted text begins here:
   ++m_position;
   const charT* literal_first = m_position;
   const charT* literal_last = 0;
   for(;;)
   {
      // run forward to the next escape character, or to the end:
      while(m_position != m_end)
      {
         if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
            break;
         ++m_position;
      }
      if(m_position == m_end)
      {
         // no \E: the quoted text extends to the end of the expression.
         literal_last = m_position;
         break;
      }
      // step over the escape character itself:
      ++m_position;
      if(m_position == m_end)
      {
         fail(regex_constants::error_escape, m_position - m_base);
         return false;
      }
      if(this->m_traits.escape_syntax_type(*m_position) != regex_constants::escape_type_E)
      {
         // some other escape: it is part of the quoted text, keep scanning.
         continue;
      }
      // Found \E: the quoted text stops at the escape character that
      // precedes the E, and parsing resumes just after the E.
      literal_last = m_position - 1;
      ++m_position;
      break;
   }
   //
   // emit everything between the \Q and the terminator as literals:
   //
   for(const charT* p = literal_first; p != literal_last; ++p)
   {
      this->append_literal(*p);
   }
   return true;
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif
}
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_perl_extension()
{
if(++m_position == m_end)
{
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
}
//
// treat comments as a special case, as these
// are the only ones that don't start with a leading
// startmark state:
//
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -