basic_regex_parser.hpp
来自「support vector clustering for vc++」· HPP 代码 · 共 2,007 行 · 第 1/5 页
HPP
2,007 行
// we either have a ')' or we have run out of characters prematurely:
//
if(m_position == m_end)
{
this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
return false;
}
BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
++m_position;
//
// append closing parenthesis state:
//
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
pb->index = markid;
this->m_paren_start = last_paren_start;
//
// restore the alternate insertion point:
//
this->m_alt_insert_point = last_alt_point;
//
// allow backrefs to this mark:
//
if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
this->m_backrefs |= 1u << (markid - 1);
return true;
}
//
// Parses what follows an escape character when the basic (and emacs)
// escape rules are in force.  The function first steps m_position over
// the escape character, classifies the next character via
// escape_syntax_type(), and then either dispatches to a dedicated parser,
// appends an assertion state, or treats the character as a literal.
//
// Returns the result of whichever sub-parser handled the escape; false is
// returned after fail() has been called, and also (without an error being
// recorded) for an escaped close-mark - presumably so that the enclosing
// group parser can consume it.
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_basic_escape()
{
   //
   // step over the escape character; if the expression ends right here
   // there is nothing left to classify, so report the incomplete escape
   // instead of dereferencing the end of the expression:
   //
   ++m_position;
   if(m_position == m_end)
   {
      fail(regex_constants::error_escape, m_position - m_base);
      return false;
   }
   bool result = true;
   switch(this->m_traits.escape_syntax_type(*m_position))
   {
   case regex_constants::syntax_open_mark:
      return parse_open_paren();
   case regex_constants::syntax_close_mark:
      // no error is recorded here, the caller decides what this means:
      return false;
   case regex_constants::syntax_plus:
      // escaped '+' is only a repeat when bk_plus_qm is set:
      if(this->flags() & regex_constants::bk_plus_qm)
      {
         ++m_position;
         return parse_repeat(1);
      }
      else
         return parse_literal();
   case regex_constants::syntax_question:
      // escaped '?' is only a repeat when bk_plus_qm is set:
      if(this->flags() & regex_constants::bk_plus_qm)
      {
         ++m_position;
         return parse_repeat(0, 1);
      }
      else
         return parse_literal();
   case regex_constants::syntax_open_brace:
      if(this->flags() & regbase::no_intervals)
         return parse_literal();
      ++m_position;
      return parse_repeat_range(true);
   case regex_constants::syntax_close_brace:
      if(this->flags() & regbase::no_intervals)
         return parse_literal();
      // a closing brace with intervals enabled is an error at this point:
      fail(regex_constants::error_brace, this->m_position - this->m_base);
      return false;
   case regex_constants::syntax_or:
      if(this->flags() & regbase::bk_vbar)
         return parse_alt();
      else
         result = parse_literal();
      break;
   case regex_constants::syntax_digit:
      return parse_backref();
   //
   // the remaining named escapes are assertions only when emacs_ex is
   // set, otherwise the escaped character is an ordinary literal:
   //
   case regex_constants::escape_type_start_buffer:
      if(this->flags() & regbase::emacs_ex)
      {
         ++m_position;
         this->append_state(syntax_element_buffer_start);
      }
      else
         result = parse_literal();
      break;
   case regex_constants::escape_type_end_buffer:
      if(this->flags() & regbase::emacs_ex)
      {
         ++m_position;
         this->append_state(syntax_element_buffer_end);
      }
      else
         result = parse_literal();
      break;
   case regex_constants::escape_type_word_assert:
      if(this->flags() & regbase::emacs_ex)
      {
         ++m_position;
         this->append_state(syntax_element_word_boundary);
      }
      else
         result = parse_literal();
      break;
   case regex_constants::escape_type_not_word_assert:
      if(this->flags() & regbase::emacs_ex)
      {
         ++m_position;
         this->append_state(syntax_element_within_word);
      }
      else
         result = parse_literal();
      break;
   case regex_constants::escape_type_left_word:
      if(this->flags() & regbase::emacs_ex)
      {
         ++m_position;
         this->append_state(syntax_element_word_start);
      }
      else
         result = parse_literal();
      break;
   case regex_constants::escape_type_right_word:
      if(this->flags() & regbase::emacs_ex)
      {
         ++m_position;
         this->append_state(syntax_element_word_end);
      }
      else
         result = parse_literal();
      break;
   default:
      if(this->flags() & regbase::emacs_ex)
      {
         // upper case forms are the negated variants of the lower case ones:
         bool negate = true;
         switch(*m_position)
         {
         case 'w':
            negate = false;
            // fall through:
         case 'W':
            {
               // build a (possibly negated) set from the word class mask:
               basic_char_set<charT, traits> char_set;
               if(negate)
                  char_set.negate();
               char_set.add_class(this->m_word_mask);
               if(0 == this->append_set(char_set))
               {
                  fail(regex_constants::error_ctype, m_position - m_base);
                  return false;
               }
               ++m_position;
               return true;
            }
         case 's':
            negate = false;
            // fall through:
         case 'S':
            return add_emacs_code(negate);
         case 'c':
         case 'C':
            // not supported yet:
            fail(regex_constants::error_escape, m_position - m_base);
            return false;
         default:
            break;
         }
      }
      // anything not recognised above is just an escaped literal:
      result = parse_literal();
      break;
   }
   return result;
}
//
// Parses what follows an escape character when the extended escape rules
// are in force.  The function steps m_position over the escape character,
// classifies the next character via escape_syntax_type(), and then appends
// the matching state (assertion, character set, ...) or hands over to a
// dedicated parser.  Anything that is not recognised is converted with
// unescape_character() and appended as a literal.
//
// Returns true when the escape was handled here, the sub-parser's result
// when one was called, and false after fail() has been called.
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_extended_escape()
{
   //
   // step over the escape character; if the expression ends right here
   // there is nothing left to classify, so report the incomplete escape
   // instead of dereferencing the end of the expression:
   //
   ++m_position;
   if(m_position == m_end)
   {
      fail(regex_constants::error_escape, m_position - m_base);
      return false;
   }
   bool negate = false; // in case this is a character class escape: \w \d etc
   switch(this->m_traits.escape_syntax_type(*m_position))
   {
   case regex_constants::escape_type_not_class:
      negate = true;
      // fall through:
   case regex_constants::escape_type_class:
      {
         // look the single escaped character up as a class name:
         typedef typename traits::char_class_type mask_type;
         mask_type m = this->m_traits.lookup_classname(m_position, m_position+1);
         if(m != 0)
         {
            basic_char_set<charT, traits> char_set;
            if(negate)
               char_set.negate();
            char_set.add_class(m);
            if(0 == this->append_set(char_set))
            {
               fail(regex_constants::error_ctype, m_position - m_base);
               return false;
            }
            ++m_position;
            return true;
         }
         //
         // not a class, just a regular unknown escape:
         //
         this->append_literal(unescape_character());
         break;
      }
   case regex_constants::syntax_digit:
      return parse_backref();
   case regex_constants::escape_type_left_word:
      ++m_position;
      this->append_state(syntax_element_word_start);
      break;
   case regex_constants::escape_type_right_word:
      ++m_position;
      this->append_state(syntax_element_word_end);
      break;
   case regex_constants::escape_type_start_buffer:
      ++m_position;
      this->append_state(syntax_element_buffer_start);
      break;
   case regex_constants::escape_type_end_buffer:
      ++m_position;
      this->append_state(syntax_element_buffer_end);
      break;
   case regex_constants::escape_type_word_assert:
      ++m_position;
      this->append_state(syntax_element_word_boundary);
      break;
   case regex_constants::escape_type_not_word_assert:
      ++m_position;
      this->append_state(syntax_element_within_word);
      break;
   case regex_constants::escape_type_Z:
      ++m_position;
      this->append_state(syntax_element_soft_buffer_end);
      break;
   case regex_constants::escape_type_Q:
      return parse_QE();
   case regex_constants::escape_type_C:
      return parse_match_any();
   case regex_constants::escape_type_X:
      ++m_position;
      this->append_state(syntax_element_combining);
      break;
   case regex_constants::escape_type_G:
      ++m_position;
      this->append_state(syntax_element_restart_continue);
      break;
   case regex_constants::escape_type_not_property:
      negate = true;
      // fall through:
   case regex_constants::escape_type_property:
      {
         ++m_position;
         char_class_type m;
         if(m_position == m_end)
         {
            fail(regex_constants::error_escape, m_position - m_base);
            return false;
         }
         // maybe have \p{ddd}
         if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
         {
            const charT* base = m_position;
            // skip forward until we find enclosing brace:
            while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
               ++m_position;
            if(m_position == m_end)
            {
               fail(regex_constants::error_escape, m_position - m_base);
               return false;
            }
            // the name lies between the braces; then step over the '}':
            ++base;
            m = this->m_traits.lookup_classname(base, m_position);
            ++m_position;
         }
         else
         {
            // single character property name:
            m = this->m_traits.lookup_classname(m_position, m_position+1);
            ++m_position;
         }
         if(m != 0)
         {
            basic_char_set<charT, traits> char_set;
            if(negate)
               char_set.negate();
            char_set.add_class(m);
            if(0 == this->append_set(char_set))
            {
               fail(regex_constants::error_ctype, m_position - m_base);
               return false;
            }
            return true;
         }
         //
         // the name is not a known class: this is an error, so stop here
         // rather than dropping into the literal handling below and
         // reporting success:
         //
         fail(regex_constants::error_ctype, m_position - m_base);
         return false;
      }
   default:
      this->append_literal(unescape_character());
      break;
   }
   return true;
}
//
// Consumes the current character and appends a syntax_element_wild state
// for it.  The state's mask is chosen from the expression flags:
// no_mod_s selects force_not_newline, otherwise mod_s selects
// force_newline, and with neither flag set the mask is dont_care.
// Always returns true.
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_match_any()
{
   // step over the character that introduced the wildcard:
   ++m_position;
   // work out the newline handling, no_mod_s takes priority over mod_s:
   unsigned char newline_mode;
   if(this->flags() & regbase::no_mod_s)
   {
      newline_mode = static_cast<unsigned char>(re_detail::force_not_newline);
   }
   else if(this->flags() & regbase::mod_s)
   {
      newline_mode = static_cast<unsigned char>(re_detail::force_newline);
   }
   else
   {
      newline_mode = static_cast<unsigned char>(re_detail::dont_care);
   }
   // append the wildcard state and record the chosen mode in it:
   re_dot* wild = static_cast<re_dot*>(this->append_state(syntax_element_wild, sizeof(re_dot)));
   wild->mask = newline_mode;
   return true;
}
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
{
bool greedy = true;
std::size_t insert_point;
//
// when we get to here we may have a non-greedy ? mark still to come:
//
if((m_position != m_end)
&& (
(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
|| ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
)
)
{
// OK we have a perl regex, check for a '?':
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
{
greedy = false;
++m_position;
}
}
if(0 == this->m_last_state)
{
fail(regex_constants::error_badrepeat, ::boost::re_detail::distance(m_base, m_position));
return false;
}
if(this->m_last_state->type == syntax_element_endmark)
{
// insert a repeat before the '(' matching the last ')':
insert_point = this->m_paren_start;
}
else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
{
// the last state was a literal with more than one character, split it in two:
re_literal* lit = static_cast<re_literal*>(this->m_last_state);
charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
--(lit->length);
// now append new state:
lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
lit->length = 1;
(static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
insert_point = this->getoffset(this->m_last_state);
}
else
{
// repeat the last state whatever it was, need to add some error checking here:
switch(this->m_last_state->type)
{
case syntax_element_start_line:
case syntax_element_end_line:
case syntax_element_word_boundary:
case syntax_element_within_word:
case syntax_element_word_start:
case syntax_element_word_end:
case syntax_element_buffer_start:
case syntax_element_buffer_end:
case syntax_element_alt:
case syntax_element_soft_buffer_end:
case syntax_element_restart_continue:
case syntax_element_jump:
case syntax_element_startmark:
// can't legally repeat any of the above:
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
default:
// do nothing...
break;
}
insert_point = this->getoffset(this->m_last_state);
}
//
// OK we now know what to repeat, so insert the repeat around it:
//
re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
rep->min = low;
rep->max = high;
rep->greedy = greedy;
rep->leading = false;
// store our repeater position for later:
std::ptrdiff_t rep_off = this->getoffset(rep);
// and append a back jump to the repeat:
re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
jmp->alt.i = rep_off - this->getoffset(jmp);
this->m_pdata->m_data.align();
// now fill in the alt jump for the repeat:
rep = static_cast<re_repeat*>(this->getaddress(rep_off));
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?