📄 basic_regex_parser.hpp
字号:
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
{
while((m_position != m_end)
&& (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
{}
return true;
}
//
// backup some state, and prepare the way:
//
int markid = 0;
std::ptrdiff_t jump_offset = 0;
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
std::ptrdiff_t last_paren_start = this->getoffset(pb);
// back up insertion point for alternations, and set new point:
std::ptrdiff_t last_alt_point = m_alt_insert_point;
this->m_pdata->m_data.align();
m_alt_insert_point = this->m_pdata->m_data.size();
std::ptrdiff_t expected_alt_point = m_alt_insert_point;
bool restore_flags = true;
regex_constants::syntax_option_type old_flags = this->flags();
bool old_case_change = m_has_case_change;
m_has_case_change = false;
//
// select the actual extension used:
//
switch(this->m_traits.syntax_type(*m_position))
{
case regex_constants::syntax_colon:
//
// a non-capturing mark:
//
pb->index = markid = 0;
++m_position;
break;
case regex_constants::syntax_equal:
pb->index = markid = -1;
++m_position;
jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
this->m_pdata->m_data.align();
m_alt_insert_point = this->m_pdata->m_data.size();
break;
case regex_constants::syntax_not:
pb->index = markid = -2;
++m_position;
jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
this->m_pdata->m_data.align();
m_alt_insert_point = this->m_pdata->m_data.size();
break;
case regex_constants::escape_type_left_word:
{
// a lookbehind assertion:
if(++m_position == m_end)
{
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
}
regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
if(t == regex_constants::syntax_not)
pb->index = markid = -2;
else if(t == regex_constants::syntax_equal)
pb->index = markid = -1;
else
{
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
}
++m_position;
jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
this->append_state(syntax_element_backstep, sizeof(re_brace));
this->m_pdata->m_data.align();
m_alt_insert_point = this->m_pdata->m_data.size();
break;
}
case regex_constants::escape_type_right_word:
//
// an independent sub-expression:
//
pb->index = markid = -3;
++m_position;
jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
this->m_pdata->m_data.align();
m_alt_insert_point = this->m_pdata->m_data.size();
break;
case regex_constants::syntax_open_mark:
{
// a conditional expression:
pb->index = markid = -4;
if(++m_position == m_end)
{
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
}
int v = this->m_traits.toi(m_position, m_end, 10);
if(v > 0)
{
re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
br->index = v;
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
{
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
}
if(++m_position == m_end)
{
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
}
}
else
{
// verify that we have a lookahead or lookbehind assert:
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
{
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
}
if(++m_position == m_end)
{
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
}
if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
{
if(++m_position == m_end)
{
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
}
if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
{
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
}
m_position -= 3;
}
else
{
if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
{
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
}
m_position -= 2;
}
}
break;
}
case regex_constants::syntax_close_mark:
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
default:
//
// lets assume that we have a (?imsx) group and try and parse it:
//
regex_constants::syntax_option_type opts = parse_options();
if(m_position == m_end)
return false;
// make a note of whether we have a case change:
m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
pb->index = markid = 0;
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
{
// update flags and carry on as normal:
this->flags(opts);
restore_flags = false;
old_case_change |= m_has_case_change; // defer end of scope by one ')'
}
else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
{
// update flags and carry on until the matching ')' is found:
this->flags(opts);
++m_position;
}
else
{
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
}
// finally append a case change state if we need it:
if(m_has_case_change)
{
static_cast<re_case*>(
this->append_state(syntax_element_toggle_case, sizeof(re_case))
)->icase = opts & regbase::icase;
}
}
//
// now recursively add more states, this will terminate when we get to a
// matching ')' :
//
parse_all();
//
// Unwind alternatives:
//
if(0 == unwind_alts(last_paren_start))
return false;
//
// we either have a ')' or we have run out of characters prematurely:
//
if(m_position == m_end)
{
this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
return false;
}
BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
++m_position;
//
// restore the flags:
//
if(restore_flags)
{
// append a case change state if we need it:
if(m_has_case_change)
{
static_cast<re_case*>(
this->append_state(syntax_element_toggle_case, sizeof(re_case))
)->icase = old_flags & regbase::icase;
}
this->flags(old_flags);
}
//
// set up the jump pointer if we have one:
//
if(jump_offset)
{
this->m_pdata->m_data.align();
re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
if(this->m_last_state == jmp)
{
// Oops... we didn't have anything inside the assertion:
fail(regex_constants::error_empty, m_position - m_base);
return false;
}
}
//
// verify that if this is conditional expression, that we do have
// an alternative, if not add one:
//
if(markid == -4)
{
re_syntax_base* b = this->getaddress(expected_alt_point);
if(b->type != syntax_element_alt)
{
re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
}
else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
{
fail(regex_constants::error_bad_pattern, m_position - m_base);
return false;
}
}
//
// append closing parenthesis state:
//
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
pb->index = markid;
this->m_paren_start = last_paren_start;
//
// restore the alternate insertion point:
//
this->m_alt_insert_point = last_alt_point;
//
// and the case change data:
//
m_has_case_change = old_case_change;
return true;
}
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
{
   //
   // Builds the character set for an emacs style \sx or \Sx construct.
   //
   // m_position is advanced by one to reach the selector character x,
   // a set is assembled from that selector (negated first when "negate"
   // is true), and the set is appended to the state machine.
   //
   // Returns true on success with m_position moved past the selector;
   // otherwise fail() is called and false is returned.
   //
   ++m_position;
   if(m_position == m_end)
   {
      // nothing follows the escape, so there is no selector to read:
      fail(regex_constants::error_escape, m_position - m_base);
      return false;
   }

   basic_char_set<charT, traits> selected;
   if(negate)
      selected.negate();

   // name handed to the traits class when the '.' selector is used:
   static const charT punct_name[5] = { 'p', 'u', 'n', 'c', 't', };

   //
   // Selectors either add a whole class mask straight away, or pick a
   // list of literal characters which are added one by one below.  The
   // list stays empty for the class based selectors.
   //
   const char* singles = "";
   switch(*m_position)
   {
   case 's':
   case ' ':
      selected.add_class(this->m_mask_space);
      break;
   case 'w':
      selected.add_class(this->m_word_mask);
      break;
   case '.':
      selected.add_class(this->m_traits.lookup_classname(punct_name, punct_name + 5));
      break;
   case '_':
      singles = "$&*+-_<>";
      break;
   case '(':
      singles = "([{";
      break;
   case ')':
      singles = ")]}";
      break;
   case '"':
      singles = "\"'`";
      break;
   case '\'':
      singles = "',#";
      break;
   case '<':
      singles = ";";
      break;
   case '>':
      singles = "\n\f";
      break;
   default:
      // not a selector we recognise:
      fail(regex_constants::error_ctype, m_position - m_base);
      return false;
   }

   // add the literal members (if any) in the order they are listed:
   for(const char* next = singles; *next != '\0'; ++next)
      selected.add_single(digraph<charT>(charT(*next)));

   if(this->append_set(selected) == 0)
   {
      // the set could not be turned into a state:
      fail(regex_constants::error_ctype, m_position - m_base);
      return false;
   }

   // step over the selector character now that it has been consumed:
   ++m_position;
   return true;
}
template <class charT, class traits>
regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
{
// we have a (?imsx-imsx) group, convert it into a set of flags:
regex_constants::syntax_option_type f = this->flags();
bool breakout = false;
do
{
switch(*m_position)
{
case 's':
f |= regex_constants::mod_s;
f &= ~regex_constants::no_mod_s;
break;
case 'm':
f &= ~regex_constants::no_mod_m;
break;
case 'i':
f |= regex_constants::icase;
break;
case 'x':
f |= regex_constants::mod_x;
break;
default:
breakout = true;
continue;
}
if(++m_position == m_end)
{
fail(regex_constants::error_paren, m_position - m_base);
return false;
}
}
while(!breakout);
if(*m_position == static_cast<charT>('-'))
{
if(++m_position == m_end)
{
fail(regex_constants::error_paren, m_position - m_base);
return false;
}
do
{
swi
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -