📄 regex_compile.hpp
字号:
}
dat = add_simple(0, re_detail::syntax_element_endmark, sizeof(re_detail::re_brace));
static_cast<re_detail::re_brace*>(dat)->index = markid.peek();
markid.pop();
last_mark_popped = mark.peek();
mark.pop();
++ptr;
break;
case traits_type::syntax_char:
dat = add_literal(dat, (charT)c);
++ptr;
break;
case traits_type::syntax_slash:
{
if(++ptr == end)
{
fail(REG_EESCAPE);
return error_code();
}
c = (traits_size_type)(traits_uchar_type)*ptr;
// this is only used for the switch(), but cannot be folded in
// due to a bug in Comeau 4.2.44beta3
traits_size_type syntax = traits_inst.syntax_type(c);
switch(syntax)
{
case traits_type::syntax_open_bracket:
if(_flags & bk_parens)
goto open_bracked_jump;
break;
case traits_type::syntax_close_bracket:
if(_flags & bk_parens)
goto close_bracked_jump;
break;
case traits_type::syntax_plus:
if((_flags & bk_plus_qm) && ((_flags & limited_ops) == 0))
{
rep_min = 1;
rep_max = (unsigned)-1;
goto repeat_jump;
}
break;
case traits_type::syntax_question:
if((_flags & bk_plus_qm) && ((_flags & limited_ops) == 0))
{
rep_min = 0;
rep_max = 1;
goto repeat_jump;
}
break;
case traits_type::syntax_or:
if(((_flags & bk_vbar) == 0) || (_flags & limited_ops))
break;
goto alt_string_jump;
case traits_type::syntax_open_brace:
if( ((_flags & bk_braces) == 0) || ((_flags & intervals) == 0))
break;
// we have {x} or {x,} or {x,y}:
parse_range(ptr, end, rep_min, rep_max);
goto repeat_jump;
case traits_type::syntax_digit:
if(_flags & bk_refs)
{
// convert the digit to its value; non-zero selects a back-reference with that index:
int i = traits_inst.toi((charT)c);
if(i == 0)
{
// we can have \025 which means take char whose
// code is 25 (octal), so parse string:
c = traits_inst.toi(ptr, end, -8);
--ptr;
break;
}
dat = add_simple(dat, re_detail::syntax_element_backref, sizeof(re_detail::re_brace));
static_cast<re_detail::re_brace*>(dat)->index = i;
++ptr;
continue;
}
break;
case traits_type::syntax_b: // re_detail::syntax_element_word_boundary
dat = add_simple(dat, re_detail::syntax_element_word_boundary);
++ptr;
continue;
case traits_type::syntax_B:
dat = add_simple(dat, re_detail::syntax_element_within_word);
++ptr;
continue;
case traits_type::syntax_left_word:
dat = add_simple(dat, re_detail::syntax_element_word_start);
++ptr;
continue;
case traits_type::syntax_right_word:
dat = add_simple(dat, re_detail::syntax_element_word_end);
++ptr;
continue;
case traits_type::syntax_w: //re_detail::syntax_element_word_char
dat = compile_set_simple(dat, traits_type::char_class_word);
++ptr;
continue;
case traits_type::syntax_W:
dat = compile_set_simple(dat, traits_type::char_class_word, true);
++ptr;
continue;
case traits_type::syntax_d: // digit character class
dat = compile_set_simple(dat, traits_type::char_class_digit);
++ptr;
continue;
case traits_type::syntax_D:
dat = compile_set_simple(dat, traits_type::char_class_digit, true);
++ptr;
continue;
case traits_type::syntax_s: // space character class
dat = compile_set_simple(dat, traits_type::char_class_space);
++ptr;
continue;
case traits_type::syntax_S:
dat = compile_set_simple(dat, traits_type::char_class_space, true);
++ptr;
continue;
case traits_type::syntax_l: // lower-case character class
dat = compile_set_simple(dat, traits_type::char_class_lower);
++ptr;
continue;
case traits_type::syntax_L:
dat = compile_set_simple(dat, traits_type::char_class_lower, true);
++ptr;
continue;
case traits_type::syntax_u: // upper-case character class
dat = compile_set_simple(dat, traits_type::char_class_upper);
++ptr;
continue;
case traits_type::syntax_U:
dat = compile_set_simple(dat, traits_type::char_class_upper, true);
++ptr;
continue;
case traits_type::syntax_Q:
++ptr;
while(true)
{
if(ptr == end)
{
fail(REG_EESCAPE);
return error_code();
}
if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_slash)
{
++ptr;
if((ptr != end) && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_E))
break;
else
{
dat = add_literal(dat, *(ptr-1));
continue;
}
}
dat = add_literal(dat, *ptr);
++ptr;
}
++ptr;
continue;
case traits_type::syntax_C:
dat = add_simple(dat, re_detail::syntax_element_wild);
++ptr;
continue;
case traits_type::syntax_X:
dat = add_simple(dat, re_detail::syntax_element_combining);
++ptr;
continue;
case traits_type::syntax_Z:
dat = add_simple(dat, re_detail::syntax_element_soft_buffer_end);
++ptr;
continue;
case traits_type::syntax_G:
dat = add_simple(dat, re_detail::syntax_element_restart_continue);
++ptr;
continue;
case traits_type::syntax_start_buffer:
dat = add_simple(dat, re_detail::syntax_element_buffer_start);
++ptr;
continue;
case traits_type::syntax_end_buffer:
dat = add_simple(dat, re_detail::syntax_element_buffer_end);
++ptr;
continue;
default:
c = (traits_size_type)(traits_uchar_type)parse_escape(ptr, end);
dat = add_literal(dat, (charT)c);
continue;
}
dat = add_literal(dat, (charT)c);
++ptr;
break;
}
case traits_type::syntax_dollar:
dat = add_simple(dat, re_detail::syntax_element_end_line, sizeof(re_detail::re_syntax_base));
++ptr;
continue;
case traits_type::syntax_caret:
dat = add_simple(dat, re_detail::syntax_element_start_line, sizeof(re_detail::re_syntax_base));
++ptr;
continue;
case traits_type::syntax_dot:
dat = add_simple(dat, re_detail::syntax_element_wild, sizeof(re_detail::re_syntax_base));
++ptr;
continue;
case traits_type::syntax_star:
rep_min = 0;
rep_max = (unsigned)-1;
repeat_jump:
{
std::ptrdiff_t offset;
if(dat == 0)
{
fail(REG_BADRPT);
return error_code();
}
switch(dat->type)
{
case re_detail::syntax_element_endmark:
offset = last_mark_popped;
break;
case re_detail::syntax_element_literal:
if(static_cast<re_detail::re_literal*>(dat)->length > 1)
{
// split the trailing character off into its own single-char literal, so that only it gets repeated:
charT lit = *reinterpret_cast<charT*>(reinterpret_cast<char*>(dat) + sizeof(re_detail::re_literal) + ((static_cast<re_detail::re_literal*>(dat)->length-1)*sizeof(charT)));
--static_cast<re_detail::re_literal*>(dat)->length;
dat = add_simple(dat, re_detail::syntax_element_literal, sizeof(re_detail::re_literal) + sizeof(charT));
static_cast<re_detail::re_literal*>(dat)->length = 1;
*reinterpret_cast<charT*>(static_cast<re_detail::re_literal*>(dat)+1) = lit;
}
offset = reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data());
break;
case re_detail::syntax_element_backref:
case re_detail::syntax_element_long_set:
case re_detail::syntax_element_set:
case re_detail::syntax_element_wild:
case re_detail::syntax_element_combining:
// we're repeating a single item:
offset = reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data());
break;
default:
fail(REG_BADRPT);
return error_code();
}
data.align();
dat->next.i = data.size();
//unsigned pos = (char*)dat - (char*)data.data();
// add the trailing jump:
dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
static_cast<re_detail::re_jump*>(dat)->alt.i = 0;
// now insert the leading repeater:
dat = static_cast<re_detail::re_syntax_base*>(data.insert(offset, re_detail::re_repeater_size));
dat->next.i = (reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data())) + re_detail::re_repeater_size;
dat->type = re_detail::syntax_element_rep;
static_cast<re_detail::re_repeat*>(dat)->alt.i = data.size();
static_cast<re_detail::re_repeat*>(dat)->min = rep_min;
static_cast<re_detail::re_repeat*>(dat)->max = rep_max;
static_cast<re_detail::re_repeat*>(dat)->leading = false;
static_cast<re_detail::re_repeat*>(dat)->greedy = true;
move_offsets(dat, re_detail::re_repeater_size);
++ptr;
//
// now check to see if we have a non-greedy repeat:
if((ptr != end) && (_flags & (limited_ops | bk_plus_qm | bk_braces)) == 0)
{
c = (traits_size_type)(traits_uchar_type)*ptr;
if(traits_type::syntax_question == traits_inst.syntax_type(c))
{
// OK repeat is non-greedy:
static_cast<re_detail::re_repeat*>(dat)->greedy = false;
++ptr;
}
}
dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + data.size() - re_detail::re_jump_size);
static_cast<re_detail::re_repeat*>(dat)->alt.i = offset;
continue;
}
case traits_type::syntax_plus:
if(_flags & (bk_plus_qm | limited_ops))
{
dat = add_literal(dat, (charT)c);
++ptr;
continue;
}
rep_min = 1;
rep_max = (unsigned)-1;
goto repeat_jump;
case traits_type::syntax_question:
if(_flags & (bk_plus_qm | limited_ops))
{
dat = add_literal(dat, (charT)c);
++ptr;
continue;
}
rep_min = 0;
rep_max = 1;
goto repeat_jump;
case traits_type::syntax_open_set:
// update previous:
if(dat)
{
data.align();
dat->next.i = data.size();
}
// extend:
dat = compile_set(ptr, end);
if(dat == 0)
{
if((_flags & regbase::failbit) == 0)
fail(REG_EBRACK);
return error_code();
}
break;
case traits_type::syntax_or:
{
if(_flags & (bk_vbar | limited_ops))
{
dat = add_literal(dat, (charT)c);
++ptr;
continue;
}
alt_string_jump:
// update previous:
if(dat == 0)
{
// start of pattern can't have empty "|"
fail(REG_EMPTY);
return error_code();
}
// see if we have an empty alternative:
if(mark.empty() == false)
if(mark.peek() == data.index(dat))
{
fail(REG_EMPTY);
return error_code();
}
// extend:
dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
data.align();
//
// we don't know what value to put here yet,
// use an arbitrarily large value for now
// and check it later (TODO!)
static_cast<re_detail::re_jump*>(dat)->alt.i = INT_MAX/2;
// now work out where to insert:
std::size_t offset = 0;
if(mark.empty() == false)
{
// we have a '(' or '|' to go back to:
offset = mark.peek();
re_detail::re_syntax_base* base = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + offset);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -