📄 basic_regex_parser.hpp
字号:
std::size_t min, max;
int v;
// skip whitespace:
while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
++m_position;
// fail if at end:
if(this->m_position == this->m_end)
{
fail(regex_constants::error_brace, this->m_position - this->m_base);
return false;
}
// get min:
v = this->m_traits.toi(m_position, m_end, 10);
// skip whitespace:
while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
++m_position;
if(v < 0)
{
fail(regex_constants::error_badbrace, this->m_position - this->m_base);
return false;
}
else if(this->m_position == this->m_end)
{
fail(regex_constants::error_brace, this->m_position - this->m_base);
return false;
}
min = v;
// see if we have a comma:
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
{
// move on and error check:
++m_position;
// skip whitespace:
while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
++m_position;
if(this->m_position == this->m_end)
{
fail(regex_constants::error_brace, this->m_position - this->m_base);
return false;
}
// get the value if any:
v = this->m_traits.toi(m_position, m_end, 10);
max = (v >= 0) ? v : (std::numeric_limits<std::size_t>::max)();
}
else
{
// no comma, max = min:
max = min;
}
// skip whitespace:
while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
++m_position;
// OK now check trailing }:
if(this->m_position == this->m_end)
{
fail(regex_constants::error_brace, this->m_position - this->m_base);
return false;
}
if(isbasic)
{
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
{
++m_position;
if(this->m_position == this->m_end)
{
fail(regex_constants::error_brace, this->m_position - this->m_base);
return false;
}
}
else
{
fail(regex_constants::error_badbrace, this->m_position - this->m_base);
return false;
}
}
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
++m_position;
else
{
fail(regex_constants::error_badbrace, this->m_position - this->m_base);
return false;
}
//
// finally go and add the repeat, unless error:
//
if(min > max)
{
fail(regex_constants::error_range, this->m_position - this->m_base);
return false;
}
return parse_repeat(min, max);
}
//
// parse_alt:
// Handles an alternation operator.  The states emitted so far for the
// current alternative get a trailing jump appended and an alt state
// inserted at m_alt_insert_point; the offset of the jump is recorded in
// m_alt_jumps.
//
// Returns false (after reporting error_empty) when there is nothing
// to the left of the operator, true otherwise.
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_alt()
{
   //
   // an alternative with nothing before it - either no states at all,
   // or the previous state opened a marked sub-expression - is an error:
   //
   const bool nothing_on_left =
      (this->m_last_state == 0)
      || (this->m_last_state->type == syntax_element_startmark);
   if(nothing_on_left)
   {
      fail(regex_constants::error_empty, this->m_position - this->m_base);
      return false;
   }
   // step over the alternation operator:
   ++m_position;
   //
   // terminate the left hand alternative with a jump.  Its offset is
   // taken now, and adjusted up front for the alt state that is about
   // to be inserted before it:
   //
   re_syntax_base* trailing_jump = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump));
   const std::ptrdiff_t jump_offset = this->getoffset(trailing_jump) + re_alt_size;
   //
   // insert the alt state at the current insert point, and make it
   // refer to the (aligned) end of the data emitted so far:
   //
   re_alt* alt_state = static_cast<re_alt*>(
      this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
   this->m_pdata->m_data.align();
   alt_state->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt_state);
   //
   // any further alternate must be inserted at the start of the
   // right hand alternative we are about to parse:
   //
   this->m_alt_insert_point = this->m_pdata->m_data.size();
   //
   // if case sensitivity has been changed within the current block,
   // the new alternative has to begin by restating the current setting:
   //
   if(m_has_case_change)
   {
      re_case* case_state = static_cast<re_case*>(
         this->append_state(syntax_element_toggle_case, sizeof(re_case)));
      case_state->icase = this->m_icase;
   }
   //
   // remember the jump on an explicit stack rather than recursing:
   // recursion would be easier to follow (and faster as it happens),
   // but overflows the stack in programs with small stacks (COM+).
   //
   m_alt_jumps.push_back(jump_offset);
   return true;
}
//
// parse_set:
// Parses a bracket expression.  The character at m_position on entry
// (presumably the opening '[') is stepped over, the members of the set
// are accumulated in a local basic_char_set, and the result is handed
// to append_set() once the closing ']' is found.
//
// Returns true when the set has been appended, and also whenever
// parse_inner_set() returns false (which ends parsing of this set).
// Returns false when the expression ends immediately after the opening
// bracket (error_brack), when append_set() returns a null pointer
// (error_range), or when the input runs out before a closing ']'.
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_set()
{
   ++m_position;
   if(m_position == m_end)
   {
      fail(regex_constants::error_brack, m_position - m_base);
      return false;
   }
   basic_char_set<charT, traits> char_set;
   const charT* base = m_position;       // first character after the '['
   const charT* item_base = m_position;  // first character after the '[' or the leading '^'
   while(m_position != m_end)
   {
      switch(this->m_traits.syntax_type(*m_position))
      {
      case regex_constants::syntax_caret:
         if(m_position == base)
         {
            // a caret right at the start negates the whole set:
            char_set.negate();
            ++m_position;
            item_base = m_position;
         }
         else
            parse_set_literal(char_set);
         break;
      case regex_constants::syntax_close_set:
         if(m_position == item_base)
         {
            // a ']' in first position is an ordinary member of the set:
            parse_set_literal(char_set);
            break;
         }
         else
         {
            ++m_position;
            if(0 == this->append_set(char_set))
            {
               fail(regex_constants::error_range, m_position - m_base);
               return false;
            }
         }
         return true;
      case regex_constants::syntax_open_set:
         if(parse_inner_set(char_set))
            break;
         return true;
      case regex_constants::syntax_escape:
         {
            //
            // look ahead and see if this is a character class shortcut
            // \d \w \s etc...
            //
            ++m_position;
            //
            // only inspect the escaped character if there actually is
            // one: when the expression ends with the escape itself we
            // must not read past m_end.  In that case we drop through
            // and let parse_set_literal() deal with the dangling escape.
            //
            if(m_position != m_end)
            {
               if(this->m_traits.escape_syntax_type(*m_position)
                  == regex_constants::escape_type_class)
               {
                  char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
                  if(m != 0)
                  {
                     char_set.add_class(m);
                     ++m_position;
                     break;
                  }
               }
               else if(this->m_traits.escape_syntax_type(*m_position)
                  == regex_constants::escape_type_not_class)
               {
                  // negated character class:
                  char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
                  if(m != 0)
                  {
                     char_set.add_negated_class(m);
                     ++m_position;
                     break;
                  }
               }
            }
            // not a character class, just a regular escape:
            --m_position;
            parse_set_literal(char_set);
            break;
         }
      default:
         parse_set_literal(char_set);
         break;
      }
   }
   // the loop only ends here once the input has been exhausted:
   return m_position != m_end;
}
//
// parse_inner_set:
// Called from parse_set() when an opening bracket is met inside a set.
// The construct is one of:
//
//    [:name:]   a character class,
//    [.name.]   a collating element,
//    [=name=]   an equivalence class,
//
// anything else is handed to parse_set_literal().
//
// Returns true when the caller should carry on parsing the enclosing
// set.  Returns false after an error has been reported through fail(),
// and also after a word-start / word-end state has been appended for
// the special single-character class names handled below.
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
{
   // move on to the character that selects the kind of construct:
   ++m_position;
   if(m_position == m_end)
   {
      fail(regex_constants::error_brack, m_position - m_base);
      return false;
   }
   switch(this->m_traits.syntax_type(*m_position))
   {
   case regex_constants::syntax_dot:
      //
      // collating elements are dealt with as literals, back up so
      // that parse_set_literal sees the whole construct:
      //
      --m_position;
      parse_set_literal(char_set);
      return true;
   case regex_constants::syntax_colon:
      {
         //
         // character classes may have been switched off, in which
         // case the construct is just literal text:
         //
         const bool classes_disabled =
            (this->flags() & (regbase::main_option_type | regbase::no_char_classes))
            == (regbase::basic_syntax_group | regbase::no_char_classes);
         if(classes_disabled)
         {
            --m_position;
            parse_set_literal(char_set);
            return true;
         }
         // step over the ':' to the first character of the name:
         ++m_position;
         if(m_position == m_end)
         {
            fail(regex_constants::error_brack, m_position - m_base);
            return false;
         }
         const charT* class_begin = m_position;
         // the name has at least one character, skip it unconditionally:
         ++m_position;
         if(m_position == m_end)
         {
            fail(regex_constants::error_brack, m_position - m_base);
            return false;
         }
         // now scan forward to the ':' of the closing ":]":
         for(; (m_position != m_end)
               && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon);
             ++m_position)
         {
         }
         const charT* class_end = m_position;
         if(m_position == m_end)
         {
            fail(regex_constants::error_brack, m_position - m_base);
            return false;
         }
         // the ':' has to be followed directly by the closing bracket:
         ++m_position;
         if((m_position == m_end)
            || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
         {
            fail(regex_constants::error_brack, m_position - m_base);
            return false;
         }
         //
         // a leading caret inside the name negates the class:
         //
         const bool is_negated =
            (this->m_traits.syntax_type(*class_begin) == regex_constants::syntax_caret);
         if(is_negated)
            ++class_begin;
         typedef typename traits::char_class_type mask_type;
         mask_type mask = this->m_traits.lookup_classname(class_begin, class_end);
         if(mask == 0)
         {
            //
            // unknown class name.  One special case remains: a one
            // character name in an otherwise empty set, followed by a
            // further closing bracket, may denote a word boundary:
            //
            if(char_set.empty() && (class_end - class_begin == 1))
            {
               ++m_position;
               const bool set_closed_here =
                  (m_position != m_end)
                  && (this->m_traits.syntax_type(*m_position)
                     == regex_constants::syntax_close_set);
               if(set_closed_here)
               {
                  if(this->m_traits.escape_syntax_type(*class_begin)
                     == regex_constants::escape_type_left_word)
                  {
                     ++m_position;
                     this->append_state(syntax_element_word_start);
                     return false;
                  }
                  if(this->m_traits.escape_syntax_type(*class_begin)
                     == regex_constants::escape_type_right_word)
                  {
                     ++m_position;
                     this->append_state(syntax_element_word_end);
                     return false;
                  }
               }
            }
            fail(regex_constants::error_ctype, class_begin - m_base);
            return false;
         }
         if(is_negated)
            char_set.add_negated_class(mask);
         else
            char_set.add_class(mask);
         // step over the closing bracket of the class:
         ++m_position;
         break;
      }
   case regex_constants::syntax_equal:
      {
         // step over the '=' to the first character of the name:
         ++m_position;
         if(m_position == m_end)
         {
            fail(regex_constants::error_brack, m_position - m_base);
            return false;
         }
         const charT* equiv_begin = m_position;
         // the name has at least one character, skip it unconditionally:
         ++m_position;
         if(m_position == m_end)
         {
            fail(regex_constants::error_brack, m_position - m_base);
            return false;
         }
         // now scan forward to the '=' of the closing "=]":
         for(; (m_position != m_end)
               && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal);
             ++m_position)
         {
         }
         const charT* equiv_end = m_position;
         if(m_position == m_end)
         {
            fail(regex_constants::error_brack, m_position - m_base);
            return false;
         }
         // the '=' has to be followed directly by the closing bracket:
         ++m_position;
         if((m_position == m_end)
            || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
         {
            fail(regex_constants::error_brack, m_position - m_base);
            return false;
         }
         //
         // the name must resolve to a collating element of either
         // one or two characters:
         //
         string_type element = this->m_traits.lookup_collatename(equiv_begin, equiv_end);
         if((element.size() == 0) || (element.size() > 2))
         {
            fail(regex_constants::error_collate, equiv_begin - m_base);
            return false;
         }
         digraph<charT> equivalent;
         equivalent.first = element[0];
         if(element.size() == 1)
            equivalent.second = 0;
         else
            equivalent.second = element[1];
         char_set.add_equivalent(equivalent);
         // step over the closing bracket of the equivalence class:
         ++m_position;
         break;
      }
   default:
      // none of the above: the bracket is an ordinary set member.
      --m_position;
      parse_set_literal(char_set);
      break;
   }
   return true;
}
template <class charT, class traits>
void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
{
digraph<charT> start_range(get_next_set_literal(char_set));
if(m_end == m_position)
{
fail(regex_constants::error_brack, m_position - m_base);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -