📄 regex_compile.hpp
字号:
unsigned int inner_set = parse_inner_set(first, last);
switch(inner_set)
{
case traits_type::syntax_colon:
{
if(l == last_dash)
{
fail(REG_ERANGE);
return 0;
}
boost::uint_fast32_t id = traits_inst.lookup_classname(base+2, first-2);
if(_flags & regbase::icase)
{
if((id == traits_type::char_class_upper) || (id == traits_type::char_class_lower))
{
id = traits_type::char_class_alpha;
}
}
if(id == 0)
{
fail(REG_ECTYPE);
return 0;
}
classes.push(id);
started = true;
l = last_none;
}
break;
case traits_type::syntax_dot:
//
// we have a collating element [.collating-name.]
//
if(traits_inst.lookup_collatename(s, base+2, first-2))
{
--first;
if(s.size() > 1)
has_digraphs = true;
if(s.size())goto char_set_literal;
}
fail(REG_ECOLLATE);
return 0;
case traits_type::syntax_equal:
//
// we have an equivalence class [=collating-name=]
//
if(traits_inst.lookup_collatename(s, base+2, first-2))
{
std::size_t len = s.size();
if(len)
{
unsigned i = 0;
while(i < len)
{
s[i] = traits_inst.translate(s[i], (_flags & regbase::icase));
++i;
}
traits_string_type s2;
traits_inst.transform_primary(s2, s);
equivalents.push(s2);
started = true;
l = last_none;
break;
}
}
fail(REG_ECOLLATE);
return 0;
case traits_type::syntax_left_word:
if((started == false) && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_close_set))
{
++first;
return add_simple(0, re_detail::syntax_element_word_start);
}
fail(REG_EBRACK);
return 0;
case traits_type::syntax_right_word:
if((started == false) && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_close_set))
{
++first;
return add_simple(0, re_detail::syntax_element_word_end);
}
fail(REG_EBRACK);
return 0;
default:
if(started == false)
{
unsigned int t = traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*(base+1));
if((t != traits_type::syntax_colon) && (t != traits_type::syntax_dot) && (t != traits_type::syntax_equal))
{
first = base;
s = (charT)c;
goto char_set_literal;
}
}
fail(REG_EBRACK);
return 0;
}
if(first == last)
{
fail(REG_EBRACK);
return 0;
}
continue;
}
case traits_type::syntax_close_set:
if(started == false)
{
s = (charT)c;
goto char_set_literal;
}
done = true;
break;
case traits_type::syntax_dash:
if(!started)
{
s = (charT)c;
goto char_set_literal;
}
++first;
if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_close_set)
{
--first;
s = (charT)c;
goto char_set_literal;
}
if((singles.empty() == true) || (l != last_single))
{
fail(REG_ERANGE);
return 0;
}
ranges.push(singles.peek());
if(singles.peek().size() <= 1) // leave digraphs and ligatures in place
singles.pop();
l = last_dash;
continue;
case traits_type::syntax_slash:
if(_flags & regbase::escape_in_lists)
{
++first;
if(first == last)
continue;
traits_size_type c = (traits_size_type)(traits_uchar_type)*first;
// this is only used for the switch(), but cannot be folded in
// due to a bug in Comeau 4.2.44beta3
traits_size_type syntax = traits_inst.syntax_type(c);
switch(syntax)
{
case traits_type::syntax_w:
if(l == last_dash)
{
fail(REG_ERANGE);
return 0;
}
classes.push(traits_type::char_class_word);
started = true;
l = last_none;
++first;
continue;
case traits_type::syntax_d:
if(l == last_dash)
{
fail(REG_ERANGE);
return 0;
}
classes.push(traits_type::char_class_digit);
started = true;
l = last_none;
++first;
continue;
case traits_type::syntax_s:
if(l == last_dash)
{
fail(REG_ERANGE);
return 0;
}
classes.push(traits_type::char_class_space);
started = true;
l = last_none;
++first;
continue;
case traits_type::syntax_l:
if(l == last_dash)
{
fail(REG_ERANGE);
return 0;
}
classes.push(traits_type::char_class_lower);
started = true;
l = last_none;
++first;
continue;
case traits_type::syntax_u:
if(l == last_dash)
{
fail(REG_ERANGE);
return 0;
}
classes.push(traits_type::char_class_upper);
started = true;
l = last_none;
++first;
continue;
case traits_type::syntax_W:
case traits_type::syntax_D:
case traits_type::syntax_S:
case traits_type::syntax_U:
case traits_type::syntax_L:
fail(REG_EESCAPE);
return 0;
default:
c = parse_escape(first, last);
--first;
s = (charT)c;
goto char_set_literal;
}
}
else
{
s = (charT)c;
goto char_set_literal;
}
default:
s = (charT)c;
char_set_literal:
unsigned i = 0;
// get string length to stop us going past the end of string (DWA)
std::size_t len = s.size();
while(i < len)
{
s[i] = traits_inst.translate(s[i], (_flags & regbase::icase));
++i;
}
started = true;
if(l == last_dash)
{
ranges.push(s);
l = last_none;
if(s.size() > 1) // add ligatures to singles list as well
singles.push(s);
}
else
{
singles.push(s);
l = last_single;
}
}
++first;
}
if(!done)
return 0;
typedef typename re_detail::is_byte<charT>::width_type width_type;
re_detail::re_syntax_base* result;
if(has_digraphs)
result = compile_set_aux(singles, ranges, classes, equivalents, isnot, re_detail::_wide_type());
else
result = compile_set_aux(singles, ranges, classes, equivalents, isnot, width_type());
#ifdef __BORLANDC__
// delayed throw:
if((result == 0) && (_flags & regbase::use_except))
fail(error_code());
#endif
return result;
}
//
// compile_set_aux (overload selected by the re_detail::_wide_type tag):
//
// Appends a re_detail::re_set_long record to the end of `data`, followed by
// the string payload of the set, and drains all four input stacks:
//
//   singles      - each entry is copied verbatim, terminator included.
//   ranges       - consumed two entries at a time; each entry is either used
//                  as-is (regbase::nocollate set) or passed through
//                  traits_inst.transform() first, then both are copied.
//   classes      - the masks are OR-ed together into a single value.
//   equivalents  - each entry is copied up to its first terminator.
//
// isnot is stored unchanged in the record.
//
// Returns a pointer to the new record, or 0 after calling fail(REG_ERANGE)
// when the first key popped for a range compares less than the second.
//
template <class charT, class traits, class Allocator>
re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set_aux(re_detail::jstack<traits_string_type, Allocator>& singles, re_detail::jstack<traits_string_type, Allocator>& ranges, re_detail::jstack<boost::uint_fast32_t, Allocator>& classes, re_detail::jstack<traits_string_type, Allocator>& equivalents, bool isnot, const re_detail::_wide_type&)
{
   // Keep the record's position as an offset rather than a pointer; the
   // pointer is only formed once every extend() call has been made
   // (presumably because extend() may relocate the buffer - confirm).
   const size_type header_offset = data.size();
   data.extend(sizeof(re_detail::re_set_long));

   unsigned int single_count = 0;
   unsigned int range_count = 0;
   boost::uint_fast32_t class_mask = 0;
   unsigned int equivalent_count = 0;
   const bool skip_collation = (flags() & regbase::nocollate) != 0;

   //
   // single characters / collating elements:
   //
   for(; !singles.empty(); singles.pop())
   {
      const traits_string_type& item = singles.peek();
      // size() + 1 characters, so the terminator from c_str() is copied too
      const std::size_t byte_count = (item.size() + 1) * sizeof(charT);
      ++single_count;
      std::memcpy(reinterpret_cast<charT*>(data.extend(byte_count)), item.c_str(), byte_count);
   }

   //
   // ranges, taken off the stack in pairs:
   //
   while(!ranges.empty())
   {
      // top_key is popped first; assuming jstack is LIFO it is the entry
      // pushed last, i.e. the range's end point - TODO confirm.
      traits_string_type top_key, next_key;

      if(skip_collation)
      {
         top_key = ranges.peek();
      }
      else
      {
         traits_inst.transform(top_key, ranges.peek());
      }
      ranges.pop();

      if(skip_collation)
      {
         next_key = ranges.peek();
      }
      else
      {
         traits_inst.transform(next_key, ranges.peek());
      }
      ranges.pop();

      if(top_key < next_key)
      {
         // bc5 has been seen to crash when an exception is thrown from
         // this point (probably a compiler EH bug, but hard to be sure),
         // so under Borland use_except is masked out around fail() and
         // the throw is delayed until later:
#ifdef __BORLANDC__
         boost::uint_fast32_t saved_flags = _flags;
         _flags &= ~regbase::use_except;
#endif
         fail(REG_ERANGE);
#ifdef __BORLANDC__
         _flags = saved_flags;
#endif
         return 0;
      }

      ++range_count;
      // the keys are written in the order they were popped, each one
      // measured with re_strlen so the copy stops at the first terminator
      const std::size_t top_bytes = (re_detail::re_strlen(top_key.c_str()) + 1) * sizeof(charT);
      std::memcpy(data.extend(top_bytes), top_key.c_str(), top_bytes);
      const std::size_t next_bytes = (re_detail::re_strlen(next_key.c_str()) + 1) * sizeof(charT);
      std::memcpy(data.extend(next_bytes), next_key.c_str(), next_bytes);
   }

   //
   // character classes collapse into one combined mask:
   //
   for(; !classes.empty(); classes.pop())
   {
      class_mask |= classes.peek();
   }

   //
   // equivalence classes:
   //
   for(; !equivalents.empty(); equivalents.pop())
   {
      const traits_string_type& key = equivalents.peek();
      const std::size_t byte_count = (re_detail::re_strlen(key.c_str()) + 1) * sizeof(charT);
      ++equivalent_count;
      std::memcpy(reinterpret_cast<charT*>(data.extend(byte_count)), key.c_str(), byte_count);
   }

   //
   // all extensions are done: locate the record and fill it in.
   //
   unsigned char* const storage = reinterpret_cast<unsigned char*>(data.data());
   re_detail::re_set_long* dat = reinterpret_cast<re_detail::re_set_long*>(storage + header_offset);
   dat->type = re_detail::syntax_element_long_set;
   dat->csingles = single_count;
   dat->cranges = range_count;
   dat->cclasses = class_mask;
   dat->cequivalents = equivalent_count;
   dat->isnot = isnot;
   dat->next.i = 0;
   return dat;
}
template <class charT, class traits, class Allocator>
re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set_aux(re_detail::jstack<traits_string_type, Allocator>& singles, re_detail::jstack<traits_string_type, Allocator>& ranges, re_detail::jstack<boost::uint_fast32_t, Allocator>& classes, re_detail::jstack<traits_string_type, Allocator>& equivalents, bool isnot, const re_detail::_narrow_type&)
{
re_detail::re_set* dat = reinterpret_cast<re_detail::re_set*>(data.extend(sizeof(re_detail::re_set)));
std::memset(dat, 0, sizeof(re_detail::re_set));
while(singles.empty() == false)
{
dat->_map[(traits_size_type)(traits_uchar_type)*(singles.peek().c_str())] = re_detail::mask_all;
singles.pop();
}
while(ranges.empty() == false)
{
traits_string_type c1, c2, c3, c4;
if(flags() & regbase::nocollate)
c1 = ranges.peek();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -