📄 regex_compile.hpp
字号:
if(traits_inst.is_class(*first, traits_type::char_class_xdigit) == false)
{
fail(REG_BADBR);
break;
}
c = (charT)traits_inst.toi(first, last, -16);
}
break;
case traits_type::syntax_c:
++first;
if(first == last)
{
fail(REG_EESCAPE);
break;
}
if(((traits_uchar_type)(*first) < (traits_uchar_type)'@')
|| ((traits_uchar_type)(*first) > (traits_uchar_type)127) )
{
fail(REG_EESCAPE);
return (charT)0;
}
c = (charT)((traits_uchar_type)(*first) - (traits_uchar_type)'@');
++first;
break;
case traits_type::syntax_e:
c = (charT)27;
++first;
break;
case traits_type::syntax_digit:
c = (charT)traits_inst.toi(first, last, -8);
break;
default:
//c = *first;
++first;
}
return c;
}
//
// compile_maps:
// Walks the compiled record chain starting at data.data() and fills in the
// start-character maps and can_be_null masks:
//   - startmap is always rebuilt for the first record,
//   - every alt / rep record gets its own 256 entry _map built from both of
//     its branches (mask_take for next.p, mask_skip for alt.p),
//   - every other record only has its can_be_null mask computed.
// The walk stops at the syntax_element_match record, whose can_be_null is
// set to mask_all.
//
template <class charT, class traits, class Allocator>
void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_maps()
{
   re_detail::re_syntax_base* state = static_cast<re_detail::re_syntax_base*>(data.data());

   // the map for the start of the expression is built unconditionally,
   // no null-mask pointer is passed for this first call:
   std::memset(startmap, 0, 256);
   state->can_be_null = 0;
   compile_map(state, startmap, 0, re_detail::mask_all);

   for(; state->type != re_detail::syntax_element_match; state = state->next.p)
   {
      if((state->type != re_detail::syntax_element_alt) && (state->type != re_detail::syntax_element_rep))
      {
         // not a branching record: there is no map to fill,
         // only the null mask is computed:
         state->can_be_null = 0;
         compile_map(state, 0, &(state->can_be_null), re_detail::mask_all);
         continue;
      }

      // branching record: start from an empty map and null mask...
      re_detail::re_jump* branch = static_cast<re_detail::re_jump*>(state);
      std::memset(&(branch->_map), 0, 256);
      state->can_be_null = 0;
      // ...then record what can start the "take" branch (which is probed
      // with the alternative as its terminal), followed by the "skip" branch:
      compile_map(state->next.p, branch->_map, &(state->can_be_null), re_detail::mask_take, branch->alt.p);
      compile_map(branch->alt.p, branch->_map, &(state->can_be_null), re_detail::mask_skip);

      if(state->type == re_detail::syntax_element_rep)
      {
         // a repeat is flagged as a singleton when the record three
         // links on from it is the repeat's own alternative:
         re_detail::re_repeat* rep = static_cast<re_detail::re_repeat*>(state);
         rep->singleton = (rep->next.p->next.p->next.p == rep->alt.p);
      }
   }

   // state now refers to the terminating match record:
   state->can_be_null = re_detail::mask_all;
}
//
// probe_start:
// Returns true if, according to the record chain beginning at node, the
// character cc may be the first character consumed from that point on.
// terminal is compared against the successor of a backwards jump: when they
// are equal the jump is treated as always possible.
// The function recurses through records that do not themselves consume a
// character.
//
template <class charT, class traits, class Allocator>
bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start(
                        re_detail::re_syntax_base* node, charT cc, re_detail::re_syntax_base* terminal) const
{
   switch(node->type)
   {
   case re_detail::syntax_element_startmark:
      if(static_cast<const re_detail::re_brace*>(node)->index == -1)
      {
         // a start mark with index -1 is handled specially: cc is accepted
         // only if both the record two links on, and the alternative of
         // the record that follows this one, accept it:
         return probe_start(node->next.p->next.p, cc, terminal)
            && probe_start(static_cast<const re_detail::re_jump*>(node->next.p)->alt.p, cc, terminal);
      }
      // fall through:
   case re_detail::syntax_element_endmark:
   case re_detail::syntax_element_start_line:
   case re_detail::syntax_element_word_boundary:
   case re_detail::syntax_element_buffer_start:
   case re_detail::syntax_element_restart_continue:
      // these say nothing about the next character,
      // so the answer comes from whatever follows:
      return probe_start(node->next.p, cc, terminal);

   case re_detail::syntax_element_literal:
      {
         // only the first character of the literal matters, the stored
         // characters have already been translated:
         const charT* text = reinterpret_cast<charT*>(static_cast<re_detail::re_literal*>(node)+1);
         return *text == traits_inst.translate(cc, (_flags & regbase::icase));
      }

   case re_detail::syntax_element_end_line:
      // the next character (if there is one!) must be a newline,
   case re_detail::syntax_element_soft_buffer_end:
      // we can be null, only newlines may follow,
      // NB we assume that soft_buffer_end is followed by
      // re_detail::syntax_element_match, if it's not then we can
      // never match anything anyway!!
      return traits_inst.is_separator(traits_inst.translate(cc, (_flags & regbase::icase)));

   case re_detail::syntax_element_wild:
   case re_detail::syntax_element_match:
      return true;
   case re_detail::syntax_element_backref:
      // there's no easy way to determine this
      // which is not to say it can't be done!
      // for now any character is accepted:
      return true;

   case re_detail::syntax_element_within_word:
   case re_detail::syntax_element_word_start:
      // what follows must be a word character:
      return traits_inst.is_class(traits_inst.translate(cc, (_flags & regbase::icase)), traits_type::char_class_word);
   case re_detail::syntax_element_word_end:
      // what follows must not be a word character:
      return !traits_inst.is_class(traits_inst.translate(cc, (_flags & regbase::icase)), traits_type::char_class_word);

   case re_detail::syntax_element_buffer_end:
      // we can be null, nothing must follow,
      // NB we assume that this is followed by
      // re_detail::syntax_element_match, if it's not then we can
      // never match anything anyway!!
      return false;

   case re_detail::syntax_element_long_set:
      // we can not be null; cc is accepted if it may be a member of the
      // set, or if matching the one character range [&cc, &cc+1) against
      // the set moves past &cc:
      if(re_detail::re_maybe_set_member(cc, static_cast<const re_detail::re_set_long*>(node), *this))
         return true;
      return re_detail::re_is_set_member(static_cast<const charT*>(&cc), static_cast<const charT*>(&cc+1), static_cast<re_detail::re_set_long*>(node), *this) != &cc;

   case re_detail::syntax_element_set:
      {
         // look the translated character up in the set's own map:
         const unsigned int slot = (traits_size_type)(traits_uchar_type)traits_inst.translate(cc, (_flags & regbase::icase));
         return static_cast<re_detail::re_set*>(node)->_map[slot] != 0;
      }

   case re_detail::syntax_element_jump:
      {
         re_detail::re_syntax_base* target = static_cast<re_detail::re_jump*>(node)->alt.p;
         if(!(target < node))
         {
            // forward jump, just follow it:
            return probe_start(target, cc, terminal);
         }
         // backwards jump,
         // caused only by end of repeat section, we'll treat this
         // the same as a match, because the sub-expression has matched.
         if(node->next.p == terminal)
            return true; // null repeat - we can always take this
         //
         // take the jump, add in a fix for the fact that if the
         // repeat that we're jumping to has non-zero minimum count
         // then we need to add in the possibility that we could still
         // skip that repeat.
         bool accepted = probe_start(target, cc, terminal);
         if((target->type == re_detail::syntax_element_rep) && (static_cast<re_detail::re_repeat*>(target)->min != 0))
         {
            accepted = accepted || probe_start(static_cast<re_detail::re_jump*>(target)->alt.p, cc, terminal);
         }
         return accepted;
      }

   case re_detail::syntax_element_alt:
      // the OR of the two alternatives, the alt branch is tried first:
      if(probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal))
         return true;
      return probe_start(node->next.p, cc, terminal);

   case re_detail::syntax_element_rep:
      {
         // the repeated section is always probed, using the record after
         // the repeat as its terminal; skipping the repeat is only an
         // option when the minimum count is zero:
         re_detail::re_syntax_base* after = static_cast<re_detail::re_jump*>(node)->alt.p;
         if(probe_start(node->next.p, cc, after))
            return true;
         if(static_cast<re_detail::re_repeat*>(node)->min != 0)
            return false;
         return probe_start(after, cc, terminal);
      }

   case re_detail::syntax_element_combining:
      return !traits_inst.is_combining(traits_inst.translate(cc, (_flags & regbase::icase)));

   default:
      // any other record type drops out of the switch:
      break;
   }
   return false;
}
//
// probe_start_null:
// Companion to probe_start that takes no character: returns true when
// following the chain from node through records that are passed over here
// reaches a match, buffer_end, soft_buffer_end or backref record (or a
// backwards jump whose successor is terminal). Any record type not listed
// in the switch yields false.
//
template <class charT, class traits, class Allocator>
bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start_null(re_detail::re_syntax_base* node, re_detail::re_syntax_base* terminal)const
{
   switch(node->type)
   {
   // records that succeed outright:
   case re_detail::syntax_element_match:
   case re_detail::syntax_element_buffer_end:
   case re_detail::syntax_element_soft_buffer_end:
   case re_detail::syntax_element_backref:
      return true;

   // records that tell us nothing about the next character,
   // so the answer comes from whatever follows:
   case re_detail::syntax_element_startmark:
   case re_detail::syntax_element_endmark:
   case re_detail::syntax_element_start_line:
   case re_detail::syntax_element_word_boundary:
   case re_detail::syntax_element_buffer_start:
   case re_detail::syntax_element_restart_continue:
   case re_detail::syntax_element_end_line:
   case re_detail::syntax_element_word_end:
      return probe_start_null(node->next.p, terminal);

   case re_detail::syntax_element_jump:
      {
         re_detail::re_syntax_base* target = static_cast<re_detail::re_jump*>(node)->alt.p;
         // a backwards jump is
         // caused only by end of repeat section, we'll treat this
         // the same as a match, because the sub-expression has matched.
         // this is only caused by NULL repeats as in "(a*)*" or "(\<)*"
         // these are really nonsense and make the matching code much
         // harder, it would be nice to get rid of them altogether.
         if((target < node) && (node->next.p == terminal))
            return true;
         // otherwise (forwards or backwards) take the jump:
         return probe_start_null(target, terminal);
      }

   case re_detail::syntax_element_alt:
      // the OR of the two alternatives, the alt branch is tried first:
      if(probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal))
         return true;
      return probe_start_null(node->next.p, terminal);

   case re_detail::syntax_element_rep:
      // only need to consider skipping the repeat:
      return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);

   default:
      break;
   }
   return false;
}
//
// compile_map:
// Records, for the record chain starting at node, which characters can
// start a match and whether the chain passes probe_start_null.
//   _map     - if non-null, mask is OR-ed into _map[i] for each of the
//              256 values i for which probe_start(node, (charT)i, terminal)
//              is true.
//   pnull    - if non-null, mask is OR-ed into *pnull when
//              probe_start_null(node, terminal) is true.
//   terminal - passed through unchanged to both probes.
//
template <class charT, class traits, class Allocator>
void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_map(
                        re_detail::re_syntax_base* node, unsigned char* _map,
                        unsigned int* pnull, unsigned char mask, re_detail::re_syntax_base* terminal)const
{
   if(_map != 0)
   {
      unsigned int ch = 0;
      while(ch < 256)
      {
         if(probe_start(node, (charT)ch, terminal))
            _map[ch] |= mask;
         ++ch;
      }
   }

   // without somewhere to store the null mask the null probe is not run:
   if(pnull == 0)
      return;
   if(probe_start_null(node, terminal))
      *pnull |= mask;
}
//
// move_offsets:
// Called after an insert: adds size to the stored offsets of every record
// reachable from j->next.i onwards. j itself is not modified. For rep, jump
// and alt records the alt offset is adjusted as well as the next offset.
// The walk ends at the record whose next offset equals size once adjusted
// (i.e. it was zero beforehand).
//
template <class charT, class traits, class Allocator>
void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::move_offsets(re_detail::re_syntax_base* j, unsigned size)
{
# ifdef BOOST_MSVC
# pragma warning(push)
# pragma warning(disable: 4127)
#endif
   // offsets are byte offsets from the start of the data buffer, the
   // buffer is not resized in here so the base address is fixed:
   char* const base = reinterpret_cast<char*>(data.data());

   // begin with the record that j links to:
   j = reinterpret_cast<re_detail::re_syntax_base*>(base + j->next.i);
   while(true)
   {
      switch(j->type)
      {
      case re_detail::syntax_element_rep:
      case re_detail::syntax_element_jump:
      case re_detail::syntax_element_alt:
         // these records carry a second offset:
         static_cast<re_detail::re_jump*>(j)->alt.i += size;
         break;
      default:
         break;
      }
      j->next.i += size;

      // a next offset that was zero before the adjustment ends the walk:
      if(j->next.i == size)
         break;
      // follow the (already adjusted) link:
      j = reinterpret_cast<re_detail::re_syntax_base*>(base + j->next.i);
   }
# ifdef BOOST_MSVC
# pragma warning(pop)
#endif
}
//
// compile_set_simple:
// Builds a set that consists of the single character class mask cls,
// negated when isnot is true, by handing empty single/range/equivalent
// stacks plus a one entry class stack to compile_set_aux.
// If dat is non-null the data buffer is aligned and dat->next.i is set to
// the buffer's current size before compile_set_aux is called.
// Returns whatever compile_set_aux returns.
//
template <class charT, class traits, class Allocator>
re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set_simple(re_detail::re_syntax_base* dat, unsigned long cls, bool isnot)
{
   typedef typename re_detail::is_byte<charT>::width_type width_type;

   // only the class stack receives an entry, the others stay empty:
   re_detail::jstack<traits_string_type, Allocator> single_items(64, data.allocator());
   re_detail::jstack<traits_string_type, Allocator> range_items(64, data.allocator());
   re_detail::jstack<boost::uint_fast32_t, Allocator> class_masks(64, data.allocator());
   re_detail::jstack<traits_string_type, Allocator> equiv_items(64, data.allocator());
   class_masks.push(cls);

   if(dat != 0)
   {
      // link the preceding record to the current (aligned) end of the buffer:
      data.align();
      dat->next.i = data.size();
   }

   // width_type() selects the compile_set_aux overload for this charT:
   return compile_set_aux(single_items, range_items, class_masks, equiv_items, isnot, width_type());
}
template <class charT, class traits, class Allocator>
re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set(const charT*& first, const charT* last)
{
re_detail::jstack<traits_string_type, Allocator> singles(64, data.allocator());
re_detail::jstack<traits_string_type, Allocator> ranges(64, data.allocator());
re_detail::jstack<boost::uint_fast32_t, Allocator> classes(64, data.allocator());
re_detail::jstack<traits_string_type, Allocator> equivalents(64, data.allocator());
bool has_digraphs = false;
jm_assert(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_open_set);
++first;
bool started = false;
bool done = false;
bool isnot = false;
enum last_type
{
last_single,
last_none,
last_dash
};
unsigned l = last_none;
traits_string_type s;
while((first != last) && !done)
{
traits_size_type c = (traits_size_type)(traits_uchar_type)*first;
// this is only used for the switch(), but cannot be folded in
// due to a bug in Comeau 4.2.44beta3
traits_size_type syntax = traits_inst.syntax_type(c);
switch(syntax)
{
case traits_type::syntax_caret:
if(!started && !isnot)
{
isnot = true;
}
else
{
s = (charT)c;
goto char_set_literal;
}
break;
case traits_type::syntax_open_set:
{
if((_flags & char_classes) == 0)
{
s = (charT)c;
goto char_set_literal;
}
// check to see if we really have a class:
const charT* base = first;
// this is only used for the switch(), but cannot be folded in
// due to a bug in Comeau 4.2.44beta3
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -