📄 basic_regex_creator.hpp
字号:
s1 = this->m_traits.transform(a1, (a1[1] ? a1+2 : a1+1));
s2 = this->m_traits.transform(a2, (a2[1] ? a2+2 : a2+1));
#endif
if(s1.size() == 0)
s1 = string_type(1, charT(0));
if(s2.size() == 0)
s2 = string_type(1, charT(0));
}
else
{
if(c1.second)
{
s1.insert(s1.end(), c1.first);
s1.insert(s1.end(), c1.second);
}
else
s1 = string_type(1, c1.first);
if(c2.second)
{
s2.insert(s2.end(), c2.first);
s2.insert(s2.end(), c2.second);
}
else
s2.insert(s2.end(), c2.first);
}
if(s1 > s2)
{
// Oops error:
return 0;
}
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) );
re_detail::copy(s1.begin(), s1.end(), p);
p[s1.size()] = charT(0);
p += s1.size() + 1;
re_detail::copy(s2.begin(), s2.end(), p);
p[s2.size()] = charT(0);
}
//
// now process the equivalence classes:
//
first = char_set.equivalents_begin();
last = char_set.equivalents_end();
while(first != last)
{
string_type s;
if(first->second)
{
#if BOOST_WORKAROUND(__GNUC__, < 3)
string_type in(3, charT(0));
in[0] = first->first;
in[1] = first->second;
s = m_traits.transform_primary(in.c_str(), in.c_str()+2);
#else
charT cs[3] = { first->first, first->second, charT(0), };
s = m_traits.transform_primary(cs, cs+2);
#endif
}
else
s = m_traits.transform_primary(&first->first, &first->first+1);
if(s.empty())
return 0; // invalid or unsupported equivalence class
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) );
re_detail::copy(s.begin(), s.end(), p);
p[s.size()] = charT(0);
++first;
}
//
// finally reset the address of our last state:
//
m_last_state = result = static_cast<re_set_long<mask_type>*>(getaddress(offset));
return result;
}
namespace{

//
// char_less:
// Strict "less than" for character values.  The primary template simply
// defers to operator< for the type it is given.
//
template<class T>
inline bool char_less(T lhs, T rhs)
{
   return lhs < rhs;
}

//
// plain char: both operands are converted to unsigned char before the
// comparison, so the result does not depend on whether char is signed.
//
template<>
inline bool char_less<char>(char lhs, char rhs)
{
   const unsigned char left = static_cast<unsigned char>(lhs);
   const unsigned char right = static_cast<unsigned char>(rhs);
   return left < right;
}

//
// signed char: compared by unsigned value as well, so negative values
// order after the non-negative ones.
//
template<>
inline bool char_less<signed char>(signed char lhs, signed char rhs)
{
   const unsigned char left = static_cast<unsigned char>(lhs);
   const unsigned char right = static_cast<unsigned char>(rhs);
   return left < right;
}

}
//
// append_set (mpl::true_ overload):
// Appends a syntax_element_set state and fills in its _map, which holds one
// flag per character value in the range [0, 1 << CHAR_BIT).  A flag is set
// when the corresponding character is matched by any of the singles,
// ranges, character classes, negated classes or equivalence classes held
// in char_set; if char_set is negated every flag is inverted at the end.
//
// NOTE(review): this overload is presumably selected for single byte
// character types - confirm against the dispatching caller.
//
// Returns the newly appended state, or 0 when a range has its end points
// in the wrong order or an equivalence class yields an empty primary
// sort key.
//
template <class charT, class traits>
re_syntax_base* basic_regex_creator<charT, traits>::append_set(
   const basic_char_set<charT, traits>& char_set, mpl::true_*)
{
   typedef typename traits::string_type string_type;
   typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
   // one past the largest index visited in result->_map; unsigned so that
   // every loop below compares like with like:
   const unsigned map_size = 1u << CHAR_BIT;

   re_set* result = static_cast<re_set*>(append_state(syntax_element_set, sizeof(re_set)));
   bool negate = char_set.is_negated();
   std::memset(result->_map, 0, sizeof(result->_map));
   //
   // handle singles first:
   //
   item_iterator first, last;
   first = char_set.singles_begin();
   last = char_set.singles_end();
   while(first != last)
   {
      // the translated single does not depend on i, so translate it once
      // per single rather than once per candidate character:
      const charT single = this->m_traits.translate(first->first, this->m_icase);
      for(unsigned i = 0; i < map_size; ++i)
      {
         if(this->m_traits.translate(static_cast<charT>(i), this->m_icase) == single)
            result->_map[i] = true;
      }
      ++first;
   }
   //
   // OK now handle ranges:
   //
   first = char_set.ranges_begin();
   last = char_set.ranges_end();
   while(first != last)
   {
      // first grab the endpoints of the range, these are stored as two
      // consecutive entries in the list:
      charT c1 = this->m_traits.translate(first->first, this->m_icase);
      ++first;
      charT c2 = this->m_traits.translate(first->first, this->m_icase);
      ++first;
      // different actions now depending upon whether collation is turned on:
      if(flags() & regex_constants::collate)
      {
         // we need to transform our range into sort keys:
         charT endpoint[2] = { c1, charT(0), };
         const string_type s1 = this->m_traits.transform(endpoint, endpoint+1);
         endpoint[0] = c2;
         const string_type s2 = this->m_traits.transform(endpoint, endpoint+1);
         if(s1 > s2)
         {
            // Oops error:
            return 0;
         }
         // a character matches if its sort key lies between the sort keys
         // of the two end points (inclusive):
         for(unsigned i = 0; i < map_size; ++i)
         {
            charT candidate[2] = { static_cast<charT>(i), charT(0), };
            string_type s3 = this->m_traits.transform(candidate, candidate+1);
            if((s1 <= s3) && (s3 <= s2))
               result->_map[i] = true;
         }
      }
      else
      {
         if(char_less<charT>(c2, c1))
         {
            // Oops error:
            return 0;
         }
         // everything in range matches:
         std::memset(result->_map + static_cast<unsigned char>(c1), true, 1 + static_cast<unsigned char>(c2) - static_cast<unsigned char>(c1));
      }
   }
   //
   // and now the classes:
   //
   typedef typename traits::char_class_type mask_type;
   mask_type m = char_set.classes();
   if(flags() & regbase::icase)
   {
      // adjust m as needed, a case insensitive match on the whole of the
      // lower or upper mask is widened to the alpha mask:
      if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
         m |= m_alpha_mask;
   }
   if(m != 0)
   {
      for(unsigned i = 0; i < map_size; ++i)
      {
         if(this->m_traits.isctype(static_cast<charT>(i), m))
            result->_map[i] = true;
      }
   }
   //
   // and now the negated classes:
   //
   m = char_set.negated_classes();
   if(flags() & regbase::icase)
   {
      // adjust m as needed:
      if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
         m |= m_alpha_mask;
   }
   if(m != 0)
   {
      // here a character matches when it is NOT a member of the class:
      for(unsigned i = 0; i < map_size; ++i)
      {
         if(0 == this->m_traits.isctype(static_cast<charT>(i), m))
            result->_map[i] = true;
      }
   }
   //
   // now process the equivalence classes:
   //
   first = char_set.equivalents_begin();
   last = char_set.equivalents_end();
   while(first != last)
   {
      string_type s;
      // only single character equivalence classes are expected here:
      BOOST_ASSERT(static_cast<charT>(0) == first->second);
      s = this->m_traits.transform_primary(&first->first, &first->first+1);
      if(s.empty())
         return 0;  // invalid or unsupported equivalence class
      // every character with the same primary sort key matches:
      for(unsigned i = 0; i < map_size; ++i)
      {
         charT c[2] = { (static_cast<charT>(i)), charT(0), };
         string_type s2 = this->m_traits.transform_primary(c, c+1);
         if(s == s2)
            result->_map[i] = true;
      }
      ++first;
   }
   if(negate)
   {
      // a negated set matches exactly what the map built so far rejects:
      for(unsigned i = 0; i < map_size; ++i)
      {
         result->_map[i] = !(result->_map[i]);
      }
   }
   return result;
}
//
// finalize:
// Completes the state machine once all states have been appended.  Adds the
// terminating match state, stores a null terminated copy of the expression
// [p1, p2) in the machine's storage, converts stored offsets to pointers,
// and then builds the start maps, the restart type and the leading repeat
// optimisation.
//
template <class charT, class traits>
void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2)
{
   // every other state is already in place, so terminate the machine:
   append_state(syntax_element_match);

   // grow the storage and keep a null terminated copy of the original
   // expression in it:
   const std::ptrdiff_t expression_len = p2 - p1;
   m_pdata->m_expression_len = expression_len;
   charT* const expression_copy = static_cast<charT*>(m_pdata->m_data.extend(sizeof(charT) * (1 + expression_len)));
   m_pdata->m_expression = expression_copy;
   re_detail::copy(p1, p2, expression_copy);
   expression_copy[expression_len] = 0;

   // now the remaining fields, a status of zero means parsing succeeded:
   m_pdata->m_status = 0;

   // the first state is read only now, after the last extension of the
   // storage above:
   m_pdata->m_first_state = static_cast<re_syntax_base*>(m_pdata->m_data.data());

   // turn the offsets held in each state into real pointers:
   fixup_pointers(m_pdata->m_first_state);

   // start maps for the nested alternatives and repeats:
   create_startmaps(m_pdata->m_first_state);

   // start map for the machine as a whole, built from a clean slate:
   std::memset(m_pdata->m_startmap, 0, sizeof(m_pdata->m_startmap));
   m_pdata->m_can_be_null = 0;
   m_bad_repeats = 0;
   create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all);

   // record the restart type:
   m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state);

   // and optimise a leading repeat, if the machine starts with one:
   probe_leading_repeat(m_pdata->m_first_state);
}
//
// fixup_pointers:
// Follows the chain of states starting at "state" and replaces the offsets
// stored in each state (next.i, and alt.i where the state has one) with the
// addresses obtained from getaddress().  Repeat states are also given their
// id, and states that carry a start map have the map and can_be_null flag
// zeroed.  A state whose next offset is zero ends the chain.
//
template <class charT, class traits>
void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
{
   for(; state; state = state->next.p)
   {
      // the cases below deliberately cascade: a repeat also receives the
      // treatment given to an alternative, which in turn also receives the
      // treatment given to a jump.
      switch(state->type)
      {
      case syntax_element_rep:
      case syntax_element_dot_rep:
      case syntax_element_char_rep:
      case syntax_element_short_set_rep:
      case syntax_element_long_set_rep:
         {
            // each repeat gets the next free id:
            re_repeat* const rep = static_cast<re_repeat*>(state);
            rep->id = m_repeater_id++;
         }
         // fall through:
      case syntax_element_alt:
         {
            // start with an empty map:
            re_alt* const alternative = static_cast<re_alt*>(state);
            std::memset(alternative->_map, 0, sizeof(alternative->_map));
            alternative->can_be_null = 0;
         }
         // fall through:
      case syntax_element_jump:
         {
            // resolve the alternative branch:
            re_jump* const jump = static_cast<re_jump*>(state);
            jump->alt.p = getaddress(jump->alt.i, state);
         }
         // fall through:
      default:
         break;
      }
      // every kind of state has its next link resolved, a zero offset
      // becomes a null pointer and so terminates the loop:
      if(!state->next.i)
         state->next.p = 0;
      else
         state->next.p = getaddress(state->next.i, state);
   }
}
//
// create_startmaps:
// Builds the start map of every alternative and repeat state reachable
// through the next links from "state", and computes the size of every
// backstep state met on the way.
//
// The work is done without recursion: states are collected on a stack
// first and their maps are then built last-to-first, so that earlier maps
// can make use of later ones.  An earlier recursive version overflowed
// the stack on complex expressions when the stack was small.
//
// If a backstep size cannot be calculated the status is set to
// error_bad_pattern (unless a status is already set), the stored
// expression is cleared, and a regex_error is raised unless the no_except
// flag is set.
//
template <class charT, class traits>
void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
{
   typedef std::pair<bool, re_syntax_base*> pending_type;

   // remember the case setting so that it can be put back at the end:
   const bool saved_icase = m_icase;
   std::vector<pending_type> pending;

   // first pass: walk the machine, collecting the states that need a map:
   for(; state; state = state->next.p)
   {
      switch(state->type)
      {
      case syntax_element_toggle_case:
         // keep track of case changes as we go:
         m_icase = static_cast<re_case*>(state)->icase;
         break;
      case syntax_element_alt:
      case syntax_element_rep:
      case syntax_element_dot_rep:
      case syntax_element_char_rep:
      case syntax_element_short_set_rep:
      case syntax_element_long_set_rep:
         // defer this one, together with the case setting in force here:
         pending.push_back(pending_type(m_icase, state));
         break;
      case syntax_element_backstep:
         {
            // work out how far the backstep has to go:
            re_brace* const backstep = static_cast<re_brace*>(state);
            backstep->index = this->calculate_backstep(state->next.p);
            if(backstep->index < 0)
            {
               // Oops error, only record the code if none is set already:
               if(0 == this->m_pdata->m_status)
                  this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
               //
               // the expression should be empty after an error:
               //
               this->m_pdata->m_expression = 0;
               this->m_pdata->m_expression_len = 0;
               //
               // throw, unless exceptions have been turned off:
               //
               if(0 == (this->flags() & regex_constants::no_except))
               {
                  std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_bad_pattern);
                  boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
                  e.raise();
               }
            }
         }
         break;
      default:
         break;
      }
   }

   // second pass: pop the collected states, most recent first, and build
   // their maps:
   while(!pending.empty())
   {
      const pending_type top = pending.back();
      pending.pop_back();
      m_icase = top.first;
      state = top.second;

      re_alt* const alternative = static_cast<re_alt*>(state);
      // the map covers both ways out of the state: first what follows it...
      create_startmap(state->next.p, alternative->_map, &alternative->can_be_null, mask_take);
      m_bad_repeats = 0;
      // ...and then the alternative branch:
      create_startmap(alternative->alt.p, alternative->_map, &alternative->can_be_null, mask_skip);
      // switch to a more specific state type to allow for faster matching:
      state->type = this->get_repeat_type(state);
   }

   // put the case setting back as we found it:
   m_icase = saved_icase;
}
template <class charT, class traits>
int basic_regex_creator<charT, traits>::calculate_backstep(re_syntax_base* state)
{
typedef typename traits::char_class_type mask_type;
int result = 0;
while(state)
{
switch(state->type)
{
case syntax_element_startmark:
if((static_cast<re_brace*>(state)->index == -1)
|| (static_cast<re_brace*>(state)->index == -2))
{
state = static_cast<re_jump*>(state->next.p)->alt.p->next.p;
continue;
}
else if(static_cast<re_brace*>(state)->index == -3)
{
state = state->next.p->next.p;
continue;
}
break;
case syntax_element_endmark:
if((static_cast<re_brace*>(state)->index == -1)
|| (static_cast<re_brace*>(state)->index == -2))
return result;
break;
case syntax_element_literal:
result += static_cast<re_literal*>(state)->length;
break;
case syntax_element_wild:
case syntax_element_set:
result += 1;
break;
case syntax_element_dot_rep:
case syntax_element_char_rep:
case syntax_element_short_set_rep:
case syntax_element_backref:
case syntax_element_rep:
case syntax_element_combining:
case syntax_element_long_set_rep:
case syntax_element_backstep:
{
re_repeat* rep = static_cast<re_repeat *>(state);
// adjust the type of the state to allow for faster matching:
state->type = this->get_repeat_type(state);
if((state->type == syntax_element_dot_rep)
|| (state->type == syntax_element_char_rep)
|| (state->type == syntax_element_short_set_rep))
{
if(rep->max != rep->min)
return -1;
result += static_cast<int>(rep->min);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -