📄 regex_match.hpp
字号:
/*
*
* Copyright (c) 1998-2002
* Dr John Maddock
*
* Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*
*/
/*
* LOCATION: see http://www.boost.org for most recent version.
* FILE regex_match.hpp
* VERSION see <boost/version.hpp>
* DESCRIPTION: Regular expression matching algorithms.
* Note this is an internal header file included
* by regex.hpp, do not include on its own.
*/
#ifndef BOOST_REGEX_MATCH_HPP
#define BOOST_REGEX_MATCH_HPP
#ifndef BOOST_REGEX_MAX_STATE_COUNT
# define BOOST_REGEX_MAX_STATE_COUNT 100000000
#endif
#include <boost/limits.hpp>
namespace boost{
namespace re_detail{
#ifdef __BORLANDC__
#pragma option push -a8 -b -Vx -Ve -pc -w-8026 -w-8027
#endif
//
// Unfortunately Rogue Wave's standard library appears to have a bug
// in std::basic_string::compare that results in erroneous answers
// in some cases (tested with Borland C++ 5.1, Rogue Wave lib version
// 0x020101). The test case was:
// {39135,0} < {0xff,0}
// which succeeds when it should not.
//
#ifndef _RWSTD_VER
# define STR_COMP(s,p) s.compare(p)
#else
// Generic fallback: any other character type simply defers to
// basic_string::compare.
template <class C, class T, class A>
inline int string_compare(const std::basic_string<C,T,A>& s, const C* p)
{
   return s.compare(p);
}
// Narrow strings: compare the underlying C strings with strcmp instead
// of going through basic_string::compare.
inline int string_compare(const std::string& s, const char* p)
{
   const char* const lhs = s.c_str();
   return std::strcmp(lhs, p);
}
# ifndef BOOST_NO_WREGEX
// Wide strings: as above, using wcscmp.
inline int string_compare(const std::wstring& s, const wchar_t* p)
{
   const wchar_t* const lhs = s.c_str();
   return std::wcscmp(lhs, p);
}
# endif
# define STR_COMP(s,p) string_compare(s,p)
#endif
//
// re_skip_past_null:
// Given a pointer into a null-terminated string, returns a pointer to
// the element immediately after that string's terminator.
//
template<class charT>
inline const charT* re_skip_past_null(const charT* p)
{
   for(; *p != 0; ++p)
   {
      // scan forward to the terminator
   }
   return p + 1;
}
//
// re_is_set_member:
// Tests whether the text starting at next (and ending at last) begins
// with a member of the character set described by *set_.
//
// The set's data is read from the memory directly following the
// re_set_long header as a sequence of null-terminated charT strings,
// consumed in this order: set_->csingles single elements, then
// set_->cranges pairs of strings, then set_->cequivalents strings.
// Finally the character is tested against set_->cclasses.
//
// Returns next unchanged when next == last. Otherwise:
//  - if a member is recognised: next itself when set_->isnot is set,
//    else the position just past the matched element;
//  - if nothing is recognised: next + 1 when set_->isnot is set,
//    else next.
//
template <class iterator, class charT, class traits_type, class Allocator>
iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
iterator last,
const re_set_long* set_,
const reg_expression<charT, traits_type, Allocator>& e)
{
// p walks the string data stored immediately after the set header:
const charT* p = reinterpret_cast<const charT*>(set_+1);
iterator ptr;
unsigned int i;
bool icase = e.flags() & regbase::icase;
// nothing left to examine:
if(next == last) return next;
typedef typename traits_type::string_type traits_string_type;
const traits_type& traits_inst = e.get_traits();
// dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never
// referenced
(void)traits_inst;
// try and match a single character, could be a multi-character
// collating element...
for(i = 0; i < set_->csingles; ++i)
{
// every candidate is compared starting from the same input position:
ptr = next;
if(*p == 0)
{
// treat null string as special case:
// an empty entry is compared against the (translated) input
// character itself, so it can only match a null character.
if(traits_inst.translate(*ptr, icase) != *p)
{
// no match: step over the run of nulls to reach the next entry
while(*p == 0)++p;
continue;
}
return set_->isnot ? next : (ptr == next) ? ++next : ptr;
}
else
{
// compare the input against this entry one translated character at a time:
while(*p && (ptr != last))
{
if(traits_inst.translate(*ptr, icase) != *p)
break;
++p;
++ptr;
}
if(*p == 0) // if null we've matched
return set_->isnot ? next : (ptr == next) ? ++next : ptr;
p = re_skip_past_null(p); // skip null
}
}
// the remaining tests only ever consider the single translated character at next:
charT col = traits_inst.translate(*next, icase);
if(set_->cranges || set_->cequivalents)
{
// s2 holds the character as a one-element string, s1 receives its sort key:
traits_string_type s2(1, col);
traits_string_type s1;
//
// try and match a range, NB only a single character can match
if(set_->cranges)
{
// with regbase::nocollate the raw character is compared, otherwise
// the key produced by the traits class' transform():
if(e.flags() & regbase::nocollate)
s1 = s2;
else
traits_inst.transform(s1, s2);
// each range is stored as two consecutive strings; it matches when s1
// compares <= the first string and >= the second one.
for(i = 0; i < set_->cranges; ++i)
{
if(STR_COMP(s1, p) <= 0)
{
// first test passed, advance to the second string of the pair:
while(*p)++p;
++p;
if(STR_COMP(s1, p) >= 0)
return set_->isnot ? next : ++next;
}
else
{
// skip first string
while(*p)++p;
++p;
}
// skip second string
while(*p)++p;
++p;
}
}
//
// try and match an equivalence class, NB only a single character can match
if(set_->cequivalents)
{
// s1 is overwritten with the key produced by transform_primary() and
// must compare equal to one of the stored strings:
traits_inst.transform_primary(s1, s2);
for(i = 0; i < set_->cequivalents; ++i)
{
if(STR_COMP(s1, p) == 0)
return set_->isnot ? next : ++next;
// skip string
while(*p)++p;
++p;
}
}
}
// last resort: ask the traits class whether the character belongs to
// one of the classes recorded in set_->cclasses:
if(traits_inst.is_class(col, set_->cclasses) == true)
return set_->isnot ? next : ++next;
// nothing in the set recognised the character:
return set_->isnot ? ++next : next;
}
template <class iterator, class Allocator>
class _priv_match_data
{
public:
typedef typename boost::detail::rebind_allocator<int, Allocator>::type i_alloc;
typedef typename boost::detail::rebind_allocator<iterator, Allocator>::type it_alloc;
typedef typename regex_iterator_traits<iterator>::difference_type difference_type;
match_results_base<iterator, Allocator> temp_match;
// failure stacks:
jstack<match_results_base<iterator, Allocator>, Allocator> matches;
jstack<iterator, Allocator> prev_pos;
jstack<const re_syntax_base*, Allocator> prev_record;
jstack<int, Allocator> prev_acc;
int* accumulators;
unsigned int caccumulators;
difference_type state_count;
difference_type max_state_count;
iterator* loop_starts;
_priv_match_data(const match_results_base<iterator, Allocator>&, iterator, iterator, std::size_t);
~_priv_match_data()
{
m_free();
}
void m_free();
void set_accumulator_size(unsigned int size);
int* get_accumulators()
{
return accumulators;
}
iterator* get_loop_starts()
{
return loop_starts;
}
void estimate_max_state_count(iterator a, iterator b, std::size_t states, std::random_access_iterator_tag*)
{
difference_type dist = boost::re_detail::distance(a,b);
states *= states;
difference_type lim = std::numeric_limits<difference_type>::max() - 1000 - states;
if(dist > (difference_type)(lim / states))
max_state_count = lim;
else
max_state_count = 1000 + states * dist;
}
void estimate_max_state_count(iterator a, iterator b, std::size_t states, void*)
{
// we don't know how long the sequence is:
max_state_count = BOOST_REGEX_MAX_STATE_COUNT;
}
};
//
// Constructor:
// Copies m into the scratch match results, builds every failure stack
// with m's allocator, starts with no accumulator storage and a zero
// state count, then estimates the state limit for the range [a, b) and
// the given number of states, dispatching on the iterator category.
//
template <class iterator, class Allocator>
_priv_match_data<iterator, Allocator>::_priv_match_data(const match_results_base<iterator, Allocator>& m, iterator a, iterator b, std::size_t states)
   : temp_match(m),
     matches(64, m.allocator()),
     prev_pos(64, m.allocator()),
     prev_record(64, m.allocator()),
     // prev_acc is constructed like its sibling stacks so that it uses
     // m's allocator rather than a default-constructed one:
     prev_acc(64, m.allocator())
{
   typedef typename regex_iterator_traits<iterator>::iterator_category category;
   accumulators = 0;
   caccumulators = 0;
   loop_starts = 0;
   state_count = 0;
   // the null pointer only selects the overload matching the iterator category:
   estimate_max_state_count(a, b, states, static_cast<category*>(0));
}
//
// set_accumulator_size:
// Makes sure there is room for at least size accumulators and size loop
// start positions. Nothing happens when the current capacity already
// suffices; otherwise the old arrays are released and new ones are
// allocated (existing contents are not carried over). The new ints are
// left uninitialised, the new iterators are default constructed.
//
template <class iterator, class Allocator>
void _priv_match_data<iterator, Allocator>::set_accumulator_size(unsigned int size)
{
   if(size > caccumulators)
   {
      m_free();
      // Forget the released arrays straight away: should anything below
      // throw, the destructor's m_free() must not see the stale pointers
      // or a non-zero count (that would free/destroy them a second time).
      caccumulators = 0;
      accumulators = 0;
      loop_starts = 0;
      accumulators = i_alloc(temp_match.allocator()).allocate(size);
      BOOST_REGEX_NOEH_ASSERT(accumulators)
      loop_starts = it_alloc(temp_match.allocator()).allocate(size);
      BOOST_REGEX_NOEH_ASSERT(loop_starts)
      for(unsigned i = 0; i < size; ++i)
         new (loop_starts + i) iterator();
      // publish the new capacity only once both arrays are fully set up:
      caccumulators = size;
   }
}
//
// m_free:
// Releases the accumulator array and the loop start array (destroying
// each stored iterator first). Does nothing when no storage is held.
// The members are reset afterwards so that calling m_free() again is
// harmless.
//
template <class iterator, class Allocator>
void _priv_match_data<iterator, Allocator>::m_free()
{
   if(caccumulators)
   {
      i_alloc temp1(temp_match.allocator());
      temp1.deallocate(accumulators, caccumulators);
      for(unsigned i = 0; i < caccumulators; ++i)
         ::boost::re_detail::pointer_destroy(loop_starts + i);
      it_alloc temp2(temp_match.allocator());
      temp2.deallocate(loop_starts, caccumulators);
      // leave no dangling pointers or stale count behind:
      accumulators = 0;
      loop_starts = 0;
      caccumulators = 0;
   }
}
//
// access_t:
// A set of static forwarding functions onto reg_expression's own static
// members. It derives from reg_expression and calls the base versions
// with explicit qualification.
// NOTE(review): presumably the inheritance exists to reach members that
// are not publicly accessible - confirm against reg_expression.
//
template <class charT, class traits, class Allocator>
struct access_t : public reg_expression<charT, traits, Allocator>
{
   typedef reg_expression<charT, traits, Allocator> base_type;
   typedef charT char_type;
   typedef traits traits_type;
   typedef Allocator alloc_type;
   typedef typename is_byte<charT>::width_type width_type;

   static int repeat_count(const base_type& expr)
   {
      return base_type::repeat_count(expr);
   }
   static unsigned int restart_type(const base_type& expr)
   {
      return base_type::restart_type(expr);
   }
   static const re_syntax_base* first(const base_type& expr)
   {
      return base_type::first(expr);
   }
   static const unsigned char* get_map(const base_type& expr)
   {
      return base_type::get_map(expr);
   }
   static std::size_t leading_length(const base_type& expr)
   {
      return base_type::leading_length(expr);
   }
   static const kmp_info<charT>* get_kmp(const base_type& expr)
   {
      return base_type::get_kmp(expr);
   }
   // the trailing width_type() argument is a default-constructed tag object
   static bool can_start(char_type c, const unsigned char* _map, unsigned char mask)
   {
      return base_type::can_start(c, _map, mask, width_type());
   }
};
#if defined(BOOST_REGEX_NO_TEMPLATE_SWITCH_MERGE)
//
// Ugly ugly hack:
// templates don't merge if they contain switch statements, so declare these
// templates in an unnamed namespace (i.e. with internal linkage); each translation
// unit then gets its own local copy. It works seamlessly but bloats the app.
namespace{
#endif
template <class iterator, class Allocator, class charT, class traits, class Allocator2>
bool query_match_aux(iterator first,
iterator last,
match_results<iterator, Allocator>& m,
const reg_expression<charT, traits, Allocator2>& e,
unsigned flags,
_priv_match_data<iterator, Allocator>& pd,
iterator* restart)
{
typedef access_t<charT, traits, Allocator2> access;
if(e.flags() & regbase::failbit)
return false;
typedef typename traits::size_type traits_size_type;
typedef typename traits::uchar_type traits_uchar_type;
typedef typename is_byte<charT>::width_type width_type;
typedef typename re_detail::regex_iterator_traits<iterator>::difference_type difference_type;
// declare some local aliases to reduce pointer loads
// good optimising compilers should make this unnecessary!!
jstack<match_results_base<iterator, Allocator>, Allocator>& matches = pd.matches;
jstack<iterator, Allocator>& prev_pos = pd.prev_pos;
jstack<const re_syntax_base*, Allocator>& prev_record = pd.prev_record;
jstack<int, Allocator>& prev_acc = pd.prev_acc;
match_results_base<iterator, Allocator>& temp_match = pd.temp_match;
temp_match.set_first(first);
difference_type& state_count = pd.state_count;
const re_syntax_base* ptr = access::first(e);
bool match_found = false;
bool have_partial_match = false;
bool unwind_stack = false;
bool need_push_match = (e.mark_count() > 1);
int cur_acc = -1; // no active accumulator
pd.set_accumulator_size(access::repeat_count(e));
int* accumulators = pd.get_accumulators();
iterator* start_loop = pd.get_loop_starts();
int k; // for loops
bool icase = e.flags() & regbase::icase;
*restart = first;
iterator base = first;
const traits& traits_inst = e.get_traits();
// dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never
// referenced
(void)traits_inst;
// prepare m for failure:
/*
if((flags & match_init) == 0)
{
m.init_fail(first, last);
} */
retry:
while(first != last)
{
jm_assert(ptr);
++state_count;
switch(ptr->type)
{
case syntax_element_match:
match_jump:
{
// match found, save then fallback in case we missed a
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -