regex_match.hpp
来自「CGAL is a collaborative effort of severa」· HPP 代码 · 共 1,914 行 · 第 1/5 页
HPP
1,914 行
/* * * Copyright (c) 1998-2002 * Dr John Maddock * * Use, modification and distribution are subject to the * Boost Software License, Version 1.0. (See accompanying file * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) * */ /* * LOCATION: see http://www.boost.org for most recent version. * FILE regex_match.hpp * VERSION see <boost/version.hpp> * DESCRIPTION: Regular expression matching algorithms. * Note this is an internal header file included * by regex.hpp, do not include on its own. */#ifndef BOOST_REGEX_MATCH_HPP#define BOOST_REGEX_MATCH_HPP#ifndef BOOST_REGEX_MAX_STATE_COUNT# define BOOST_REGEX_MAX_STATE_COUNT 100000000#endif#include <boost/limits.hpp>namespace boost{ namespace re_detail{#ifdef __BORLANDC__ #pragma option push -a8 -b -Vx -Ve -pc -w-8026 -w-8027#endif//// Unfortunately Rogue Waves standard library appears to have a bug// in std::basic_string::compare that results in eroneous answers// in some cases (tested with Borland C++ 5.1, Rogue Wave lib version// 0x020101) the test case was:// {39135,0} < {0xff,0}// which succeeds when it should not.//#ifndef _RWSTD_VER# define STR_COMP(s,p) s.compare(p)#elsetemplate <class C, class T, class A>inline int string_compare(const std::basic_string<C,T,A>& s, const C* p){ return s.compare(p); }inline int string_compare(const std::string& s, const char* p){ return std::strcmp(s.c_str(), p); }# ifndef BOOST_NO_WREGEXinline int string_compare(const std::wstring& s, const wchar_t* p){ return std::wcscmp(s.c_str(), p); }# endif# define STR_COMP(s,p) string_compare(s,p)#endiftemplate<class charT>inline const charT* re_skip_past_null(const charT* p){ while (*p != 0) ++p; return ++p;}template <class iterator, class charT, class traits_type, class Allocator>iterator BOOST_REGEX_CALL re_is_set_member(iterator next, iterator last, const re_set_long* set_, const reg_expression<charT, traits_type, Allocator>& e){ const charT* p = reinterpret_cast<const charT*>(set_+1); iterator ptr; unsigned int i; bool icase = e.flags() & regbase::icase; if(next == last) return next; typedef typename traits_type::string_type traits_string_type; const traits_type& traits_inst = e.get_traits(); // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never // referenced (void)traits_inst; // try and match a single character, could be a multi-character // collating element... for(i = 0; i < set_->csingles; ++i) { ptr = next; if(*p == 0) { // treat null string as special case: if(traits_inst.translate(*ptr, icase) != *p) { while(*p == 0)++p; continue; } return set_->isnot ? next : (ptr == next) ? ++next : ptr; } else { while(*p && (ptr != last)) { if(traits_inst.translate(*ptr, icase) != *p) break; ++p; ++ptr; } if(*p == 0) // if null we've matched return set_->isnot ? next : (ptr == next) ? ++next : ptr; p = re_skip_past_null(p); // skip null } } charT col = traits_inst.translate(*next, icase); if(set_->cranges || set_->cequivalents) { traits_string_type s2(1, col); traits_string_type s1; // // try and match a range, NB only a single character can match if(set_->cranges) { if(e.flags() & regbase::nocollate) s1 = s2; else traits_inst.transform(s1, s2); for(i = 0; i < set_->cranges; ++i) { if(STR_COMP(s1, p) <= 0) { while(*p)++p; ++p; if(STR_COMP(s1, p) >= 0) return set_->isnot ? next : ++next; } else { // skip first string while(*p)++p; ++p; } // skip second string while(*p)++p; ++p; } } // // try and match an equivalence class, NB only a single character can match if(set_->cequivalents) { traits_inst.transform_primary(s1, s2); for(i = 0; i < set_->cequivalents; ++i) { if(STR_COMP(s1, p) == 0) return set_->isnot ? next : ++next; // skip string while(*p)++p; ++p; } } } if(traits_inst.is_class(col, set_->cclasses) == true) return set_->isnot ? next : ++next; return set_->isnot ? ++next : next;}template <class iterator, class Allocator>class _priv_match_data{public: typedef typename boost::detail::rebind_allocator<int, Allocator>::type i_alloc; typedef typename boost::detail::rebind_allocator<iterator, Allocator>::type it_alloc; typedef typename regex_iterator_traits<iterator>::difference_type difference_type; match_results_base<iterator, Allocator> temp_match; // failure stacks: jstack<match_results_base<iterator, Allocator>, Allocator> matches; jstack<iterator, Allocator> prev_pos; jstack<const re_syntax_base*, Allocator> prev_record; jstack<int, Allocator> prev_acc; int* accumulators; unsigned int caccumulators; difference_type state_count; difference_type max_state_count; iterator* loop_starts; _priv_match_data(const match_results_base<iterator, Allocator>&, iterator, iterator, std::size_t); ~_priv_match_data() { m_free(); } void m_free(); void set_accumulator_size(unsigned int size); int* get_accumulators() { return accumulators; } iterator* get_loop_starts() { return loop_starts; } void estimate_max_state_count(iterator a, iterator b, std::size_t states, std::random_access_iterator_tag*) { difference_type dist = boost::re_detail::distance(a,b); states *= states; difference_type lim = (std::numeric_limits<difference_type>::max)() - 1000 - states; if(dist > (difference_type)(lim / states)) max_state_count = lim; else max_state_count = 1000 + states * dist; } void estimate_max_state_count(iterator a, iterator b, std::size_t states, void*) { // we don't know how long the sequence is: max_state_count = BOOST_REGEX_MAX_STATE_COUNT; }};template <class iterator, class Allocator>_priv_match_data<iterator, Allocator>::_priv_match_data(const match_results_base<iterator, Allocator>& m, iterator a, iterator b, std::size_t states) : temp_match(m), matches(64, m.allocator()), prev_pos(64, m.allocator()), prev_record(64, m.allocator()){ typedef typename regex_iterator_traits<iterator>::iterator_category category; accumulators = 0; caccumulators = 0; loop_starts = 0; state_count = 0; estimate_max_state_count(a, b, states, static_cast<category*>(0));}template <class iterator, class Allocator>void _priv_match_data<iterator, Allocator>::set_accumulator_size(unsigned int size){ if(size > caccumulators) { m_free(); caccumulators = size; accumulators = i_alloc(temp_match.allocator()).allocate(caccumulators); BOOST_REGEX_NOEH_ASSERT(accumulators) loop_starts = it_alloc(temp_match.allocator()).allocate(caccumulators); BOOST_REGEX_NOEH_ASSERT(loop_starts) for(unsigned i = 0; i < caccumulators; ++i) new (loop_starts + i) iterator(); }}template <class iterator, class Allocator>void _priv_match_data<iterator, Allocator>::m_free(){ if(caccumulators) { i_alloc temp1(temp_match.allocator()); temp1.deallocate(accumulators, caccumulators); for(unsigned i = 0; i < caccumulators; ++i) ::boost::re_detail::pointer_destroy(loop_starts + i); it_alloc temp2(temp_match.allocator()); temp2.deallocate(loop_starts, caccumulators); }}template <class charT, class traits, class Allocator>struct access_t : public reg_expression<charT, traits, Allocator>{ typedef typename is_byte<charT>::width_type width_type; typedef reg_expression<charT, traits, Allocator> base_type; typedef charT char_type; typedef traits traits_type; typedef Allocator alloc_type; static int repeat_count(const base_type& b) { return base_type::repeat_count(b); } static unsigned int restart_type(const base_type& b) { return base_type::restart_type(b); } static const re_syntax_base* first(const base_type& b) { return base_type::first(b); } static const unsigned char* get_map(const base_type& b) { return base_type::get_map(b); } static std::size_t leading_length(const base_type& b) { return base_type::leading_length(b); } static const kmp_info<charT>* get_kmp(const base_type& b) { return base_type::get_kmp(b); } static bool can_start(char_type c, const unsigned char* _map, unsigned char mask) { return reg_expression<char_type, traits_type, alloc_type>::can_start(c, _map, mask, width_type()); }};#if defined(BOOST_REGEX_NO_TEMPLATE_SWITCH_MERGE)//// Ugly ugly hack,// template don't merge if they contain switch statements so declare these// templates in unnamed namespace (ie with internal linkage), each translation// unit then gets its own local copy, it works seemlessly but bloats the app.namespace{#endiftemplate <class iterator, class Allocator, class charT, class traits, class Allocator2>bool query_match_aux(iterator first, iterator last, match_results<iterator, Allocator>& m, const reg_expression<charT, traits, Allocator2>& e, unsigned flags, _priv_match_data<iterator, Allocator>& pd, iterator* restart){ typedef access_t<charT, traits, Allocator2> access; if(e.flags() & regbase::failbit) return false; typedef typename traits::size_type traits_size_type; typedef typename traits::uchar_type traits_uchar_type; typedef typename is_byte<charT>::width_type width_type; typedef typename re_detail::regex_iterator_traits<iterator>::difference_type difference_type; // declare some local aliases to reduce pointer loads // good optimising compilers should make this unnecessary!! jstack<match_results_base<iterator, Allocator>, Allocator>& matches = pd.matches; jstack<iterator, Allocator>& prev_pos = pd.prev_pos; jstack<const re_syntax_base*, Allocator>& prev_record = pd.prev_record; jstack<int, Allocator>& prev_acc = pd.prev_acc; match_results_base<iterator, Allocator>& temp_match = pd.temp_match; temp_match.set_first(first); difference_type& state_count = pd.state_count; const re_syntax_base* ptr = access::first(e); bool match_found = false; bool have_partial_match = false; bool unwind_stack = false; bool need_push_match = (e.mark_count() > 1); int cur_acc = -1; // no active accumulator pd.set_accumulator_size(access::repeat_count(e)); int* accumulators = pd.get_accumulators(); iterator* start_loop = pd.get_loop_starts(); int k; // for loops bool icase = e.flags() & regbase::icase; *restart = first; iterator base = first; const traits& traits_inst = e.get_traits(); // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never // referenced (void)traits_inst; // prepare m for failure: /* if((flags & match_init) == 0) { m.init_fail(first, last); } */ retry: while(first != last) { jm_assert(ptr); ++state_count; switch(ptr->type) { case syntax_element_match: match_jump: { // match found, save then fallback in case we missed a // longer one. if((flags & match_not_null) && (first == temp_match[0].first)) goto failure; if((flags & match_all) && (first != last)) goto failure; temp_match.set_second(first); m.maybe_assign(temp_match); match_found = true; if(((flags & match_any) && ((first == last) || !(flags & match_all))) || ((first == last) && (need_push_match == false)))
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?