regex_match.hpp

来自「CGAL is a collaborative effort of severa」· HPP 代码 · 共 1,914 行 · 第 1/5 页

HPP
1,914
字号
/* * * Copyright (c) 1998-2002 * Dr John Maddock * * Use, modification and distribution are subject to the  * Boost Software License, Version 1.0. (See accompanying file  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) * */ /*  *   LOCATION:    see http://www.boost.org for most recent version.  *   FILE         regex_match.hpp  *   VERSION      see <boost/version.hpp>  *   DESCRIPTION: Regular expression matching algorithms.  *                Note this is an internal header file included  *                by regex.hpp, do not include on its own.  */#ifndef BOOST_REGEX_MATCH_HPP#define BOOST_REGEX_MATCH_HPP#ifndef BOOST_REGEX_MAX_STATE_COUNT#  define BOOST_REGEX_MAX_STATE_COUNT 100000000#endif#include <boost/limits.hpp>namespace boost{   namespace re_detail{#ifdef __BORLANDC__   #pragma option push -a8 -b -Vx -Ve -pc  -w-8026 -w-8027#endif//// Unfortunately Rogue Waves standard library appears to have a bug// in std::basic_string::compare that results in eroneous answers// in some cases (tested with Borland C++ 5.1, Rogue Wave lib version// 0x020101) the test case was:// {39135,0} < {0xff,0}// which succeeds when it should not.//#ifndef _RWSTD_VER# define STR_COMP(s,p) s.compare(p)#elsetemplate <class C, class T, class A>inline int string_compare(const std::basic_string<C,T,A>& s, const C* p){ return s.compare(p); }inline int string_compare(const std::string& s, const char* p){ return std::strcmp(s.c_str(), p); }# ifndef BOOST_NO_WREGEXinline int string_compare(const std::wstring& s, const wchar_t* p){ return std::wcscmp(s.c_str(), p); }# endif# define STR_COMP(s,p) string_compare(s,p)#endiftemplate<class charT>inline const charT* re_skip_past_null(const charT* p){  while (*p != 0) ++p;  return ++p;}template <class iterator, class charT, class traits_type, class Allocator>iterator BOOST_REGEX_CALL re_is_set_member(iterator next,                           iterator last,                           const re_set_long* set_,                           const reg_expression<charT, traits_type, Allocator>& e){      const charT* p = reinterpret_cast<const charT*>(set_+1);   iterator ptr;   unsigned int i;   bool icase = e.flags() & regbase::icase;   if(next == last) return next;   typedef typename traits_type::string_type traits_string_type;   const traits_type& traits_inst = e.get_traits();      // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never   // referenced   (void)traits_inst;   // try and match a single character, could be a multi-character   // collating element...   for(i = 0; i < set_->csingles; ++i)   {      ptr = next;      if(*p == 0)      {         // treat null string as special case:         if(traits_inst.translate(*ptr, icase) != *p)         {            while(*p == 0)++p;            continue;         }         return set_->isnot ? next : (ptr == next) ? ++next : ptr;      }      else      {         while(*p && (ptr != last))         {            if(traits_inst.translate(*ptr, icase) != *p)               break;            ++p;            ++ptr;         }         if(*p == 0) // if null we've matched            return set_->isnot ? next : (ptr == next) ? ++next : ptr;         p = re_skip_past_null(p);     // skip null      }   }   charT col = traits_inst.translate(*next, icase);   if(set_->cranges || set_->cequivalents)   {      traits_string_type s2(1, col);      traits_string_type s1;      //      // try and match a range, NB only a single character can match      if(set_->cranges)      {         if(e.flags() & regbase::nocollate)            s1 = s2;         else            traits_inst.transform(s1, s2);         for(i = 0; i < set_->cranges; ++i)         {            if(STR_COMP(s1, p) <= 0)            {               while(*p)++p;               ++p;               if(STR_COMP(s1, p) >= 0)                  return set_->isnot ? next : ++next;            }            else            {               // skip first string               while(*p)++p;               ++p;            }            // skip second string            while(*p)++p;            ++p;         }      }      //      // try and match an equivalence class, NB only a single character can match      if(set_->cequivalents)      {         traits_inst.transform_primary(s1, s2);         for(i = 0; i < set_->cequivalents; ++i)         {            if(STR_COMP(s1, p) == 0)               return set_->isnot ? next : ++next;            // skip string            while(*p)++p;            ++p;         }      }   }   if(traits_inst.is_class(col, set_->cclasses) == true)      return set_->isnot ? next : ++next;   return set_->isnot ? ++next : next;}template <class iterator, class Allocator>class _priv_match_data{public:   typedef typename boost::detail::rebind_allocator<int, Allocator>::type i_alloc;   typedef typename boost::detail::rebind_allocator<iterator, Allocator>::type it_alloc;   typedef typename regex_iterator_traits<iterator>::difference_type difference_type;   match_results_base<iterator, Allocator> temp_match;   // failure stacks:   jstack<match_results_base<iterator, Allocator>, Allocator> matches;   jstack<iterator, Allocator> prev_pos;   jstack<const re_syntax_base*, Allocator> prev_record;   jstack<int, Allocator> prev_acc;   int* accumulators;   unsigned int caccumulators;   difference_type state_count;   difference_type max_state_count;   iterator* loop_starts;   _priv_match_data(const match_results_base<iterator, Allocator>&, iterator, iterator, std::size_t);      ~_priv_match_data()   {      m_free();   }   void m_free();   void set_accumulator_size(unsigned int size);   int* get_accumulators()   {      return accumulators;   }   iterator* get_loop_starts()   {      return loop_starts;   }   void estimate_max_state_count(iterator a, iterator b, std::size_t states, std::random_access_iterator_tag*)   {      difference_type dist = boost::re_detail::distance(a,b);      states *= states;      difference_type lim = (std::numeric_limits<difference_type>::max)() - 1000 - states;      if(dist > (difference_type)(lim / states))         max_state_count = lim;      else         max_state_count = 1000 + states * dist;   }   void estimate_max_state_count(iterator a, iterator b, std::size_t states, void*)   {      // we don't know how long the sequence is:      max_state_count = BOOST_REGEX_MAX_STATE_COUNT;   }};template <class iterator, class Allocator>_priv_match_data<iterator, Allocator>::_priv_match_data(const match_results_base<iterator, Allocator>& m, iterator a, iterator b, std::size_t states)  : temp_match(m), matches(64, m.allocator()), prev_pos(64, m.allocator()), prev_record(64, m.allocator()){  typedef typename regex_iterator_traits<iterator>::iterator_category category;    accumulators = 0;  caccumulators = 0;  loop_starts = 0;  state_count = 0;  estimate_max_state_count(a, b, states, static_cast<category*>(0));}template <class iterator, class Allocator>void _priv_match_data<iterator, Allocator>::set_accumulator_size(unsigned int size){   if(size > caccumulators)   {      m_free();      caccumulators = size;      accumulators = i_alloc(temp_match.allocator()).allocate(caccumulators);      BOOST_REGEX_NOEH_ASSERT(accumulators)      loop_starts = it_alloc(temp_match.allocator()).allocate(caccumulators);      BOOST_REGEX_NOEH_ASSERT(loop_starts)      for(unsigned i = 0; i < caccumulators; ++i)         new (loop_starts + i) iterator();   }}template <class iterator, class Allocator>void _priv_match_data<iterator, Allocator>::m_free(){   if(caccumulators)   {      i_alloc temp1(temp_match.allocator());      temp1.deallocate(accumulators, caccumulators);      for(unsigned i = 0; i < caccumulators; ++i)         ::boost::re_detail::pointer_destroy(loop_starts + i);      it_alloc temp2(temp_match.allocator());      temp2.deallocate(loop_starts, caccumulators);   }}template <class charT, class traits, class Allocator>struct access_t : public reg_expression<charT, traits, Allocator>{   typedef typename is_byte<charT>::width_type width_type;   typedef reg_expression<charT, traits, Allocator> base_type;   typedef charT char_type;   typedef traits traits_type;   typedef Allocator alloc_type;   static int repeat_count(const base_type& b)    { return base_type::repeat_count(b); }   static unsigned int restart_type(const base_type& b)    { return base_type::restart_type(b); }   static const re_syntax_base* first(const base_type& b)   { return base_type::first(b); }   static const unsigned char* get_map(const base_type& b)   { return base_type::get_map(b); }   static std::size_t leading_length(const base_type& b)   { return base_type::leading_length(b); }   static const kmp_info<charT>* get_kmp(const base_type& b)   { return base_type::get_kmp(b); }   static bool can_start(char_type c, const unsigned char* _map, unsigned char mask)   {      return reg_expression<char_type, traits_type, alloc_type>::can_start(c, _map, mask, width_type());   }};#if defined(BOOST_REGEX_NO_TEMPLATE_SWITCH_MERGE)//// Ugly ugly hack,// template don't merge if they contain switch statements so declare these// templates in unnamed namespace (ie with internal linkage), each translation// unit then gets its own local copy, it works seemlessly but bloats the app.namespace{#endiftemplate <class iterator, class Allocator, class charT, class traits, class Allocator2>bool query_match_aux(iterator first,                      iterator last,                      match_results<iterator, Allocator>& m,                      const reg_expression<charT, traits, Allocator2>& e,                      unsigned flags,                     _priv_match_data<iterator, Allocator>& pd,                     iterator* restart){   typedef access_t<charT, traits, Allocator2> access;   if(e.flags() & regbase::failbit)      return false;   typedef typename traits::size_type traits_size_type;   typedef typename traits::uchar_type traits_uchar_type;   typedef typename is_byte<charT>::width_type width_type;   typedef typename re_detail::regex_iterator_traits<iterator>::difference_type difference_type;   // declare some local aliases to reduce pointer loads   // good optimising compilers should make this unnecessary!!   jstack<match_results_base<iterator, Allocator>, Allocator>& matches = pd.matches;   jstack<iterator, Allocator>& prev_pos = pd.prev_pos;   jstack<const re_syntax_base*, Allocator>& prev_record = pd.prev_record;   jstack<int, Allocator>& prev_acc = pd.prev_acc;   match_results_base<iterator, Allocator>& temp_match = pd.temp_match;   temp_match.set_first(first);   difference_type& state_count = pd.state_count;   const re_syntax_base* ptr = access::first(e);   bool match_found = false;   bool have_partial_match = false;   bool unwind_stack = false;   bool need_push_match = (e.mark_count() > 1);   int cur_acc = -1;    // no active accumulator   pd.set_accumulator_size(access::repeat_count(e));   int* accumulators = pd.get_accumulators();   iterator* start_loop = pd.get_loop_starts();   int k; // for loops   bool icase = e.flags() & regbase::icase;   *restart = first;   iterator base = first;   const traits& traits_inst = e.get_traits();   // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never   // referenced   (void)traits_inst;   // prepare m for failure:   /*   if((flags & match_init) == 0)   {      m.init_fail(first, last);   } */   retry:   while(first != last)   {      jm_assert(ptr);      ++state_count;      switch(ptr->type)      {      case syntax_element_match:         match_jump:         {            // match found, save then fallback in case we missed a            // longer one.            if((flags & match_not_null) && (first == temp_match[0].first))               goto failure;            if((flags & match_all) && (first != last))               goto failure;            temp_match.set_second(first);            m.maybe_assign(temp_match);            match_found = true;            if(((flags & match_any) && ((first == last) || !(flags & match_all))) || ((first == last) && (need_push_match == false)))

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?