⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex_match.hpp

📁 boost库提供标准的C++ API 配合dev c++使用,功能更加强大
💻 HPP
📖 第 1 页 / 共 5 页
字号:
/*
 *
 * Copyright (c) 1998-2002
 * Dr John Maddock
 *
 * Use, modification and distribution are subject to the 
 * Boost Software License, Version 1.0. (See accompanying file 
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 *
 */

 /*
  *   LOCATION:    see http://www.boost.org for most recent version.
  *   FILE         regex_match.hpp
  *   VERSION      see <boost/version.hpp>
  *   DESCRIPTION: Regular expression matching algorithms.
  *                Note this is an internal header file included
  *                by regex.hpp, do not include on its own.
  */


#ifndef BOOST_REGEX_MATCH_HPP
#define BOOST_REGEX_MATCH_HPP

#ifndef BOOST_REGEX_MAX_STATE_COUNT
#  define BOOST_REGEX_MAX_STATE_COUNT 100000000
#endif

#include <boost/limits.hpp>


namespace boost{
   namespace re_detail{

#ifdef __BORLANDC__
   #pragma option push -a8 -b -Vx -Ve -pc  -w-8026 -w-8027
#endif

//
// Unfortunately Rogue Waves standard library appears to have a bug
// in std::basic_string::compare that results in eroneous answers
// in some cases (tested with Borland C++ 5.1, Rogue Wave lib version
// 0x020101) the test case was:
// {39135,0} < {0xff,0}
// which succeeds when it should not.
//
#ifndef _RWSTD_VER
# define STR_COMP(s,p) s.compare(p)
#else
template <class C, class T, class A>
inline int string_compare(const std::basic_string<C,T,A>& s, const C* p)
{ return s.compare(p); }
inline int string_compare(const std::string& s, const char* p)
{ return std::strcmp(s.c_str(), p); }
# ifndef BOOST_NO_WREGEX
inline int string_compare(const std::wstring& s, const wchar_t* p)
{ return std::wcscmp(s.c_str(), p); }
# endif
# define STR_COMP(s,p) string_compare(s,p)
#endif

template<class charT>
inline const charT* re_skip_past_null(const charT* p)
{
  while (*p != 0) ++p;
  return ++p;
}

template <class iterator, class charT, class traits_type, class Allocator>
iterator BOOST_REGEX_CALL re_is_set_member(iterator next, 
                          iterator last, 
                          const re_set_long* set_, 
                          const reg_expression<charT, traits_type, Allocator>& e)
{   
   const charT* p = reinterpret_cast<const charT*>(set_+1);
   iterator ptr;
   unsigned int i;
   bool icase = e.flags() & regbase::icase;

   if(next == last) return next;

   typedef typename traits_type::string_type traits_string_type;
   const traits_type& traits_inst = e.get_traits();
   
   // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never
   // referenced
   (void)traits_inst;

   // try and match a single character, could be a multi-character
   // collating element...
   for(i = 0; i < set_->csingles; ++i)
   {
      ptr = next;
      if(*p == 0)
      {
         // treat null string as special case:
         if(traits_inst.translate(*ptr, icase) != *p)
         {
            while(*p == 0)++p;
            continue;
         }
         return set_->isnot ? next : (ptr == next) ? ++next : ptr;
      }
      else
      {
         while(*p && (ptr != last))
         {
            if(traits_inst.translate(*ptr, icase) != *p)
               break;
            ++p;
            ++ptr;
         }

         if(*p == 0) // if null we've matched
            return set_->isnot ? next : (ptr == next) ? ++next : ptr;

         p = re_skip_past_null(p);     // skip null
      }
   }

   charT col = traits_inst.translate(*next, icase);


   if(set_->cranges || set_->cequivalents)
   {
      traits_string_type s2(1, col);
      traits_string_type s1;
      //
      // try and match a range, NB only a single character can match
      if(set_->cranges)
      {
         if(e.flags() & regbase::nocollate)
            s1 = s2;
         else
            traits_inst.transform(s1, s2);
         for(i = 0; i < set_->cranges; ++i)
         {
            if(STR_COMP(s1, p) <= 0)
            {
               while(*p)++p;
               ++p;
               if(STR_COMP(s1, p) >= 0)
                  return set_->isnot ? next : ++next;
            }
            else
            {
               // skip first string
               while(*p)++p;
               ++p;
            }
            // skip second string
            while(*p)++p;
            ++p;
         }
      }
      //
      // try and match an equivalence class, NB only a single character can match
      if(set_->cequivalents)
      {
         traits_inst.transform_primary(s1, s2);
         for(i = 0; i < set_->cequivalents; ++i)
         {
            if(STR_COMP(s1, p) == 0)
               return set_->isnot ? next : ++next;
            // skip string
            while(*p)++p;
            ++p;
         }
      }
   }
   if(traits_inst.is_class(col, set_->cclasses) == true)
      return set_->isnot ? next : ++next;
   return set_->isnot ? ++next : next;
}

template <class iterator, class Allocator>
class _priv_match_data
{
public:
   typedef typename boost::detail::rebind_allocator<int, Allocator>::type i_alloc;
   typedef typename boost::detail::rebind_allocator<iterator, Allocator>::type it_alloc;
   typedef typename regex_iterator_traits<iterator>::difference_type difference_type;

   match_results_base<iterator, Allocator> temp_match;
   // failure stacks:
   jstack<match_results_base<iterator, Allocator>, Allocator> matches;
   jstack<iterator, Allocator> prev_pos;
   jstack<const re_syntax_base*, Allocator> prev_record;
   jstack<int, Allocator> prev_acc;
   int* accumulators;
   unsigned int caccumulators;
   difference_type state_count;
   difference_type max_state_count;
   iterator* loop_starts;

   _priv_match_data(const match_results_base<iterator, Allocator>&, iterator, iterator, std::size_t);
   
   ~_priv_match_data()
   {
      m_free();
   }
   void m_free();
   void set_accumulator_size(unsigned int size);
   int* get_accumulators()
   {
      return accumulators;
   }
   iterator* get_loop_starts()
   {
      return loop_starts;
   }
   void estimate_max_state_count(iterator a, iterator b, std::size_t states, std::random_access_iterator_tag*)
   {
      difference_type dist = boost::re_detail::distance(a,b);
      states *= states;
      difference_type lim = std::numeric_limits<difference_type>::max() - 1000 - states;
      if(dist > (difference_type)(lim / states))
         max_state_count = lim;
      else
         max_state_count = 1000 + states * dist;
   }
   void estimate_max_state_count(iterator a, iterator b, std::size_t states, void*)
   {
      // we don't know how long the sequence is:
      max_state_count = BOOST_REGEX_MAX_STATE_COUNT;
   }
};

template <class iterator, class Allocator>
_priv_match_data<iterator, Allocator>::_priv_match_data(const match_results_base<iterator, Allocator>& m, iterator a, iterator b, std::size_t states)
  : temp_match(m), matches(64, m.allocator()), prev_pos(64, m.allocator()), prev_record(64, m.allocator())
{
  typedef typename regex_iterator_traits<iterator>::iterator_category category;
  
  accumulators = 0;
  caccumulators = 0;
  loop_starts = 0;
  state_count = 0;
  estimate_max_state_count(a, b, states, static_cast<category*>(0));
}

template <class iterator, class Allocator>
void _priv_match_data<iterator, Allocator>::set_accumulator_size(unsigned int size)
{
   if(size > caccumulators)
   {
      m_free();
      caccumulators = size;
      accumulators = i_alloc(temp_match.allocator()).allocate(caccumulators);
      BOOST_REGEX_NOEH_ASSERT(accumulators)
      loop_starts = it_alloc(temp_match.allocator()).allocate(caccumulators);
      BOOST_REGEX_NOEH_ASSERT(loop_starts)
      for(unsigned i = 0; i < caccumulators; ++i)
         new (loop_starts + i) iterator();
   }
}

template <class iterator, class Allocator>
void _priv_match_data<iterator, Allocator>::m_free()
{
   if(caccumulators)
   {
      i_alloc temp1(temp_match.allocator());
      temp1.deallocate(accumulators, caccumulators);
      for(unsigned i = 0; i < caccumulators; ++i)
         ::boost::re_detail::pointer_destroy(loop_starts + i);
      it_alloc temp2(temp_match.allocator());
      temp2.deallocate(loop_starts, caccumulators);
   }
}

template <class charT, class traits, class Allocator>
struct access_t : public reg_expression<charT, traits, Allocator>
{
   typedef typename is_byte<charT>::width_type width_type;
   typedef reg_expression<charT, traits, Allocator> base_type;
   typedef charT char_type;
   typedef traits traits_type;
   typedef Allocator alloc_type;

   static int repeat_count(const base_type& b) 
   { return base_type::repeat_count(b); }
   static unsigned int restart_type(const base_type& b) 
   { return base_type::restart_type(b); }
   static const re_syntax_base* first(const base_type& b)
   { return base_type::first(b); }
   static const unsigned char* get_map(const base_type& b)
   { return base_type::get_map(b); }
   static std::size_t leading_length(const base_type& b)
   { return base_type::leading_length(b); }
   static const kmp_info<charT>* get_kmp(const base_type& b)
   { return base_type::get_kmp(b); }
   static bool can_start(char_type c, const unsigned char* _map, unsigned char mask)
   {
      return reg_expression<char_type, traits_type, alloc_type>::can_start(c, _map, mask, width_type());
   }
};


#if defined(BOOST_REGEX_NO_TEMPLATE_SWITCH_MERGE)
//
// Ugly ugly hack,
// template don't merge if they contain switch statements so declare these
// templates in unnamed namespace (ie with internal linkage), each translation
// unit then gets its own local copy, it works seemlessly but bloats the app.
namespace{
#endif

template <class iterator, class Allocator, class charT, class traits, class Allocator2>
bool query_match_aux(iterator first, 
                     iterator last, 
                     match_results<iterator, Allocator>& m, 
                     const reg_expression<charT, traits, Allocator2>& e, 
                     unsigned flags,
                     _priv_match_data<iterator, Allocator>& pd,
                     iterator* restart)
{
   typedef access_t<charT, traits, Allocator2> access;

   if(e.flags() & regbase::failbit)
      return false;

   typedef typename traits::size_type traits_size_type;
   typedef typename traits::uchar_type traits_uchar_type;
   typedef typename is_byte<charT>::width_type width_type;
   typedef typename re_detail::regex_iterator_traits<iterator>::difference_type difference_type;

   // declare some local aliases to reduce pointer loads
   // good optimising compilers should make this unnecessary!!
   jstack<match_results_base<iterator, Allocator>, Allocator>& matches = pd.matches;
   jstack<iterator, Allocator>& prev_pos = pd.prev_pos;
   jstack<const re_syntax_base*, Allocator>& prev_record = pd.prev_record;
   jstack<int, Allocator>& prev_acc = pd.prev_acc;
   match_results_base<iterator, Allocator>& temp_match = pd.temp_match;
   temp_match.set_first(first);
   difference_type& state_count = pd.state_count;

   const re_syntax_base* ptr = access::first(e);
   bool match_found = false;
   bool have_partial_match = false;
   bool unwind_stack = false;
   bool need_push_match = (e.mark_count() > 1);
   int cur_acc = -1;    // no active accumulator
   pd.set_accumulator_size(access::repeat_count(e));
   int* accumulators = pd.get_accumulators();
   iterator* start_loop = pd.get_loop_starts();
   int k; // for loops
   bool icase = e.flags() & regbase::icase;
   *restart = first;
   iterator base = first;
   const traits& traits_inst = e.get_traits();
   // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never
   // referenced
   (void)traits_inst;

   // prepare m for failure:
   /*
   if((flags & match_init) == 0)
   {
      m.init_fail(first, last);
   } */

   retry:

   while(first != last)
   {
      jm_assert(ptr);
      ++state_count;
      switch(ptr->type)
      {
      case syntax_element_match:
         match_jump:
         {
            // match found, save then fallback in case we missed a

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -