⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex_compile.hpp

📁 boost库提供标准的C++ API 配合dev c++使用,功能更加强大
💻 HPP
📖 第 1 页 / 共 5 页
字号:
         if(traits_inst.is_class(*first, traits_type::char_class_xdigit) == false)
         {
            fail(REG_BADBR);
            break;
         }
         c = (charT)traits_inst.toi(first, last, -16);
      }
      break;
   case traits_type::syntax_c:
      ++first;
      if(first == last)
      {
         fail(REG_EESCAPE);
         break;
      }
      if(((traits_uchar_type)(*first) < (traits_uchar_type)'@')
            || ((traits_uchar_type)(*first) > (traits_uchar_type)127) )
      {
         fail(REG_EESCAPE);
         return (charT)0;
      }
      c = (charT)((traits_uchar_type)(*first) - (traits_uchar_type)'@');
      ++first;
      break;
   case traits_type::syntax_e:
      c = (charT)27;
      ++first;
      break;
   case traits_type::syntax_digit:
      c = (charT)traits_inst.toi(first, last, -8);
      break;
   default:
      //c = *first;
      ++first;
   }
   return c;
}

template <class charT, class traits, class Allocator>
void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_maps()
{
   re_detail::re_syntax_base* record = static_cast<re_detail::re_syntax_base*>(data.data());
   // always compile the first _map:
   std::memset(startmap, 0, 256);
   record->can_be_null = 0;
   compile_map(record, startmap, 0, re_detail::mask_all);

   while(record->type != re_detail::syntax_element_match)
   {
      if((record->type == re_detail::syntax_element_alt) || (record->type == re_detail::syntax_element_rep))
      {
         std::memset(&(static_cast<re_detail::re_jump*>(record)->_map), 0, 256);
         record->can_be_null = 0;
         compile_map(record->next.p, static_cast<re_detail::re_jump*>(record)->_map, &(record->can_be_null), re_detail::mask_take, static_cast<re_detail::re_jump*>(record)->alt.p);
         compile_map(static_cast<re_detail::re_jump*>(record)->alt.p, static_cast<re_detail::re_jump*>(record)->_map, &(record->can_be_null), re_detail::mask_skip);
         if(record->type == re_detail::syntax_element_rep)
         {
            re_detail::re_repeat* rep = static_cast<re_detail::re_repeat*>(record);
            // set whether this is a singleton repeat or not:
            if(rep->next.p->next.p->next.p == rep->alt.p)
            {
               rep->singleton = true;
            }
            else
               rep->singleton = false;
         }
      }
      else
      {
         record->can_be_null = 0;
         compile_map(record, 0, &(record->can_be_null), re_detail::mask_all);
      }
      record = record->next.p;
   }
   record->can_be_null = re_detail::mask_all;
}

template <class charT, class traits, class Allocator>
bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start(
                        re_detail::re_syntax_base* node, charT cc, re_detail::re_syntax_base* terminal) const
{
   unsigned int c;

   switch(node->type)
   {
   case re_detail::syntax_element_startmark:
      if(static_cast<const re_detail::re_brace*>(node)->index == -1)
      {
         return probe_start(node->next.p->next.p, cc, terminal)
            && probe_start(static_cast<const re_detail::re_jump*>(node->next.p)->alt.p, cc, terminal);
      }
      // fall through:
   case re_detail::syntax_element_endmark:
   case re_detail::syntax_element_start_line:
   case re_detail::syntax_element_word_boundary:
   case re_detail::syntax_element_buffer_start:
   case re_detail::syntax_element_restart_continue:
      // doesn't tell us anything about the next character, so:
      return probe_start(node->next.p, cc, terminal);
   case re_detail::syntax_element_literal:
      // only the first character of the literal can match:
      // note these have already been translated:
      if(*reinterpret_cast<charT*>(static_cast<re_detail::re_literal*>(node)+1) == traits_inst.translate(cc, (_flags & regbase::icase)))
         return true;
      return false;
   case re_detail::syntax_element_end_line:
      // next character (if there is one!) must be a newline:
      if(traits_inst.is_separator(traits_inst.translate(cc, (_flags & regbase::icase))))
         return true;
      return false;
   case re_detail::syntax_element_wild:
      return true;
   case re_detail::syntax_element_match:
      return true;
   case re_detail::syntax_element_within_word:
   case re_detail::syntax_element_word_start:
      return traits_inst.is_class(traits_inst.translate(cc, (_flags & regbase::icase)), traits_type::char_class_word);
   case re_detail::syntax_element_word_end:
      // what follows must not be a word character,
      return traits_inst.is_class(traits_inst.translate(cc, (_flags & regbase::icase)), traits_type::char_class_word) ? false : true;
   case re_detail::syntax_element_buffer_end:
      // we can be null, nothing must follow,
      // NB we assume that this is followed by
      // re_detail::syntax_element_match, if its not then we can
      // never match anything anyway!!
      return false;
   case re_detail::syntax_element_soft_buffer_end:
      // we can be null, only newlines must follow,
      // NB we assume that this is followed by
      // re_detail::syntax_element_match, if its not then we can
      // never match anything anyway!!
      return traits_inst.is_separator(traits_inst.translate(cc, (_flags & regbase::icase)));
   case re_detail::syntax_element_backref:
      // there's no easy way to determine this
      // which is not to say it can't be done!
      // for now:
      return true;
   case re_detail::syntax_element_long_set:
      // we can not be null,
      // we need to add already translated values in the set
      // to values in the _map
      return re_detail::re_maybe_set_member(cc, static_cast<const re_detail::re_set_long*>(node), *this) || (re_detail::re_is_set_member(static_cast<const charT*>(&cc), static_cast<const charT*>(&cc+1), static_cast<re_detail::re_set_long*>(node), *this) != &cc);
   case re_detail::syntax_element_set:
      // set all the elements that are set in corresponding set:
      c = (traits_size_type)(traits_uchar_type)traits_inst.translate(cc, (_flags & regbase::icase));
      return static_cast<re_detail::re_set*>(node)->_map[c] != 0;
   case re_detail::syntax_element_jump:
      if(static_cast<re_detail::re_jump*>(node)->alt.p < node)
      {
         // backwards jump,
         // caused only by end of repeat section, we'll treat this
         // the same as a match, because the sub-expression has matched.
         if(node->next.p == terminal)
            return true; // null repeat - we can always take this
         else
         {
            //
            // take the jump, add in fix for the fact that if the
            // repeat that we're jumping to has non-zero minimum count
            // then we need to add in the possiblity that we could still
            // skip that repeat.
            re_detail::re_syntax_base* next = static_cast<re_detail::re_jump*>(node)->alt.p;
            bool b = probe_start(next, cc, terminal);
            if((next->type == re_detail::syntax_element_rep) && (static_cast<re_detail::re_repeat*>(next)->min != 0))
            {
               b = b || probe_start(static_cast<re_detail::re_jump*>(next)->alt.p, cc, terminal);
            }
            return b;
         }
      }
      else
         // take the jump and compile:
         return probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal);
   case re_detail::syntax_element_alt:
      // we need to take the OR of the two alternatives:
      return probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal) || probe_start(node->next.p, cc, terminal);
   case re_detail::syntax_element_rep:
      // we need to take the OR of the two alternatives
      if(static_cast<re_detail::re_repeat*>(node)->min == 0)
         return probe_start(node->next.p, cc, static_cast<re_detail::re_jump*>(node)->alt.p) || probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal);
      else
         return probe_start(node->next.p, cc, static_cast<re_detail::re_jump*>(node)->alt.p);
   case re_detail::syntax_element_combining:
      return !traits_inst.is_combining(traits_inst.translate(cc, (_flags & regbase::icase)));
   }
   return false;
}

template <class charT, class traits, class Allocator>
bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start_null(re_detail::re_syntax_base* node, re_detail::re_syntax_base* terminal)const
{
   switch(node->type)
   {
   case re_detail::syntax_element_startmark:
   case re_detail::syntax_element_endmark:
   case re_detail::syntax_element_start_line:
   case re_detail::syntax_element_word_boundary:
   case re_detail::syntax_element_buffer_start:
   case re_detail::syntax_element_restart_continue:
   case re_detail::syntax_element_end_line:
   case re_detail::syntax_element_word_end:
      // doesn't tell us anything about the next character, so:
      return probe_start_null(node->next.p, terminal);
   case re_detail::syntax_element_match:
   case re_detail::syntax_element_buffer_end:
   case re_detail::syntax_element_soft_buffer_end:
   case re_detail::syntax_element_backref:
      return true;
   case re_detail::syntax_element_jump:
      if(static_cast<re_detail::re_jump*>(node)->alt.p < node)
      {
         // backwards jump,
         // caused only by end of repeat section, we'll treat this
         // the same as a match, because the sub-expression has matched.
         // this is only caused by NULL repeats as in "(a*)*" or "(\<)*"
         // these are really nonsensence and make the matching code much
         // harder, it would be nice to get rid of them altogether.
         if(node->next.p == terminal)
            return true;
         else
            return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);
      }
      else
         // take the jump and compile:
         return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);
   case re_detail::syntax_element_alt:
      // we need to take the OR of the two alternatives:
      return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal) || probe_start_null(node->next.p, terminal);
   case re_detail::syntax_element_rep:
      // only need to consider skipping the repeat:
      return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);
   default:
      break;
   }
   return false;
}

template <class charT, class traits, class Allocator>
void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_map(
                        re_detail::re_syntax_base* node, unsigned char* _map,
                        unsigned int* pnull, unsigned char mask, re_detail::re_syntax_base* terminal)const
{
   if(_map)
   {
      for(unsigned int i = 0; i < 256; ++i)
      {
         if(probe_start(node, (charT)i, terminal))
            _map[i] |= mask;
      }
   }
   if(pnull && probe_start_null(node, terminal))
      *pnull |= mask;
}
  
template <class charT, class traits, class Allocator>
void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::move_offsets(re_detail::re_syntax_base* j, unsigned size)
{
# ifdef BOOST_MSVC
#  pragma warning(push)
#  pragma warning(disable: 4127)
#endif
   // move all offsets starting with j->link forward by size
   // called after an insert:
   j = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + j->next.i);
   while(true)
   {
      switch(j->type)
      {
      case re_detail::syntax_element_rep:
         static_cast<re_detail::re_jump*>(j)->alt.i += size;
         j->next.i += size;
         break;
      case re_detail::syntax_element_jump:
      case re_detail::syntax_element_alt:
         static_cast<re_detail::re_jump*>(j)->alt.i += size;
         j->next.i += size;
         break;
      default:
         j->next.i += size;
         break;
      }
      if(j->next.i == size)
         break;
      j = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + j->next.i);
   }
# ifdef BOOST_MSVC
#  pragma warning(pop)
#endif
}

template <class charT, class traits, class Allocator>
re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set_simple(re_detail::re_syntax_base* dat, unsigned long cls, bool isnot)
{
   typedef typename re_detail::is_byte<charT>::width_type width_type;
   re_detail::jstack<traits_string_type, Allocator> singles(64, data.allocator());
   re_detail::jstack<traits_string_type, Allocator> ranges(64, data.allocator());
   re_detail::jstack<boost::uint_fast32_t, Allocator> classes(64, data.allocator());
   re_detail::jstack<traits_string_type, Allocator> equivalents(64, data.allocator());
   classes.push(cls);
   if(dat)
   {
      data.align();
      dat->next.i = data.size();
   }
   return compile_set_aux(singles, ranges, classes, equivalents, isnot, width_type());
}

template <class charT, class traits, class Allocator>
re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set(const charT*& first, const charT* last)
{
   re_detail::jstack<traits_string_type, Allocator> singles(64, data.allocator());
   re_detail::jstack<traits_string_type, Allocator> ranges(64, data.allocator());
   re_detail::jstack<boost::uint_fast32_t, Allocator> classes(64, data.allocator());
   re_detail::jstack<traits_string_type, Allocator> equivalents(64, data.allocator());
   bool has_digraphs = false;
   jm_assert(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_open_set);
   ++first;
   bool started = false;
   bool done = false;
   bool isnot = false;

   enum last_type
   {
      last_single,
      last_none,
      last_dash
   };

   unsigned l = last_none;
   traits_string_type s;

   while((first != last) && !done)
   {
      traits_size_type c = (traits_size_type)(traits_uchar_type)*first;
      // this is only used for the switch(), but cannot be folded in
      // due to a bug in Comeau 4.2.44beta3
      traits_size_type syntax = traits_inst.syntax_type(c);
      switch(syntax)
      {
      case traits_type::syntax_caret:
         if(!started && !isnot)
         {
            isnot = true;
         }
         else
         {
            s = (charT)c;
            goto char_set_literal;
         }
         break;
      case traits_type::syntax_open_set:
      {
         if((_flags & char_classes) == 0)
         {
            s = (charT)c;
            goto char_set_literal;
         }
         // check to see if we really have a class:
         const charT* base = first;
         // this is only used for the switch(), but cannot be folded in
         // due to a bug in Comeau 4.2.44beta3

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -