regex_compile.hpp

来自「CGAL is a collaborative effort of severa」· HPP 代码 · 共 1,861 行 · 第 1/5 页

HPP
1,861
字号
         fail(REG_EESCAPE);         break;      }      if(((traits_uchar_type)(*first) < (traits_uchar_type)'@')            || ((traits_uchar_type)(*first) > (traits_uchar_type)127) )      {         fail(REG_EESCAPE);         return (charT)0;      }      c = (charT)((traits_uchar_type)(*first) - (traits_uchar_type)'@');      ++first;      break;   case traits_type::syntax_e:      c = (charT)27;      ++first;      break;   case traits_type::syntax_digit:      c = (charT)traits_inst.toi(first, last, -8);      break;   default:      //c = *first;      ++first;   }   return c;}template <class charT, class traits, class Allocator>void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_maps(){   re_detail::re_syntax_base* record = static_cast<re_detail::re_syntax_base*>(data.data());   // always compile the first _map:   std::memset(startmap, 0, 256);   record->can_be_null = 0;   compile_map(record, startmap, 0, re_detail::mask_all);   while(record->type != re_detail::syntax_element_match)   {      if((record->type == re_detail::syntax_element_alt) || (record->type == re_detail::syntax_element_rep))      {         std::memset(&(static_cast<re_detail::re_jump*>(record)->_map), 0, 256);         record->can_be_null = 0;         compile_map(record->next.p, static_cast<re_detail::re_jump*>(record)->_map, &(record->can_be_null), re_detail::mask_take, static_cast<re_detail::re_jump*>(record)->alt.p);         compile_map(static_cast<re_detail::re_jump*>(record)->alt.p, static_cast<re_detail::re_jump*>(record)->_map, &(record->can_be_null), re_detail::mask_skip);         if(record->type == re_detail::syntax_element_rep)         {            re_detail::re_repeat* rep = static_cast<re_detail::re_repeat*>(record);            // set whether this is a singleton repeat or not:            if(rep->next.p->next.p->next.p == rep->alt.p)            {               rep->singleton = true;            }            else               rep->singleton = false;         }      }      else      {         record->can_be_null = 0;         compile_map(record, 0, &(record->can_be_null), re_detail::mask_all);      }      record = record->next.p;   }   record->can_be_null = re_detail::mask_all;}template <class charT, class traits, class Allocator>bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start(                        re_detail::re_syntax_base* node, charT cc, re_detail::re_syntax_base* terminal) const{   unsigned int c;   switch(node->type)   {   case re_detail::syntax_element_startmark:      if(static_cast<const re_detail::re_brace*>(node)->index == -1)      {         return probe_start(node->next.p->next.p, cc, terminal)            && probe_start(static_cast<const re_detail::re_jump*>(node->next.p)->alt.p, cc, terminal);      }      // fall through:   case re_detail::syntax_element_endmark:   case re_detail::syntax_element_start_line:   case re_detail::syntax_element_word_boundary:   case re_detail::syntax_element_buffer_start:   case re_detail::syntax_element_restart_continue:      // doesn't tell us anything about the next character, so:      return probe_start(node->next.p, cc, terminal);   case re_detail::syntax_element_literal:      // only the first character of the literal can match:      // note these have already been translated:      if(*reinterpret_cast<charT*>(static_cast<re_detail::re_literal*>(node)+1) == traits_inst.translate(cc, (_flags & regbase::icase)))         return true;      return false;   case re_detail::syntax_element_end_line:      // next character (if there is one!) must be a newline:      if(traits_inst.is_separator(traits_inst.translate(cc, (_flags & regbase::icase))))         return true;      return false;   case re_detail::syntax_element_wild:      return true;   case re_detail::syntax_element_match:      return true;   case re_detail::syntax_element_within_word:   case re_detail::syntax_element_word_start:      return traits_inst.is_class(traits_inst.translate(cc, (_flags & regbase::icase)), traits_type::char_class_word);   case re_detail::syntax_element_word_end:      // what follows must not be a word character,      return traits_inst.is_class(traits_inst.translate(cc, (_flags & regbase::icase)), traits_type::char_class_word) ? false : true;   case re_detail::syntax_element_buffer_end:      // we can be null, nothing must follow,      // NB we assume that this is followed by      // re_detail::syntax_element_match, if its not then we can      // never match anything anyway!!      return false;   case re_detail::syntax_element_soft_buffer_end:      // we can be null, only newlines must follow,      // NB we assume that this is followed by      // re_detail::syntax_element_match, if its not then we can      // never match anything anyway!!      return traits_inst.is_separator(traits_inst.translate(cc, (_flags & regbase::icase)));   case re_detail::syntax_element_backref:      // there's no easy way to determine this      // which is not to say it can't be done!      // for now:      return true;   case re_detail::syntax_element_long_set:      // we can not be null,      // we need to add already translated values in the set      // to values in the _map      return re_detail::re_maybe_set_member(cc, static_cast<const re_detail::re_set_long*>(node), *this) || (re_detail::re_is_set_member(static_cast<const charT*>(&cc), static_cast<const charT*>(&cc+1), static_cast<re_detail::re_set_long*>(node), *this) != &cc);   case re_detail::syntax_element_set:      // set all the elements that are set in corresponding set:      c = (traits_size_type)(traits_uchar_type)traits_inst.translate(cc, (_flags & regbase::icase));      return static_cast<re_detail::re_set*>(node)->_map[c] != 0;   case re_detail::syntax_element_jump:      if(static_cast<re_detail::re_jump*>(node)->alt.p < node)      {         // backwards jump,         // caused only by end of repeat section, we'll treat this         // the same as a match, because the sub-expression has matched.         if(node->next.p == terminal)            return true; // null repeat - we can always take this         else         {            //            // take the jump, add in fix for the fact that if the            // repeat that we're jumping to has non-zero minimum count            // then we need to add in the possiblity that we could still            // skip that repeat.            re_detail::re_syntax_base* next = static_cast<re_detail::re_jump*>(node)->alt.p;            bool b = probe_start(next, cc, terminal);            if((next->type == re_detail::syntax_element_rep) && (static_cast<re_detail::re_repeat*>(next)->min != 0))            {               b = b || probe_start(static_cast<re_detail::re_jump*>(next)->alt.p, cc, terminal);            }            return b;         }      }      else         // take the jump and compile:         return probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal);   case re_detail::syntax_element_alt:      // we need to take the OR of the two alternatives:      return probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal) || probe_start(node->next.p, cc, terminal);   case re_detail::syntax_element_rep:      // we need to take the OR of the two alternatives      if(static_cast<re_detail::re_repeat*>(node)->min == 0)         return probe_start(node->next.p, cc, static_cast<re_detail::re_jump*>(node)->alt.p) || probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal);      else         return probe_start(node->next.p, cc, static_cast<re_detail::re_jump*>(node)->alt.p);   case re_detail::syntax_element_combining:      return !traits_inst.is_combining(traits_inst.translate(cc, (_flags & regbase::icase)));   }   return false;}template <class charT, class traits, class Allocator>bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start_null(re_detail::re_syntax_base* node, re_detail::re_syntax_base* terminal)const{   switch(node->type)   {   case re_detail::syntax_element_startmark:   case re_detail::syntax_element_endmark:   case re_detail::syntax_element_start_line:   case re_detail::syntax_element_word_boundary:   case re_detail::syntax_element_buffer_start:   case re_detail::syntax_element_restart_continue:   case re_detail::syntax_element_end_line:   case re_detail::syntax_element_word_end:      // doesn't tell us anything about the next character, so:      return probe_start_null(node->next.p, terminal);   case re_detail::syntax_element_match:   case re_detail::syntax_element_buffer_end:   case re_detail::syntax_element_soft_buffer_end:   case re_detail::syntax_element_backref:      return true;   case re_detail::syntax_element_jump:      if(static_cast<re_detail::re_jump*>(node)->alt.p < node)      {         // backwards jump,         // caused only by end of repeat section, we'll treat this         // the same as a match, because the sub-expression has matched.         // this is only caused by NULL repeats as in "(a*)*" or "(\<)*"         // these are really nonsensence and make the matching code much         // harder, it would be nice to get rid of them altogether.         if(node->next.p == terminal)            return true;         else            return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);      }      else         // take the jump and compile:         return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);   case re_detail::syntax_element_alt:      // we need to take the OR of the two alternatives:      return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal) || probe_start_null(node->next.p, terminal);   case re_detail::syntax_element_rep:      // only need to consider skipping the repeat:      return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);   default:      break;   }   return false;}template <class charT, class traits, class Allocator>void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_map(                        re_detail::re_syntax_base* node, unsigned char* _map,                        unsigned int* pnull, unsigned char mask, re_detail::re_syntax_base* terminal)const{   if(_map)   {      for(unsigned int i = 0; i < 256; ++i)      {         if(probe_start(node, (charT)i, terminal))            _map[i] |= mask;      }   }   if(pnull && probe_start_null(node, terminal))      *pnull |= mask;}  template <class charT, class traits, class Allocator>void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::move_offsets(re_detail::re_syntax_base* j, unsigned size){# ifdef BOOST_MSVC#  pragma warning(push)#  pragma warning(disable: 4127)#endif   // move all offsets starting with j->link forward by size   // called after an insert:   j = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + j->next.i);   while(true)   {      switch(j->type)      {      case re_detail::syntax_element_rep:         static_cast<re_detail::re_jump*>(j)->alt.i += size;         j->next.i += size;         break;      case re_detail::syntax_element_jump:      case re_detail::syntax_element_alt:         static_cast<re_detail::re_jump*>(j)->alt.i += size;         j->next.i += size;         break;      default:         j->next.i += size;         break;      }      if(j->next.i == size)         break;      j = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + j->next.i);   }# ifdef BOOST_MSVC#  pragma warning(pop)#endif}template <class charT, class traits, class Allocator>re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set_simple(re_detail::re_syntax_base* dat, unsigned long cls, bool isnot){   typedef typename re_detail::is_byte<charT>::width_type width_type;   re_detail::jstack<traits_string_type, Allocator> singles(64, data.allocator());   re_detail::jstack<traits_string_type, Allocator> ranges(64, data.allocator());   re_detail::jstack<boost::uint_fast32_t, Allocator> classes(64, data.allocator());   re_detail::jstack<traits_string_type, Allocator> equivalents(64, data.allocator());   classes.push(cls);   if(dat)   {      data.align();      dat->next.i = data.size();   }   return compile_set_aux(singles, ranges, classes, equivalents, isnot, width_type());}template <class charT, class traits, class Allocator>re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set(const charT*& first, const charT* last){   re_detail::jstack<traits_string_type, Allocator> singles(64, data.allocator());   re_detail::jstack<traits_string_type, Allocator> ranges(64, data.allocator());   re_detail::jstack<boost::uint_fast32_t, Allocator> classes(64, data.allocator());   re_detail::jstack<traits_string_type, Allocator> equivalents(64, data.allocator());   bool has_digraphs = false;   jm_assert(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_open_set);   ++first;   bool started = false;   bool done = false;   bool isnot = false;   enum last_type   {      last_single,      last_none,      last_dash   };   unsigned l = last_none;   traits_string_type s;   while((first != last) && !done)   {      traits_size_type c = (traits_size_type)(traits_uchar_type)*first;      // this is only used for the switch(), but cannot be folded in      // due to a bug in Comeau 4.2.44beta3      traits_size_type syntax = traits_inst.syntax_type(c);      switch(syntax)      {      case traits_type::syntax_caret:         if(!started && !isnot)         {            isnot = true;         }         else         {            s = (charT)c;            goto char_set_literal;         }         break;      case traits_type::syntax_open_set:      {         if((_flags & char_classes) == 0)         {            s = (charT)c;            goto char_set_literal;         }         // check to see if we really have a class:         const charT* base = first;         // this is only used for the switch(), but cannot be folded in         // due to a bug in Comeau 4.2.44beta3    unsigned int inner_set = parse_inner_set(first, last);         switch(inner_set)         {         case traits_type::syntax_colon:            {               if(l == last_dash)               {                  fail(REG_ERANGE);                  return 0;               }               boost::uint_fast32_t id = traits_inst.lookup_classname(base+2, first-2);               if(_flags & regbase::icase)               {                  if((id == traits_type::char_class_upper) || (id == traits_type::char_class_lower))                  {                     id = traits_type::char_class_alpha;                  }               }               if(id == 0)               {                  fail(REG_ECTYPE);                  return 0;               }               classes.push(id);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?