basic_regex_creator.hpp

来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 1,333 行 · 第 1/3 页

HPP
1,333
字号
         return -1;      case syntax_element_long_set:         if(static_cast<re_set_long<mask_type>*>(state)->singleton == 0)            return -1;         result += 1;         break;      case syntax_element_jump:         state = static_cast<re_jump*>(state)->alt.p;         continue;      default:         break;      }      state = state->next.p;   }   return -1;}template <class charT, class traits>void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask){   int not_last_jump = 1;   // track case sensitivity:   bool l_icase = m_icase;   while(state)   {      switch(state->type)      {      case syntax_element_toggle_case:         l_icase = static_cast<re_case*>(state)->icase;         state = state->next.p;         break;      case syntax_element_literal:      {         // don't set anything in *pnull, set each element in l_map         // that could match the first character in the literal:         if(l_map)         {            l_map[0] |= mask_init;            charT first_char = *static_cast<charT*>(static_cast<void*>(static_cast<re_literal*>(state) + 1));            for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)            {               if(m_traits.translate(static_cast<charT>(i), l_icase) == first_char)                  l_map[i] |= mask;            }         }         return;      }      case syntax_element_end_line:      {         // next character must be a line separator (if there is one):         if(l_map)         {            l_map[0] |= mask_init;            l_map['\n'] |= mask;            l_map['\r'] |= mask;            l_map['\f'] |= mask;            l_map[0x85] |= mask;         }         // now figure out if we can match a NULL string at this point:         if(pnull)            create_startmap(state->next.p, 0, pnull, mask);         return;      }      case syntax_element_backref:         // can be null, and any character can match:         if(pnull)            *pnull |= mask;         // fall through:      case syntax_element_wild:      {         // can't be null, any character can match:         set_all_masks(l_map, mask);         return;      }      case syntax_element_match:      {         // must be null, any character can match:         set_all_masks(l_map, mask);         if(pnull)            *pnull |= mask;         return;      }      case syntax_element_word_start:      {         // recurse, then AND with all the word characters:         create_startmap(state->next.p, l_map, pnull, mask);         if(l_map)         {            l_map[0] |= mask_init;            for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)            {               if(!m_traits.isctype(static_cast<charT>(i), m_word_mask))                  l_map[i] &= static_cast<unsigned char>(~mask);            }         }         return;      }      case syntax_element_word_end:      {         // recurse, then AND with all the word characters:         create_startmap(state->next.p, l_map, pnull, mask);         if(l_map)         {            l_map[0] |= mask_init;            for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)            {               if(m_traits.isctype(static_cast<charT>(i), m_word_mask))                  l_map[i] &= static_cast<unsigned char>(~mask);            }         }         return;      }      case syntax_element_buffer_end:      {         // we *must be null* :         if(pnull)            *pnull |= mask;         return;      }      case syntax_element_long_set:         if(l_map)         {            typedef typename traits::char_class_type mask_type;            if(static_cast<re_set_long<mask_type>*>(state)->singleton)            {               l_map[0] |= mask_init;               for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)               {                  charT c = static_cast<charT>(i);                  if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<mask_type>*>(state), *m_pdata, m_icase))                     l_map[i] |= mask;               }            }            else               set_all_masks(l_map, mask);         }         return;      case syntax_element_set:         if(l_map)         {            l_map[0] |= mask_init;            for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)            {               if(static_cast<re_set*>(state)->_map[                  static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), l_icase))])                  l_map[i] |= mask;            }         }         return;      case syntax_element_jump:         // take the jump:         state = static_cast<re_alt*>(state)->alt.p;         not_last_jump = -1;         break;      case syntax_element_alt:      case syntax_element_rep:      case syntax_element_dot_rep:      case syntax_element_char_rep:      case syntax_element_short_set_rep:      case syntax_element_long_set_rep:         {            re_alt* rep = static_cast<re_alt*>(state);            if(rep->_map[0] & mask_init)            {               if(l_map)               {                  // copy previous results:                  l_map[0] |= mask_init;                  for(unsigned int i = 0; i <= UCHAR_MAX; ++i)                  {                     if(rep->_map[i] & mask_any)                        l_map[i] |= mask;                  }               }               if(pnull)               {                  if(rep->can_be_null & mask_any)                     *pnull |= mask;               }            }            else            {               // we haven't created a startmap for this alternative yet               // so take the union of the two options:               if(is_bad_repeat(state))               {                  set_all_masks(l_map, mask);                  if(pnull)                     *pnull |= mask;                  return;               }               set_bad_repeat(state);               create_startmap(state->next.p, l_map, pnull, mask);               if((state->type == syntax_element_alt)                  || (static_cast<re_repeat*>(state)->min == 0)                  || (not_last_jump == 0))                  create_startmap(rep->alt.p, l_map, pnull, mask);            }         }         return;      case syntax_element_soft_buffer_end:         // match newline or null:         if(l_map)         {            l_map[0] |= mask_init;            l_map['\n'] |= mask;            l_map['\r'] |= mask;         }         if(pnull)            *pnull |= mask;         return;      case syntax_element_endmark:         // need to handle independent subs as a special case:         if(static_cast<re_brace*>(state)->index < 0)         {            // can be null, any character can match:            set_all_masks(l_map, mask);            if(pnull)               *pnull |= mask;            return;         }         else         {            state = state->next.p;            break;         }      case syntax_element_startmark:         // need to handle independent subs as a special case:         if(static_cast<re_brace*>(state)->index == -3)         {            state = state->next.p->next.p;            break;         }         // otherwise fall through:      default:         state = state->next.p;      }      ++not_last_jump;   }}template <class charT, class traits>unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* state){   //   // find out how the machine starts, so we can optimise the search:   //   while(state)   {      switch(state->type)      {      case syntax_element_startmark:      case syntax_element_endmark:         state = state->next.p;         continue;      case syntax_element_start_line:         return regbase::restart_line;      case syntax_element_word_start:         return regbase::restart_word;      case syntax_element_buffer_start:         return regbase::restart_buf;      case syntax_element_restart_continue:         return regbase::restart_continue;      default:         state = 0;         continue;      }   }   return regbase::restart_any;}template <class charT, class traits>void basic_regex_creator<charT, traits>::set_all_masks(unsigned char* bits, unsigned char mask){   //   // set mask in all of bits elements,    // if bits[0] has mask_init not set then we can    // optimise this to a call to memset:   //   if(bits)   {      if(bits[0] == 0)         (std::memset)(bits, mask, 1u << CHAR_BIT);      else      {         for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)            bits[i] |= mask;      }      bits[0] |= mask_init;   }}template <class charT, class traits>bool basic_regex_creator<charT, traits>::is_bad_repeat(re_syntax_base* pt){   switch(pt->type)   {   case syntax_element_rep:   case syntax_element_dot_rep:   case syntax_element_char_rep:   case syntax_element_short_set_rep:   case syntax_element_long_set_rep:      {         unsigned state_id = static_cast<re_repeat*>(pt)->state_id;         if(state_id > sizeof(m_bad_repeats) * CHAR_BIT)            return true;  // run out of bits, assume we can't traverse this one.         static const boost::uintmax_t one = 1uL;         return m_bad_repeats & (one << state_id);      }   default:      return false;   }}template <class charT, class traits>void basic_regex_creator<charT, traits>::set_bad_repeat(re_syntax_base* pt){   switch(pt->type)   {   case syntax_element_rep:   case syntax_element_dot_rep:   case syntax_element_char_rep:   case syntax_element_short_set_rep:   case syntax_element_long_set_rep:      {         unsigned state_id = static_cast<re_repeat*>(pt)->state_id;         static const boost::uintmax_t one = 1uL;         if(state_id <= sizeof(m_bad_repeats) * CHAR_BIT)            m_bad_repeats |= (one << state_id);      }   default:      break;   }}template <class charT, class traits>syntax_element_type basic_regex_creator<charT, traits>::get_repeat_type(re_syntax_base* state){   typedef typename traits::char_class_type mask_type;   if(state->type == syntax_element_rep)   {      // check to see if we are repeating a single state:      if(state->next.p->next.p->next.p == static_cast<re_alt*>(state)->alt.p)      {         switch(state->next.p->type)         {         case re_detail::syntax_element_wild:            return re_detail::syntax_element_dot_rep;         case re_detail::syntax_element_literal:            return re_detail::syntax_element_char_rep;         case re_detail::syntax_element_set:            return re_detail::syntax_element_short_set_rep;         case re_detail::syntax_element_long_set:            if(static_cast<re_detail::re_set_long<mask_type>*>(state->next.p)->singleton)               return re_detail::syntax_element_long_set_rep;            break;         default:            break;         }      }   }   return state->type;}template <class charT, class traits>void basic_regex_creator<charT, traits>::probe_leading_repeat(re_syntax_base* state){   // enumerate our states, and see if we have a leading repeat    // for which failed search restarts can be optimised;   do   {      switch(state->type)      {      case syntax_element_startmark:         if(static_cast<re_brace*>(state)->index >= 0)         {            state = state->next.p;            continue;         }         if((static_cast<re_brace*>(state)->index == -1)            || (static_cast<re_brace*>(state)->index == -2))         {            // skip past the zero width assertion:            state = static_cast<const re_jump*>(state->next.p)->alt.p->next.p;            continue;         }         if(static_cast<re_brace*>(state)->index == -3)         {            // Have to skip the leading jump state:            state = state->next.p->next.p;            continue;         }         return;      case syntax_element_endmark:      case syntax_element_start_line:      case syntax_element_end_line:      case syntax_element_word_boundary:      case syntax_element_within_word:      case syntax_element_word_start:      case syntax_element_word_end:      case syntax_element_buffer_start:      case syntax_element_buffer_end:      case syntax_element_restart_continue:         state = state->next.p;         break;      case syntax_element_dot_rep:      case syntax_element_char_rep:      case syntax_element_short_set_rep:      case syntax_element_long_set_rep:         if(this->m_has_backrefs == 0)            static_cast<re_repeat*>(state)->leading = true;         // fall through:      default:         return;      }   }while(state);}} // namespace re_detail} // namespace boost#ifdef BOOST_MSVC#  pragma warning(pop)#endif#ifdef BOOST_MSVC#pragma warning(push)#pragma warning(disable: 4103)#endif#ifdef BOOST_HAS_ABI_HEADERS#  include BOOST_ABI_SUFFIX#endif#ifdef BOOST_MSVC#pragma warning(pop)#endif#endif

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?