⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 basic_regex_creator.hpp

📁 C++的一个好库。。。现在很流行
💻 HPP
📖 第 1 页 / 共 3 页
字号:
               state = rep->alt.p;
               continue;
            }
            else if((state->type == syntax_element_long_set_rep)) 
            {
               BOOST_ASSERT(rep->next.p->type == syntax_element_long_set);
               if(static_cast<re_set_long<mask_type>*>(rep->next.p)->singleton == 0)
                  return -1;
               if(rep->max != rep->min)
                  return -1;
               result += static_cast<int>(rep->min);
               state = rep->alt.p;
               continue;
            }
         }
         return -1;
      case syntax_element_long_set:
         if(static_cast<re_set_long<mask_type>*>(state)->singleton == 0)
            return -1;
         result += 1;
         break;
      case syntax_element_jump:
         state = static_cast<re_jump*>(state)->alt.p;
         continue;
      default:
         break;
      }
      state = state->next.p;
   }
   return -1;
}

template <class charT, class traits>
void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask)
{
   int not_last_jump = 1;

   // track case sensitivity:
   bool l_icase = m_icase;

   while(state)
   {
      switch(state->type)
      {
      case syntax_element_toggle_case:
         l_icase = static_cast<re_case*>(state)->icase;
         state = state->next.p;
         break;
      case syntax_element_literal:
      {
         // don't set anything in *pnull, set each element in l_map
         // that could match the first character in the literal:
         if(l_map)
         {
            l_map[0] |= mask_init;
            charT first_char = *static_cast<charT*>(static_cast<void*>(static_cast<re_literal*>(state) + 1));
            for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
            {
               if(m_traits.translate(static_cast<charT>(i), l_icase) == first_char)
                  l_map[i] |= mask;
            }
         }
         return;
      }
      case syntax_element_end_line:
      {
         // next character must be a line separator (if there is one):
         if(l_map)
         {
            l_map[0] |= mask_init;
            l_map['\n'] |= mask;
            l_map['\r'] |= mask;
            l_map['\f'] |= mask;
            l_map[0x85] |= mask;
         }
         // now figure out if we can match a NULL string at this point:
         if(pnull)
            create_startmap(state->next.p, 0, pnull, mask);
         return;
      }
      case syntax_element_backref:
         // can be null, and any character can match:
         if(pnull)
            *pnull |= mask;
         // fall through:
      case syntax_element_wild:
      {
         // can't be null, any character can match:
         set_all_masks(l_map, mask);
         return;
      }
      case syntax_element_match:
      {
         // must be null, any character can match:
         set_all_masks(l_map, mask);
         if(pnull)
            *pnull |= mask;
         return;
      }
      case syntax_element_word_start:
      {
         // recurse, then AND with all the word characters:
         create_startmap(state->next.p, l_map, pnull, mask);
         if(l_map)
         {
            l_map[0] |= mask_init;
            for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
            {
               if(!m_traits.isctype(static_cast<charT>(i), m_word_mask))
                  l_map[i] &= static_cast<unsigned char>(~mask);
            }
         }
         return;
      }
      case syntax_element_word_end:
      {
         // recurse, then AND with all the word characters:
         create_startmap(state->next.p, l_map, pnull, mask);
         if(l_map)
         {
            l_map[0] |= mask_init;
            for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
            {
               if(m_traits.isctype(static_cast<charT>(i), m_word_mask))
                  l_map[i] &= static_cast<unsigned char>(~mask);
            }
         }
         return;
      }
      case syntax_element_buffer_end:
      {
         // we *must be null* :
         if(pnull)
            *pnull |= mask;
         return;
      }
      case syntax_element_long_set:
         if(l_map)
         {
            typedef typename traits::char_class_type mask_type;
            if(static_cast<re_set_long<mask_type>*>(state)->singleton)
            {
               l_map[0] |= mask_init;
               for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
               {
                  charT c = static_cast<charT>(i);
                  if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<mask_type>*>(state), *m_pdata, m_icase))
                     l_map[i] |= mask;
               }
            }
            else
               set_all_masks(l_map, mask);
         }
         return;
      case syntax_element_set:
         if(l_map)
         {
            l_map[0] |= mask_init;
            for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
            {
               if(static_cast<re_set*>(state)->_map[
                  static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), l_icase))])
                  l_map[i] |= mask;
            }
         }
         return;
      case syntax_element_jump:
         // take the jump:
         state = static_cast<re_alt*>(state)->alt.p;
         not_last_jump = -1;
         break;
      case syntax_element_alt:
      case syntax_element_rep:
      case syntax_element_dot_rep:
      case syntax_element_char_rep:
      case syntax_element_short_set_rep:
      case syntax_element_long_set_rep:
         {
            re_alt* rep = static_cast<re_alt*>(state);
            if(rep->_map[0] & mask_init)
            {
               if(l_map)
               {
                  // copy previous results:
                  l_map[0] |= mask_init;
                  for(unsigned int i = 0; i <= UCHAR_MAX; ++i)
                  {
                     if(rep->_map[i] & mask_any)
                        l_map[i] |= mask;
                  }
               }
               if(pnull)
               {
                  if(rep->can_be_null & mask_any)
                     *pnull |= mask;
               }
            }
            else
            {
               // we haven't created a startmap for this alternative yet
               // so take the union of the two options:
               if(is_bad_repeat(state))
               {
                  set_all_masks(l_map, mask);
                  return;
               }
               set_bad_repeat(state);
               create_startmap(state->next.p, l_map, pnull, mask);
               if((state->type == syntax_element_alt)
                  || (static_cast<re_repeat*>(state)->min == 0)
                  || (not_last_jump == 0))
                  create_startmap(rep->alt.p, l_map, pnull, mask);
            }
         }
         return;
      case syntax_element_soft_buffer_end:
         // match newline or null:
         if(l_map)
         {
            l_map[0] |= mask_init;
            l_map['\n'] |= mask;
            l_map['\r'] |= mask;
         }
         if(pnull)
            *pnull |= mask;
         return;
      case syntax_element_endmark:
         // need to handle independent subs as a special case:
         if(static_cast<re_brace*>(state)->index < 0)
         {
            // can be null, any character can match:
            set_all_masks(l_map, mask);
            if(pnull)
               *pnull |= mask;
            return;
         }
         else
         {
            state = state->next.p;
            break;
         }

      case syntax_element_startmark:
         // need to handle independent subs as a special case:
         if(static_cast<re_brace*>(state)->index == -3)
         {
            state = state->next.p->next.p;
            break;
         }
         // otherwise fall through:
      default:
         state = state->next.p;
      }
      ++not_last_jump;
   }
}

template <class charT, class traits>
unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* state)
{
   //
   // find out how the machine starts, so we can optimise the search:
   //
   while(state)
   {
      switch(state->type)
      {
      case syntax_element_startmark:
      case syntax_element_endmark:
         state = state->next.p;
         continue;
      case syntax_element_start_line:
         return regbase::restart_line;
      case syntax_element_word_start:
         return regbase::restart_word;
      case syntax_element_buffer_start:
         return regbase::restart_buf;
      case syntax_element_restart_continue:
         return regbase::restart_continue;
      default:
         state = 0;
         continue;
      }
   }
   return regbase::restart_any;
}

template <class charT, class traits>
void basic_regex_creator<charT, traits>::set_all_masks(unsigned char* bits, unsigned char mask)
{
   //
   // set mask in all of bits elements, 
   // if bits[0] has mask_init not set then we can 
   // optimise this to a call to memset:
   //
   if(bits)
   {
      if(bits[0] == 0)
         (std::memset)(bits, mask, 1u << CHAR_BIT);
      else
      {
         for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
            bits[i] |= mask;
      }
      bits[0] |= mask_init;
   }
}

template <class charT, class traits>
bool basic_regex_creator<charT, traits>::is_bad_repeat(re_syntax_base* pt)
{
   switch(pt->type)
   {
   case syntax_element_rep:
   case syntax_element_dot_rep:
   case syntax_element_char_rep:
   case syntax_element_short_set_rep:
   case syntax_element_long_set_rep:
      {
         unsigned id = static_cast<re_repeat*>(pt)->id;
         if(id > sizeof(m_bad_repeats) * CHAR_BIT)
            return true;  // run out of bits, assume we can't traverse this one.
         return m_bad_repeats & static_cast<boost::uintmax_t>(1uL << id);
      }
   default:
      return false;
   }
}

template <class charT, class traits>
void basic_regex_creator<charT, traits>::set_bad_repeat(re_syntax_base* pt)
{
   switch(pt->type)
   {
   case syntax_element_rep:
   case syntax_element_dot_rep:
   case syntax_element_char_rep:
   case syntax_element_short_set_rep:
   case syntax_element_long_set_rep:
      {
         unsigned id = static_cast<re_repeat*>(pt)->id;
         if(id <= sizeof(m_bad_repeats) * CHAR_BIT)
            m_bad_repeats |= static_cast<boost::uintmax_t>(1uL << id);
      }
   default:
      break;
   }
}

template <class charT, class traits>
syntax_element_type basic_regex_creator<charT, traits>::get_repeat_type(re_syntax_base* state)
{
   typedef typename traits::char_class_type mask_type;
   if(state->type == syntax_element_rep)
   {
      // check to see if we are repeating a single state:
      if(state->next.p->next.p->next.p == static_cast<re_alt*>(state)->alt.p)
      {
         switch(state->next.p->type)
         {
         case re_detail::syntax_element_wild:
            return re_detail::syntax_element_dot_rep;
         case re_detail::syntax_element_literal:
            return re_detail::syntax_element_char_rep;
         case re_detail::syntax_element_set:
            return re_detail::syntax_element_short_set_rep;
         case re_detail::syntax_element_long_set:
            if(static_cast<re_detail::re_set_long<mask_type>*>(state->next.p)->singleton)
               return re_detail::syntax_element_long_set_rep;
            break;
         default:
            break;
         }
      }
   }
   return state->type;
}

template <class charT, class traits>
void basic_regex_creator<charT, traits>::probe_leading_repeat(re_syntax_base* state)
{
   // enumerate our states, and see if we have a leading repeat 
   // for which failed search restarts can be optimised;
   do
   {
      switch(state->type)
      {
      case syntax_element_startmark:
         if(static_cast<re_brace*>(state)->index >= 0)
         {
            state = state->next.p;
            continue;
         }
         return;
      case syntax_element_endmark:
      case syntax_element_start_line:
      case syntax_element_end_line:
      case syntax_element_word_boundary:
      case syntax_element_within_word:
      case syntax_element_word_start:
      case syntax_element_word_end:
      case syntax_element_buffer_start:
      case syntax_element_buffer_end:
      case syntax_element_restart_continue:
         state = state->next.p;
         break;
      case syntax_element_dot_rep:
      case syntax_element_char_rep:
      case syntax_element_short_set_rep:
      case syntax_element_long_set_rep:
         if(this->m_has_backrefs == 0)
            static_cast<re_repeat*>(state)->leading = true;
         // fall through:
      default:
         return;
      }
   }while(state);
}


} // namespace re_detail

} // namespace boost

#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_SUFFIX
#endif

#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -