basic_regex_creator.hpp
来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 1,333 行 · 第 1/3 页
HPP
1,333 行
return -1; case syntax_element_long_set: if(static_cast<re_set_long<mask_type>*>(state)->singleton == 0) return -1; result += 1; break; case syntax_element_jump: state = static_cast<re_jump*>(state)->alt.p; continue; default: break; } state = state->next.p; } return -1;}template <class charT, class traits>void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask){ int not_last_jump = 1; // track case sensitivity: bool l_icase = m_icase; while(state) { switch(state->type) { case syntax_element_toggle_case: l_icase = static_cast<re_case*>(state)->icase; state = state->next.p; break; case syntax_element_literal: { // don't set anything in *pnull, set each element in l_map // that could match the first character in the literal: if(l_map) { l_map[0] |= mask_init; charT first_char = *static_cast<charT*>(static_cast<void*>(static_cast<re_literal*>(state) + 1)); for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) { if(m_traits.translate(static_cast<charT>(i), l_icase) == first_char) l_map[i] |= mask; } } return; } case syntax_element_end_line: { // next character must be a line separator (if there is one): if(l_map) { l_map[0] |= mask_init; l_map['\n'] |= mask; l_map['\r'] |= mask; l_map['\f'] |= mask; l_map[0x85] |= mask; } // now figure out if we can match a NULL string at this point: if(pnull) create_startmap(state->next.p, 0, pnull, mask); return; } case syntax_element_backref: // can be null, and any character can match: if(pnull) *pnull |= mask; // fall through: case syntax_element_wild: { // can't be null, any character can match: set_all_masks(l_map, mask); return; } case syntax_element_match: { // must be null, any character can match: set_all_masks(l_map, mask); if(pnull) *pnull |= mask; return; } case syntax_element_word_start: { // recurse, then AND with all the word characters: create_startmap(state->next.p, l_map, pnull, mask); if(l_map) { l_map[0] |= mask_init; for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) { if(!m_traits.isctype(static_cast<charT>(i), m_word_mask)) l_map[i] &= static_cast<unsigned char>(~mask); } } return; } case syntax_element_word_end: { // recurse, then AND with all the word characters: create_startmap(state->next.p, l_map, pnull, mask); if(l_map) { l_map[0] |= mask_init; for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) { if(m_traits.isctype(static_cast<charT>(i), m_word_mask)) l_map[i] &= static_cast<unsigned char>(~mask); } } return; } case syntax_element_buffer_end: { // we *must be null* : if(pnull) *pnull |= mask; return; } case syntax_element_long_set: if(l_map) { typedef typename traits::char_class_type mask_type; if(static_cast<re_set_long<mask_type>*>(state)->singleton) { l_map[0] |= mask_init; for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) { charT c = static_cast<charT>(i); if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<mask_type>*>(state), *m_pdata, m_icase)) l_map[i] |= mask; } } else set_all_masks(l_map, mask); } return; case syntax_element_set: if(l_map) { l_map[0] |= mask_init; for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) { if(static_cast<re_set*>(state)->_map[ static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), l_icase))]) l_map[i] |= mask; } } return; case syntax_element_jump: // take the jump: state = static_cast<re_alt*>(state)->alt.p; not_last_jump = -1; break; case syntax_element_alt: case syntax_element_rep: case syntax_element_dot_rep: case syntax_element_char_rep: case syntax_element_short_set_rep: case syntax_element_long_set_rep: { re_alt* rep = static_cast<re_alt*>(state); if(rep->_map[0] & mask_init) { if(l_map) { // copy previous results: l_map[0] |= mask_init; for(unsigned int i = 0; i <= UCHAR_MAX; ++i) { if(rep->_map[i] & mask_any) l_map[i] |= mask; } } if(pnull) { if(rep->can_be_null & mask_any) *pnull |= mask; } } else { // we haven't created a startmap for this alternative yet // so take the union of the two options: if(is_bad_repeat(state)) { set_all_masks(l_map, mask); if(pnull) *pnull |= mask; return; } set_bad_repeat(state); create_startmap(state->next.p, l_map, pnull, mask); if((state->type == syntax_element_alt) || (static_cast<re_repeat*>(state)->min == 0) || (not_last_jump == 0)) create_startmap(rep->alt.p, l_map, pnull, mask); } } return; case syntax_element_soft_buffer_end: // match newline or null: if(l_map) { l_map[0] |= mask_init; l_map['\n'] |= mask; l_map['\r'] |= mask; } if(pnull) *pnull |= mask; return; case syntax_element_endmark: // need to handle independent subs as a special case: if(static_cast<re_brace*>(state)->index < 0) { // can be null, any character can match: set_all_masks(l_map, mask); if(pnull) *pnull |= mask; return; } else { state = state->next.p; break; } case syntax_element_startmark: // need to handle independent subs as a special case: if(static_cast<re_brace*>(state)->index == -3) { state = state->next.p->next.p; break; } // otherwise fall through: default: state = state->next.p; } ++not_last_jump; }}template <class charT, class traits>unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* state){ // // find out how the machine starts, so we can optimise the search: // while(state) { switch(state->type) { case syntax_element_startmark: case syntax_element_endmark: state = state->next.p; continue; case syntax_element_start_line: return regbase::restart_line; case syntax_element_word_start: return regbase::restart_word; case syntax_element_buffer_start: return regbase::restart_buf; case syntax_element_restart_continue: return regbase::restart_continue; default: state = 0; continue; } } return regbase::restart_any;}template <class charT, class traits>void basic_regex_creator<charT, traits>::set_all_masks(unsigned char* bits, unsigned char mask){ // // set mask in all of bits elements, // if bits[0] has mask_init not set then we can // optimise this to a call to memset: // if(bits) { if(bits[0] == 0) (std::memset)(bits, mask, 1u << CHAR_BIT); else { for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) bits[i] |= mask; } bits[0] |= mask_init; }}template <class charT, class traits>bool basic_regex_creator<charT, traits>::is_bad_repeat(re_syntax_base* pt){ switch(pt->type) { case syntax_element_rep: case syntax_element_dot_rep: case syntax_element_char_rep: case syntax_element_short_set_rep: case syntax_element_long_set_rep: { unsigned state_id = static_cast<re_repeat*>(pt)->state_id; if(state_id > sizeof(m_bad_repeats) * CHAR_BIT) return true; // run out of bits, assume we can't traverse this one. static const boost::uintmax_t one = 1uL; return m_bad_repeats & (one << state_id); } default: return false; }}template <class charT, class traits>void basic_regex_creator<charT, traits>::set_bad_repeat(re_syntax_base* pt){ switch(pt->type) { case syntax_element_rep: case syntax_element_dot_rep: case syntax_element_char_rep: case syntax_element_short_set_rep: case syntax_element_long_set_rep: { unsigned state_id = static_cast<re_repeat*>(pt)->state_id; static const boost::uintmax_t one = 1uL; if(state_id <= sizeof(m_bad_repeats) * CHAR_BIT) m_bad_repeats |= (one << state_id); } default: break; }}template <class charT, class traits>syntax_element_type basic_regex_creator<charT, traits>::get_repeat_type(re_syntax_base* state){ typedef typename traits::char_class_type mask_type; if(state->type == syntax_element_rep) { // check to see if we are repeating a single state: if(state->next.p->next.p->next.p == static_cast<re_alt*>(state)->alt.p) { switch(state->next.p->type) { case re_detail::syntax_element_wild: return re_detail::syntax_element_dot_rep; case re_detail::syntax_element_literal: return re_detail::syntax_element_char_rep; case re_detail::syntax_element_set: return re_detail::syntax_element_short_set_rep; case re_detail::syntax_element_long_set: if(static_cast<re_detail::re_set_long<mask_type>*>(state->next.p)->singleton) return re_detail::syntax_element_long_set_rep; break; default: break; } } } return state->type;}template <class charT, class traits>void basic_regex_creator<charT, traits>::probe_leading_repeat(re_syntax_base* state){ // enumerate our states, and see if we have a leading repeat // for which failed search restarts can be optimised; do { switch(state->type) { case syntax_element_startmark: if(static_cast<re_brace*>(state)->index >= 0) { state = state->next.p; continue; } if((static_cast<re_brace*>(state)->index == -1) || (static_cast<re_brace*>(state)->index == -2)) { // skip past the zero width assertion: state = static_cast<const re_jump*>(state->next.p)->alt.p->next.p; continue; } if(static_cast<re_brace*>(state)->index == -3) { // Have to skip the leading jump state: state = state->next.p->next.p; continue; } return; case syntax_element_endmark: case syntax_element_start_line: case syntax_element_end_line: case syntax_element_word_boundary: case syntax_element_within_word: case syntax_element_word_start: case syntax_element_word_end: case syntax_element_buffer_start: case syntax_element_buffer_end: case syntax_element_restart_continue: state = state->next.p; break; case syntax_element_dot_rep: case syntax_element_char_rep: case syntax_element_short_set_rep: case syntax_element_long_set_rep: if(this->m_has_backrefs == 0) static_cast<re_repeat*>(state)->leading = true; // fall through: default: return; } }while(state);}} // namespace re_detail} // namespace boost#ifdef BOOST_MSVC# pragma warning(pop)#endif#ifdef BOOST_MSVC#pragma warning(push)#pragma warning(disable: 4103)#endif#ifdef BOOST_HAS_ABI_HEADERS# include BOOST_ABI_SUFFIX#endif#ifdef BOOST_MSVC#pragma warning(pop)#endif#endif
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?