basic_regex_parser.hpp
来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 2,075 行 · 第 1/5 页
HPP
2,075 行
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set) { // trailing - : --m_position; return; } fail(regex_constants::error_range, m_position - m_base); return; } return; } --m_position; } char_set.add_single(start_range);}template <class charT, class traits>digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set){ digraph<charT> result; switch(this->m_traits.syntax_type(*m_position)) { case regex_constants::syntax_dash: if(!char_set.empty()) { // see if we are at the end of the set: if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) { fail(regex_constants::error_range, m_position - m_base); return result; } --m_position; } result.first = *m_position++; return result; case regex_constants::syntax_escape: // check to see if escapes are supported first: if(this->flags() & regex_constants::no_escape_in_lists) { result = *m_position++; break; } ++m_position; result = unescape_character(); break; case regex_constants::syntax_open_set: { if(m_end == ++m_position) { fail(regex_constants::error_collate, m_position - m_base); return result; } if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot) { --m_position; result.first = *m_position; ++m_position; return result; } if(m_end == ++m_position) { fail(regex_constants::error_collate, m_position - m_base); return result; } const charT* name_first = m_position; // skip at least one character, then find the matching ':]' if(m_end == ++m_position) { fail(regex_constants::error_collate, name_first - m_base); return result; } while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)) ++m_position; const charT* name_last = m_position; if(m_end == m_position) { fail(regex_constants::error_collate, name_first - m_base); return result; } if((m_end == ++m_position) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) { fail(regex_constants::error_collate, name_first - m_base); return result; } ++m_position; string_type s = this->m_traits.lookup_collatename(name_first, name_last); if(s.empty() || (s.size() > 2)) { fail(regex_constants::error_collate, name_first - m_base); return result; } result.first = s[0]; if(s.size() > 1) result.second = s[1]; else result.second = 0; return result; } default: result = *m_position++; } return result;}//// does a value fit in the specified charT type?//template <class charT>bool valid_value(charT, int v, const mpl::true_&){ return (v >> (sizeof(charT) * CHAR_BIT)) == 0;}template <class charT>bool valid_value(charT, int, const mpl::false_&){ return true; // v will alsways fit in a charT}template <class charT>bool valid_value(charT c, int v){ return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>());}template <class charT, class traits>charT basic_regex_parser<charT, traits>::unescape_character(){#ifdef BOOST_MSVC#pragma warning(push)#pragma warning(disable:4127)#endif charT result(0); if(m_position == m_end) { fail(regex_constants::error_escape, m_position - m_base); return false; } switch(this->m_traits.escape_syntax_type(*m_position)) { case regex_constants::escape_type_control_a: result = charT('\a'); break; case regex_constants::escape_type_e: result = charT(27); break; case regex_constants::escape_type_control_f: result = charT('\f'); break; case regex_constants::escape_type_control_n: result = charT('\n'); break; case regex_constants::escape_type_control_r: result = charT('\r'); break; case regex_constants::escape_type_control_t: result = charT('\t'); break; case regex_constants::escape_type_control_v: result = charT('\v'); break; case regex_constants::escape_type_word_assert: result = charT('\b'); break; case regex_constants::escape_type_ascii_control: ++m_position; if(m_position == m_end) { fail(regex_constants::error_escape, m_position - m_base); return result; } /* if((*m_position < charT('@')) || (*m_position > charT(125)) ) { fail(regex_constants::error_escape, m_position - m_base); return result; } */ result = static_cast<charT>(*m_position % 32); break; case regex_constants::escape_type_hex: ++m_position; if(m_position == m_end) { fail(regex_constants::error_escape, m_position - m_base); return result; } // maybe have \x{ddd} if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) { ++m_position; if(m_position == m_end) { fail(regex_constants::error_escape, m_position - m_base); return result; } int i = this->m_traits.toi(m_position, m_end, 16); if((m_position == m_end) || (i < 0) || ((std::numeric_limits<charT>::is_specialized) && (charT(i) > (std::numeric_limits<charT>::max)())) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) { fail(regex_constants::error_badbrace, m_position - m_base); return result; } ++m_position; result = charT(i); } else { std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), m_end - m_position); int i = this->m_traits.toi(m_position, m_position + len, 16); if((i < 0) || !valid_value(charT(0), i)) { fail(regex_constants::error_escape, m_position - m_base); return result; } result = charT(i); } return result; case regex_constants::syntax_digit: { // an octal escape sequence, the first character must be a zero // followed by up to 3 octal digits: std::ptrdiff_t len = (std::min)(::boost::re_detail::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4)); const charT* bp = m_position; int val = this->m_traits.toi(bp, bp + 1, 8); if(val != 0) { // Oops not an octal escape after all: fail(regex_constants::error_escape, m_position - m_base); return result; } val = this->m_traits.toi(m_position, m_position + len, 8); if(val < 0) { fail(regex_constants::error_escape, m_position - m_base); return result; } return static_cast<charT>(val); } case regex_constants::escape_type_named_char: { ++m_position; if(m_position == m_end) { fail(regex_constants::error_escape, m_position - m_base); return false; } // maybe have \N{name} if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) { const charT* base = m_position; // skip forward until we find enclosing brace: while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) ++m_position; if(m_position == m_end) { fail(regex_constants::error_escape, m_position - m_base); return false; } string_type s = this->m_traits.lookup_collatename(++base, m_position++); if(s.empty()) { fail(regex_constants::error_collate, m_position - m_base); return false; } if(s.size() == 1) { return s[0]; } } // fall through is a failure: fail(regex_constants::error_escape, m_position - m_base); return false; } default: result = *m_position; break; } ++m_position; return result;#ifdef BOOST_MSVC#pragma warning(pop)#endif}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_backref(){ BOOST_ASSERT(m_position != m_end); const charT* pc = m_position; int i = this->m_traits.toi(pc, pc + 1, 10); if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs))) { // not a backref at all but an octal escape sequence: charT c = unescape_character(); this->append_literal(c); } else if((i > 0) && (this->m_backrefs & (1u << (i-1)))) { m_position = pc; re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace))); pb->index = i; } else { fail(regex_constants::error_backref, m_position - m_end); return false; } return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_QE(){#ifdef BOOST_MSVC#pragma warning(push)#pragma warning(disable:4127)#endif // // parse a \Q...\E sequence: // ++m_position; // skip the Q const charT* start = m_position; const charT* end; do { while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape)) ++m_position; if(m_position == m_end) { // a \Q...\E sequence may terminate with the end of the expression: end = m_position; break; } if(++m_position == m_end) // skip the escape { fail(regex_constants::error_escape, m_position - m_base); return false; } // check to see if it's a \E: if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E) { ++m_position; end = m_position - 2; break; } // otherwise go round again: }while(true); // // now add all the character between the two escapes as literals: // while(start != end) { this->append_literal(*start); ++start; } return true;#ifdef BOOST_MSVC#pragma warning(pop)#endif}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_perl_extension(){ if(++m_position == m_end) { fail(regex_constants::error_badrepeat, m_position - m_base); return false; } // // treat comments as a special case, as these // are the only ones that don't start with a leading // startmark state: // if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash) { while((m_position != m_end) && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) {} return true; } // // backup some state, and prepare the way: // int markid = 0; std::ptrdiff_t jump_offset = 0; re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace))); std::ptrdiff_t last_paren_start = this->getoffset(pb); // back up insertion point for alternations, and set new point: std::ptrdiff_t last_alt_point = m_alt_insert_point; this->m_pdata->m_data.align(); m_alt_insert_point = this->m_pdata->m_data.size(); std::ptrdiff_t expected_alt_point = m_alt_insert_point; bool restore_flags = true; regex_constants::syntax_option_type old_flags = this->flags(); bool old_case_change = m_has_case_change; m_has_case_change = false; // // select the actual extension used: // switch(this->m_traits.syntax_type(*m_position)) { case regex_constants::syntax_colon: // // a non-capturing mark: // pb->index = markid = 0; ++m_position; break;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?