basic_regex_parser.hpp
来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 2,075 行 · 第 1/5 页
HPP
2,075 行
} BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark); ++m_position; // // append closing parenthesis state: // pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace))); pb->index = markid; this->m_paren_start = last_paren_start; // // restore the alternate insertion point: // this->m_alt_insert_point = last_alt_point; // // allow backrefs to this mark: // if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT)) this->m_backrefs |= 1u << (markid - 1); return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_basic_escape(){ ++m_position; bool result = true; switch(this->m_traits.escape_syntax_type(*m_position)) { case regex_constants::syntax_open_mark: return parse_open_paren(); case regex_constants::syntax_close_mark: return false; case regex_constants::syntax_plus: if(this->flags() & regex_constants::bk_plus_qm) { ++m_position; return parse_repeat(1); } else return parse_literal(); case regex_constants::syntax_question: if(this->flags() & regex_constants::bk_plus_qm) { ++m_position; return parse_repeat(0, 1); } else return parse_literal(); case regex_constants::syntax_open_brace: if(this->flags() & regbase::no_intervals) return parse_literal(); ++m_position; return parse_repeat_range(true); case regex_constants::syntax_close_brace: if(this->flags() & regbase::no_intervals) return parse_literal(); fail(regex_constants::error_brace, this->m_position - this->m_base); return false; case regex_constants::syntax_or: if(this->flags() & regbase::bk_vbar) return parse_alt(); else result = parse_literal(); break; case regex_constants::syntax_digit: return parse_backref(); case regex_constants::escape_type_start_buffer: if(this->flags() & regbase::emacs_ex) { ++m_position; this->append_state(syntax_element_buffer_start); } else result = parse_literal(); break; case regex_constants::escape_type_end_buffer: if(this->flags() & regbase::emacs_ex) { ++m_position; this->append_state(syntax_element_buffer_end); } else result = parse_literal(); break; case regex_constants::escape_type_word_assert: if(this->flags() & regbase::emacs_ex) { ++m_position; this->append_state(syntax_element_word_boundary); } else result = parse_literal(); break; case regex_constants::escape_type_not_word_assert: if(this->flags() & regbase::emacs_ex) { ++m_position; this->append_state(syntax_element_within_word); } else result = parse_literal(); break; case regex_constants::escape_type_left_word: if(this->flags() & regbase::emacs_ex) { ++m_position; this->append_state(syntax_element_word_start); } else result = parse_literal(); break; case regex_constants::escape_type_right_word: if(this->flags() & regbase::emacs_ex) { ++m_position; this->append_state(syntax_element_word_end); } else result = parse_literal(); break; default: if(this->flags() & regbase::emacs_ex) { bool negate = true; switch(*m_position) { case 'w': negate = false; // fall through: case 'W': { basic_char_set<charT, traits> char_set; if(negate) char_set.negate(); char_set.add_class(this->m_word_mask); if(0 == this->append_set(char_set)) { fail(regex_constants::error_ctype, m_position - m_base); return false; } ++m_position; return true; } case 's': negate = false; // fall through: case 'S': return add_emacs_code(negate); case 'c': case 'C': // not supported yet: fail(regex_constants::error_escape, m_position - m_base); return false; default: break; } } result = parse_literal(); break; } return result;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_extended_escape(){ ++m_position; bool negate = false; // in case this is a character class escape: \w \d etc switch(this->m_traits.escape_syntax_type(*m_position)) { case regex_constants::escape_type_not_class: negate = true; // fall through: case regex_constants::escape_type_class: { typedef typename traits::char_class_type mask_type; mask_type m = this->m_traits.lookup_classname(m_position, m_position+1); if(m != 0) { basic_char_set<charT, traits> char_set; if(negate) char_set.negate(); char_set.add_class(m); if(0 == this->append_set(char_set)) { fail(regex_constants::error_ctype, m_position - m_base); return false; } ++m_position; return true; } // // not a class, just a regular unknown escape: // this->append_literal(unescape_character()); break; } case regex_constants::syntax_digit: return parse_backref(); case regex_constants::escape_type_left_word: ++m_position; this->append_state(syntax_element_word_start); break; case regex_constants::escape_type_right_word: ++m_position; this->append_state(syntax_element_word_end); break; case regex_constants::escape_type_start_buffer: ++m_position; this->append_state(syntax_element_buffer_start); break; case regex_constants::escape_type_end_buffer: ++m_position; this->append_state(syntax_element_buffer_end); break; case regex_constants::escape_type_word_assert: ++m_position; this->append_state(syntax_element_word_boundary); break; case regex_constants::escape_type_not_word_assert: ++m_position; this->append_state(syntax_element_within_word); break; case regex_constants::escape_type_Z: ++m_position; this->append_state(syntax_element_soft_buffer_end); break; case regex_constants::escape_type_Q: return parse_QE(); case regex_constants::escape_type_C: return parse_match_any(); case regex_constants::escape_type_X: ++m_position; this->append_state(syntax_element_combining); break; case regex_constants::escape_type_G: ++m_position; this->append_state(syntax_element_restart_continue); break; case regex_constants::escape_type_not_property: negate = true; // fall through: case regex_constants::escape_type_property: { ++m_position; char_class_type m; if(m_position == m_end) { fail(regex_constants::error_escape, m_position - m_base); return false; } // maybe have \p{ddd} if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) { const charT* base = m_position; // skip forward until we find enclosing brace: while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) ++m_position; if(m_position == m_end) { fail(regex_constants::error_escape, m_position - m_base); return false; } m = this->m_traits.lookup_classname(++base, m_position++); } else { m = this->m_traits.lookup_classname(m_position, m_position+1); ++m_position; } if(m != 0) { basic_char_set<charT, traits> char_set; if(negate) char_set.negate(); char_set.add_class(m); if(0 == this->append_set(char_set)) { fail(regex_constants::error_ctype, m_position - m_base); return false; } return true; } fail(regex_constants::error_ctype, m_position - m_base); } default: this->append_literal(unescape_character()); break; } return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_match_any(){ // // we have a '.' that can match any character: // ++m_position; static_cast<re_dot*>( this->append_state(syntax_element_wild, sizeof(re_dot)) )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s ? re_detail::force_not_newline : this->flags() & regbase::mod_s ? re_detail::force_newline : re_detail::dont_care); return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high){ bool greedy = true; std::size_t insert_point; // // when we get to here we may have a non-greedy ? mark still to come: // if((m_position != m_end) && ( (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex))) ) ) { // OK we have a perl regex, check for a '?': if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question) { greedy = false; ++m_position; } } if(0 == this->m_last_state) { fail(regex_constants::error_badrepeat, ::boost::re_detail::distance(m_base, m_position)); return false; } if(this->m_last_state->type == syntax_element_endmark) { // insert a repeat before the '(' matching the last ')': insert_point = this->m_paren_start; } else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1)) { // the last state was a literal with more than one character, split it in two: re_literal* lit = static_cast<re_literal*>(this->m_last_state); charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1]; --(lit->length); // now append new state: lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT))); lit->length = 1; (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c; insert_point = this->getoffset(this->m_last_state); } else { // repeat the last state whatever it was, need to add some error checking here: switch(this->m_last_state->type) { case syntax_element_start_line: case syntax_element_end_line: case syntax_element_word_boundary: case syntax_element_within_word: case syntax_element_word_start: case syntax_element_word_end: case syntax_element_buffer_start: case syntax_element_buffer_end: case syntax_element_alt: case syntax_element_soft_buffer_end: case syntax_element_restart_continue: case syntax_element_jump: case syntax_element_startmark: case syntax_element_backstep: // can't legally repeat any of the above: fail(regex_constants::error_badrepeat, m_position - m_base); return false; default: // do nothing... break; } insert_point = this->getoffset(this->m_last_state); } // // OK we now know what to repeat, so insert the repeat around it: // re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size)); rep->min = low; rep->max = high; rep->greedy = greedy; rep->leading = false; // store our repeater position for later: std::ptrdiff_t rep_off = this->getoffset(rep); // and append a back jump to the repeat: re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump))); jmp->alt.i = rep_off - this->getoffset(jmp); this->m_pdata->m_data.align(); // now fill in the alt jump for the repeat: rep = static_cast<re_repeat*>(this->getaddress(rep_off)); rep->alt.i = this->m_pdata->m_data.size() - rep_off; return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic){ // // parse a repeat-range: // std::size_t min, max; int v; // skip whitespace: while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) ++m_position; // fail if at end: if(this->m_position == this->m_end) {
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?