basic_regex_parser.hpp
来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 2,075 行 · 第 1/5 页
HPP
2,075 行
case regex_constants::syntax_equal: pb->index = markid = -1; ++m_position; jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); this->m_pdata->m_data.align(); m_alt_insert_point = this->m_pdata->m_data.size(); break; case regex_constants::syntax_not: pb->index = markid = -2; ++m_position; jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); this->m_pdata->m_data.align(); m_alt_insert_point = this->m_pdata->m_data.size(); break; case regex_constants::escape_type_left_word: { // a lookbehind assertion: if(++m_position == m_end) { fail(regex_constants::error_badrepeat, m_position - m_base); return false; } regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position); if(t == regex_constants::syntax_not) pb->index = markid = -2; else if(t == regex_constants::syntax_equal) pb->index = markid = -1; else { fail(regex_constants::error_badrepeat, m_position - m_base); return false; } ++m_position; jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); this->append_state(syntax_element_backstep, sizeof(re_brace)); this->m_pdata->m_data.align(); m_alt_insert_point = this->m_pdata->m_data.size(); break; } case regex_constants::escape_type_right_word: // // an independent sub-expression: // pb->index = markid = -3; ++m_position; jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); this->m_pdata->m_data.align(); m_alt_insert_point = this->m_pdata->m_data.size(); break; case regex_constants::syntax_open_mark: { // a conditional expression: pb->index = markid = -4; if(++m_position == m_end) { fail(regex_constants::error_badrepeat, m_position - m_base); return false; } int v = this->m_traits.toi(m_position, m_end, 10); if(v > 0) { re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); br->index = v; if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) { fail(regex_constants::error_badrepeat, m_position - m_base); return false; } if(++m_position == m_end) { fail(regex_constants::error_badrepeat, m_position - m_base); return false; } } else { // verify that we have a lookahead or lookbehind assert: if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question) { fail(regex_constants::error_badrepeat, m_position - m_base); return false; } if(++m_position == m_end) { fail(regex_constants::error_badrepeat, m_position - m_base); return false; } if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word) { if(++m_position == m_end) { fail(regex_constants::error_badrepeat, m_position - m_base); return false; } if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not)) { fail(regex_constants::error_badrepeat, m_position - m_base); return false; } m_position -= 3; } else { if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not)) { fail(regex_constants::error_badrepeat, m_position - m_base); return false; } m_position -= 2; } } break; } case regex_constants::syntax_close_mark: fail(regex_constants::error_badrepeat, m_position - m_base); return false; default: // // lets assume that we have a (?imsx) group and try and parse it: // regex_constants::syntax_option_type opts = parse_options(); if(m_position == m_end) return false; // make a note of whether we have a case change: m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase)); pb->index = markid = 0; if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) { // update flags and carry on as normal: this->flags(opts); restore_flags = false; old_case_change |= m_has_case_change; // defer end of scope by one ')' } else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon) { // update flags and carry on until the matching ')' is found: this->flags(opts); ++m_position; } else { fail(regex_constants::error_badrepeat, m_position - m_base); return false; } // finally append a case change state if we need it: if(m_has_case_change) { static_cast<re_case*>( this->append_state(syntax_element_toggle_case, sizeof(re_case)) )->icase = opts & regbase::icase; } } // // now recursively add more states, this will terminate when we get to a // matching ')' : // parse_all(); // // Unwind alternatives: // if(0 == unwind_alts(last_paren_start)) return false; // // we either have a ')' or we have run out of characters prematurely: // if(m_position == m_end) { this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end)); return false; } BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark); ++m_position; // // restore the flags: // if(restore_flags) { // append a case change state if we need it: if(m_has_case_change) { static_cast<re_case*>( this->append_state(syntax_element_toggle_case, sizeof(re_case)) )->icase = old_flags & regbase::icase; } this->flags(old_flags); } // // set up the jump pointer if we have one: // if(jump_offset) { this->m_pdata->m_data.align(); re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset)); jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp); if(this->m_last_state == jmp) { // Oops... we didn't have anything inside the assertion: fail(regex_constants::error_empty, m_position - m_base); return false; } } // // verify that if this is conditional expression, that we do have // an alternative, if not add one: // if(markid == -4) { re_syntax_base* b = this->getaddress(expected_alt_point); // Make sure we have exactly one alternative following this state: if(b->type != syntax_element_alt) { re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt))); alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt); } else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt) { fail(regex_constants::error_bad_pattern, m_position - m_base); return false; } // check for invalid repetition of next state: b = this->getaddress(expected_alt_point); b = this->getaddress(static_cast<re_alt*>(b)->next.i, b); if((b->type != syntax_element_assert_backref) && (b->type != syntax_element_startmark)) { fail(regex_constants::error_badrepeat, m_position - m_base); return false; } } // // append closing parenthesis state: // pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace))); pb->index = markid; this->m_paren_start = last_paren_start; // // restore the alternate insertion point: // this->m_alt_insert_point = last_alt_point; // // and the case change data: // m_has_case_change = old_case_change; return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate){ // // parses an emacs style \sx or \Sx construct. // if(++m_position == m_end) { fail(regex_constants::error_escape, m_position - m_base); return false; } basic_char_set<charT, traits> char_set; if(negate) char_set.negate(); static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', }; switch(*m_position) { case 's': case ' ': char_set.add_class(this->m_mask_space); break; case 'w': char_set.add_class(this->m_word_mask); break; case '_': char_set.add_single(digraph<charT>(charT('$'))); char_set.add_single(digraph<charT>(charT('&'))); char_set.add_single(digraph<charT>(charT('*'))); char_set.add_single(digraph<charT>(charT('+'))); char_set.add_single(digraph<charT>(charT('-'))); char_set.add_single(digraph<charT>(charT('_'))); char_set.add_single(digraph<charT>(charT('<'))); char_set.add_single(digraph<charT>(charT('>'))); break; case '.': char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5)); break; case '(': char_set.add_single(digraph<charT>(charT('('))); char_set.add_single(digraph<charT>(charT('['))); char_set.add_single(digraph<charT>(charT('{'))); break; case ')': char_set.add_single(digraph<charT>(charT(')'))); char_set.add_single(digraph<charT>(charT(']'))); char_set.add_single(digraph<charT>(charT('}'))); break; case '"': char_set.add_single(digraph<charT>(charT('"'))); char_set.add_single(digraph<charT>(charT('\''))); char_set.add_single(digraph<charT>(charT('`'))); break; case '\'': char_set.add_single(digraph<charT>(charT('\''))); char_set.add_single(digraph<charT>(charT(','))); char_set.add_single(digraph<charT>(charT('#'))); break; case '<': char_set.add_single(digraph<charT>(charT(';'))); break; case '>': char_set.add_single(digraph<charT>(charT('\n'))); char_set.add_single(digraph<charT>(charT('\f'))); break; default: fail(regex_constants::error_ctype, m_position - m_base); return false; } if(0 == this->append_set(char_set)) { fail(regex_constants::error_ctype, m_position - m_base); return false; } ++m_position; return true;}template <class charT, class traits>regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options(){ // we have a (?imsx-imsx) group, convert it into a set of flags: regex_constants::syntax_option_type f = this->flags(); bool breakout = false; do { switch(*m_position) { case 's': f |= regex_constants::mod_s; f &= ~regex_constants::no_mod_s; break; case 'm': f &= ~regex_constants::no_mod_m; break; case 'i': f |= regex_constants::icase; break; case 'x': f |= regex_constants::mod_x; break; default: breakout = true; continue; } if(++m_position == m_end) { fail(regex_constants::error_paren, m_position - m_base); return false; } } while(!breakout); if(*m_position == static_cast<charT>('-')) { if(++m_position == m_end) { fail(regex_constants::error_paren, m_position - m_base); return false; } do { switch(*m_position) { case 's': f &= ~regex_constants::mod_s; f |= regex_constants::no_mod_s; break; case 'm': f |= regex_constants::no_mod_m; break; case 'i': f &= ~regex_constants::icase; break; case 'x': f &= ~regex_constants::mod_x; break; default: breakout = true; continue; } if(++m_position == m_end) { fail(regex_constants::error_paren, m_position - m_base); return false; } } while(!breakout); } return f;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start){ // // If we didn't actually add any states after the last // alternative then that's an error: // if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size())) && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)) { fail(regex_constants::error_empty, this->m_position - this->m_base); return false
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?