basic_regex_parser.hpp
来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 2,075 行 · 第 1/5 页
HPP
2,075 行
fail(regex_constants::error_brace, this->m_position - this->m_base); return false; } // get min: v = this->m_traits.toi(m_position, m_end, 10); // skip whitespace: while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) ++m_position; if(v < 0) { fail(regex_constants::error_badbrace, this->m_position - this->m_base); return false; } else if(this->m_position == this->m_end) { fail(regex_constants::error_brace, this->m_position - this->m_base); return false; } min = v; // see if we have a comma: if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma) { // move on and error check: ++m_position; // skip whitespace: while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) ++m_position; if(this->m_position == this->m_end) { fail(regex_constants::error_brace, this->m_position - this->m_base); return false; } // get the value if any: v = this->m_traits.toi(m_position, m_end, 10); max = (v >= 0) ? v : (std::numeric_limits<std::size_t>::max)(); } else { // no comma, max = min: max = min; } // skip whitespace: while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) ++m_position; // OK now check trailing }: if(this->m_position == this->m_end) { fail(regex_constants::error_brace, this->m_position - this->m_base); return false; } if(isbasic) { if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape) { ++m_position; if(this->m_position == this->m_end) { fail(regex_constants::error_brace, this->m_position - this->m_base); return false; } } else { fail(regex_constants::error_badbrace, this->m_position - this->m_base); return false; } } if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace) ++m_position; else { fail(regex_constants::error_badbrace, this->m_position - this->m_base); return false; } // // finally go and add the repeat, unless error: // if(min > max) { fail(regex_constants::error_badbrace, this->m_position - this->m_base); return false; } return parse_repeat(min, max);}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_alt(){ // // error check: if there have been no previous states, // or if the last state was a '(' then error: // if((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark)) { fail(regex_constants::error_empty, this->m_position - this->m_base); return false; } ++m_position; // // we need to append a trailing jump: // re_syntax_base* pj = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump)); std::ptrdiff_t jump_offset = this->getoffset(pj); // // now insert the alternative: // re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size)); jump_offset += re_alt_size; this->m_pdata->m_data.align(); palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt); // // update m_alt_insert_point so that the next alternate gets // inserted at the start of the second of the two we've just created: // this->m_alt_insert_point = this->m_pdata->m_data.size(); // // the start of this alternative must have a case changes state // if the current block has messed around with case changes: // if(m_has_case_change) { static_cast<re_case*>( this->append_state(syntax_element_toggle_case, sizeof(re_case)) )->icase = this->m_icase; } // // push the alternative onto our stack, a recursive // implementation here is easier to understand (and faster // as it happens), but causes all kinds of stack overflow problems // on programs with small stacks (COM+). // m_alt_jumps.push_back(jump_offset); return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_set(){ ++m_position; if(m_position == m_end) { fail(regex_constants::error_brack, m_position - m_base); return false; } basic_char_set<charT, traits> char_set; const charT* base = m_position; // where the '[' was const charT* item_base = m_position; // where the '[' or '^' was while(m_position != m_end) { switch(this->m_traits.syntax_type(*m_position)) { case regex_constants::syntax_caret: if(m_position == base) { char_set.negate(); ++m_position; item_base = m_position; } else parse_set_literal(char_set); break; case regex_constants::syntax_close_set: if(m_position == item_base) { parse_set_literal(char_set); break; } else { ++m_position; if(0 == this->append_set(char_set)) { fail(regex_constants::error_range, m_position - m_base); return false; } } return true; case regex_constants::syntax_open_set: if(parse_inner_set(char_set)) break; return true; case regex_constants::syntax_escape: { // // look ahead and see if this is a character class shortcut // \d \w \s etc... // ++m_position; if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_class) { char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1); if(m != 0) { char_set.add_class(m); ++m_position; break; } } else if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_not_class) { // negated character class: char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1); if(m != 0) { char_set.add_negated_class(m); ++m_position; break; } } // not a character class, just a regular escape: --m_position; parse_set_literal(char_set); break; } default: parse_set_literal(char_set); break; } } return m_position != m_end;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set){ // // we have either a character class [:name:] // a collating element [.name.] // or an equivalence class [=name=] // if(m_end == ++m_position) { fail(regex_constants::error_brack, m_position - m_base); return false; } switch(this->m_traits.syntax_type(*m_position)) { case regex_constants::syntax_dot: // // a collating element is treated as a literal: // --m_position; parse_set_literal(char_set); return true; case regex_constants::syntax_colon: { // check that character classes are actually enabled: if((this->flags() & (regbase::main_option_type | regbase::no_char_classes)) == (regbase::basic_syntax_group | regbase::no_char_classes)) { --m_position; parse_set_literal(char_set); return true; } // skip the ':' if(m_end == ++m_position) { fail(regex_constants::error_brack, m_position - m_base); return false; } const charT* name_first = m_position; // skip at least one character, then find the matching ':]' if(m_end == ++m_position) { fail(regex_constants::error_brack, m_position - m_base); return false; } while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon)) ++m_position; const charT* name_last = m_position; if(m_end == m_position) { fail(regex_constants::error_brack, m_position - m_base); return false; } if((m_end == ++m_position) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) { fail(regex_constants::error_brack, m_position - m_base); return false; } // // check for negated class: // bool negated = false; if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret) { ++name_first; negated = true; } typedef typename traits::char_class_type mask_type; mask_type m = this->m_traits.lookup_classname(name_first, name_last); if(m == 0) { if(char_set.empty() && (name_last - name_first == 1)) { // maybe a special case: ++m_position; if( (m_position != m_end) && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)) { if(this->m_traits.escape_syntax_type(*name_first) == regex_constants::escape_type_left_word) { ++m_position; this->append_state(syntax_element_word_start); return false; } if(this->m_traits.escape_syntax_type(*name_first) == regex_constants::escape_type_right_word) { ++m_position; this->append_state(syntax_element_word_end); return false; } } } fail(regex_constants::error_ctype, name_first - m_base); return false; } if(negated == false) char_set.add_class(m); else char_set.add_negated_class(m); ++m_position; break; } case regex_constants::syntax_equal: { // skip the '=' if(m_end == ++m_position) { fail(regex_constants::error_brack, m_position - m_base); return false; } const charT* name_first = m_position; // skip at least one character, then find the matching '=]' if(m_end == ++m_position) { fail(regex_constants::error_brack, m_position - m_base); return false; } while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)) ++m_position; const charT* name_last = m_position; if(m_end == m_position) { fail(regex_constants::error_brack, m_position - m_base); return false; } if((m_end == ++m_position) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) { fail(regex_constants::error_brack, m_position - m_base); return false; } string_type m = this->m_traits.lookup_collatename(name_first, name_last); if((0 == m.size()) || (m.size() > 2)) { fail(regex_constants::error_collate, name_first - m_base); return false; } digraph<charT> d; d.first = m[0]; if(m.size() > 1) d.second = m[1]; else d.second = 0; char_set.add_equivalent(d); ++m_position; break; } default: --m_position; parse_set_literal(char_set); break; } return true;}template <class charT, class traits>void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set){ digraph<charT> start_range(get_next_set_literal(char_set)); if(m_end == m_position) { fail(regex_constants::error_brack, m_position - m_base); return; } if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash) { // we have a range: if(m_end == ++m_position) { fail(regex_constants::error_brack, m_position - m_base); return; } if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set) { digraph<charT> end_range = get_next_set_literal(char_set); char_set.add_range(start_range, end_range); if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash) { if(m_end == ++m_position) { fail(regex_constants::error_brack, m_position - m_base); return; }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?