regex_compile.hpp
来自「CGAL is a collaborative effort of several…」· HPP 代码 · 共 1,861 行 · 第 1/5 页
HPP
1,861 行
} break; case traits_type::syntax_or: if(((_flags & bk_vbar) == 0) || (_flags & limited_ops)) break; goto alt_string_jump; case traits_type::syntax_open_brace: if( ((_flags & bk_braces) == 0) || ((_flags & intervals) == 0)) break; // we have {x} or {x,} or {x,y}: parse_range(ptr, end, rep_min, rep_max); goto repeat_jump; case traits_type::syntax_digit: if(_flags & bk_refs) { // update previous: int i = traits_inst.toi((charT)c); if(i == 0) { // we can have \025 which means take char whose // code is 25 (octal), so parse string: c = traits_inst.toi(ptr, end, -8); --ptr; break; } dat = add_simple(dat, re_detail::syntax_element_backref, sizeof(re_detail::re_brace)); static_cast<re_detail::re_brace*>(dat)->index = i; ++ptr; continue; } break; case traits_type::syntax_b: // re_detail::syntax_element_word_boundary dat = add_simple(dat, re_detail::syntax_element_word_boundary); ++ptr; continue; case traits_type::syntax_B: dat = add_simple(dat, re_detail::syntax_element_within_word); ++ptr; continue; case traits_type::syntax_left_word: dat = add_simple(dat, re_detail::syntax_element_word_start); ++ptr; continue; case traits_type::syntax_right_word: dat = add_simple(dat, re_detail::syntax_element_word_end); ++ptr; continue; case traits_type::syntax_w: //re_detail::syntax_element_word_char dat = compile_set_simple(dat, traits_type::char_class_word); ++ptr; continue; case traits_type::syntax_W: dat = compile_set_simple(dat, traits_type::char_class_word, true); ++ptr; continue; case traits_type::syntax_d: //re_detail::syntax_element_word_char dat = compile_set_simple(dat, traits_type::char_class_digit); ++ptr; continue; case traits_type::syntax_D: dat = compile_set_simple(dat, traits_type::char_class_digit, true); ++ptr; continue; case traits_type::syntax_s: //re_detail::syntax_element_word_char dat = compile_set_simple(dat, traits_type::char_class_space); ++ptr; continue; case traits_type::syntax_S: dat = compile_set_simple(dat, traits_type::char_class_space, true); ++ptr; 
continue; case traits_type::syntax_l: //re_detail::syntax_element_word_char dat = compile_set_simple(dat, traits_type::char_class_lower); ++ptr; continue; case traits_type::syntax_L: dat = compile_set_simple(dat, traits_type::char_class_lower, true); ++ptr; continue; case traits_type::syntax_u: //re_detail::syntax_element_word_char dat = compile_set_simple(dat, traits_type::char_class_upper); ++ptr; continue; case traits_type::syntax_U: dat = compile_set_simple(dat, traits_type::char_class_upper, true); ++ptr; continue; case traits_type::syntax_Q: ++ptr; while(true) { if(ptr == end) { fail(REG_EESCAPE); return error_code(); } if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_slash) { ++ptr; if((ptr != end) && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_E)) break; else { dat = add_literal(dat, *(ptr-1)); continue; } } dat = add_literal(dat, *ptr); ++ptr; } ++ptr; continue; case traits_type::syntax_C: dat = add_simple(dat, re_detail::syntax_element_wild); ++ptr; continue; case traits_type::syntax_X: dat = add_simple(dat, re_detail::syntax_element_combining); ++ptr; continue; case traits_type::syntax_Z: dat = add_simple(dat, re_detail::syntax_element_soft_buffer_end); ++ptr; continue; case traits_type::syntax_G: dat = add_simple(dat, re_detail::syntax_element_restart_continue); ++ptr; continue; case traits_type::syntax_start_buffer: dat = add_simple(dat, re_detail::syntax_element_buffer_start); ++ptr; continue; case traits_type::syntax_end_buffer: dat = add_simple(dat, re_detail::syntax_element_buffer_end); ++ptr; continue; default: c = (traits_size_type)(traits_uchar_type)parse_escape(ptr, end); dat = add_literal(dat, (charT)c); continue; } dat = add_literal(dat, (charT)c); ++ptr; break; } case traits_type::syntax_dollar: dat = add_simple(dat, re_detail::syntax_element_end_line, sizeof(re_detail::re_syntax_base)); ++ptr; continue; case traits_type::syntax_caret: dat = 
add_simple(dat, re_detail::syntax_element_start_line, sizeof(re_detail::re_syntax_base)); ++ptr; continue; case traits_type::syntax_dot: dat = add_simple(dat, re_detail::syntax_element_wild, sizeof(re_detail::re_syntax_base)); ++ptr; continue; case traits_type::syntax_star: rep_min = 0; rep_max = (unsigned)-1; repeat_jump: { std::ptrdiff_t offset; if(dat == 0) { fail(REG_BADRPT); return error_code(); } switch(dat->type) { case re_detail::syntax_element_endmark: offset = last_mark_popped; break; case re_detail::syntax_element_literal: if(static_cast<re_detail::re_literal*>(dat)->length > 1) { // update previous: charT lit = *reinterpret_cast<charT*>(reinterpret_cast<char*>(dat) + sizeof(re_detail::re_literal) + ((static_cast<re_detail::re_literal*>(dat)->length-1)*sizeof(charT))); --static_cast<re_detail::re_literal*>(dat)->length; dat = add_simple(dat, re_detail::syntax_element_literal, sizeof(re_detail::re_literal) + sizeof(charT)); static_cast<re_detail::re_literal*>(dat)->length = 1; *reinterpret_cast<charT*>(static_cast<re_detail::re_literal*>(dat)+1) = lit; } offset = reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data()); break; case re_detail::syntax_element_backref: case re_detail::syntax_element_long_set: case re_detail::syntax_element_set: case re_detail::syntax_element_wild: case re_detail::syntax_element_combining: // we're repeating a single item: offset = reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data()); break; default: fail(REG_BADRPT); return error_code(); } data.align(); dat->next.i = data.size(); //unsigned pos = (char*)dat - (char*)data.data(); // add the trailing jump: dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size); static_cast<re_detail::re_jump*>(dat)->alt.i = 0; // now insert the leading repeater: dat = static_cast<re_detail::re_syntax_base*>(data.insert(offset, re_detail::re_repeater_size)); dat->next.i = (reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data())) + 
re_detail::re_repeater_size; dat->type = re_detail::syntax_element_rep; static_cast<re_detail::re_repeat*>(dat)->alt.i = data.size(); static_cast<re_detail::re_repeat*>(dat)->min = rep_min; static_cast<re_detail::re_repeat*>(dat)->max = rep_max; static_cast<re_detail::re_repeat*>(dat)->leading = false; static_cast<re_detail::re_repeat*>(dat)->greedy = true; move_offsets(dat, re_detail::re_repeater_size); ++ptr; // // now check to see if we have a non-greedy repeat: if((ptr != end) && (_flags & (limited_ops | bk_plus_qm | bk_braces)) == 0) { c = (traits_size_type)(traits_uchar_type)*ptr; if(traits_type::syntax_question == traits_inst.syntax_type(c)) { // OK repeat is non-greedy: static_cast<re_detail::re_repeat*>(dat)->greedy = false; ++ptr; } } dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + data.size() - re_detail::re_jump_size); static_cast<re_detail::re_repeat*>(dat)->alt.i = offset; continue; } case traits_type::syntax_plus: if(_flags & (bk_plus_qm | limited_ops)) { dat = add_literal(dat, (charT)c); ++ptr; continue; } rep_min = 1; rep_max = (unsigned)-1; goto repeat_jump; case traits_type::syntax_question: if(_flags & (bk_plus_qm | limited_ops)) { dat = add_literal(dat, (charT)c); ++ptr; continue; } rep_min = 0; rep_max = 1; goto repeat_jump; case traits_type::syntax_open_set: // update previous: if(dat) { data.align(); dat->next.i = data.size(); } // extend: dat = compile_set(ptr, end); if(dat == 0) { if((_flags & regbase::failbit) == 0) fail(REG_EBRACK); return error_code(); } break; case traits_type::syntax_or: { if(_flags & (bk_vbar | limited_ops)) { dat = add_literal(dat, (charT)c); ++ptr; continue; } alt_string_jump: // update previous: if(dat == 0) { // start of pattern can't have empty "|" fail(REG_EMPTY); return error_code(); } // see if we have an empty alternative: if(mark.empty() == false) if(mark.peek() == data.index(dat)) { fail(REG_EMPTY); return error_code(); } // extend: dat = add_simple(dat, 
re_detail::syntax_element_jump, re_detail::re_jump_size); data.align(); // // we don't know what value to put here yet, // use an arbitrarily large value for now // and check it later (TODO!) static_cast<re_detail::re_jump*>(dat)->alt.i = INT_MAX/2; // now work out where to insert: std::size_t offset = 0; if(mark.empty() == false) { // we have a '(' or '|' to go back to: offset = mark.peek(); re_detail::re_syntax_base* base = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + offset); offset = base->next.i; } re_detail::re_jump* j = static_cast<re_detail::re_jump*>(data.insert(offset, re_detail::re_jump_size)); j->type = re_detail::syntax_element_alt; j->next.i = offset + re_detail::re_jump_size; j->alt.i = data.size(); move_offsets(j, re_detail::re_jump_size); dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + data.size() - re_detail::re_jump_size); mark.push(data.size() - re_detail::re_jump_size); ++ptr; break; } case traits_type::syntax_open_brace: if((_flags & bk_braces) || ((_flags & intervals) == 0)) { dat = add_literal(dat, (charT)c); ++ptr; continue; } // we have {x} or {x,} or {x,y}: parse_range(ptr, end, rep_min, rep_max); goto repeat_jump; case traits_type::syntax_newline: if(_flags & newline_alt) goto alt_string_jump; dat = add_literal(dat, (charT)c); ++ptr; continue; case traits_type::syntax_close_brace: if(_flags & bk_braces) { dat = add_literal(dat, (charT)c); ++ptr; continue; } fail(REG_BADPAT); return error_code(); default: dat = add_literal(dat, (charT)c); ++ptr; break; } // switch } // while // // update previous: if(dat) { data.align(); dat->next.i = data.size(); } // see if we have an empty alternative: if(mark.empty() == false) if(mark.peek() == data.index(dat) ) { re_detail::re_syntax_base* para = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cas
⌨️ 快捷键说明
复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?