regex_compile.hpp
来自「CGAL is a collaborative effort of severa」· HPP 代码 · 共 1,861 行 · 第 1/5 页
HPP
1,861 行
// NOTE(review): the line structure of this section was restored from a
// collapsed paste; the tokens are unchanged. The statements up to the first
// closing brace at column 0 are the tail of a definition that begins before
// this section - confirm the extent of the "//c = *first;" comment against
// the upstream file.
         fail(REG_EESCAPE);
         break;
      }
      if(((traits_uchar_type)(*first) < (traits_uchar_type)'@')
            || ((traits_uchar_type)(*first) > (traits_uchar_type)127) )
      {
         fail(REG_EESCAPE);
         return (charT)0;
      }
      c = (charT)((traits_uchar_type)(*first) - (traits_uchar_type)'@');
      ++first;
      break;
   case traits_type::syntax_e:
      c = (charT)27;
      ++first;
      break;
   case traits_type::syntax_digit:
      c = (charT)traits_inst.toi(first, last, -8);
      break;
   default:
      //c = *first;
      ++first;
   }
   return c;
}

//
// compile_maps:
// Walks the chain of state records that starts at data.data() and fills in
// the per-record start maps and can_be_null flags.
//  - startmap is cleared and filled (mask_all) from the first record.
//  - Every alt/rep record gets its own 256 entry _map cleared and filled:
//    mask_take for the branch through next.p (with alt.p as the terminal),
//    mask_skip for the branch through alt.p; can_be_null is updated by the
//    same two calls.
//  - Every rep record also gets its singleton flag set.
//  - Every other record only has can_be_null computed (no map).
// The loop stops at the syntax_element_match record, whose can_be_null is
// set to mask_all.
//
template <class charT, class traits, class Allocator>
void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_maps()
{
   re_detail::re_syntax_base* record = static_cast<re_detail::re_syntax_base*>(data.data());
   // always compile the first _map:
   std::memset(startmap, 0, 256);
   record->can_be_null = 0;
   // pnull is 0 here, so only startmap is written by this call:
   compile_map(record, startmap, 0, re_detail::mask_all);

   while(record->type != re_detail::syntax_element_match)
   {
      if((record->type == re_detail::syntax_element_alt) || (record->type == re_detail::syntax_element_rep))
      {
         std::memset(&(static_cast<re_detail::re_jump*>(record)->_map), 0, 256);
         record->can_be_null = 0;
         // "take" branch: probe from next.p, stopping at alt.p:
         compile_map(record->next.p, static_cast<re_detail::re_jump*>(record)->_map, &(record->can_be_null), re_detail::mask_take, static_cast<re_detail::re_jump*>(record)->alt.p);
         // "skip" branch: probe from alt.p:
         compile_map(static_cast<re_detail::re_jump*>(record)->alt.p, static_cast<re_detail::re_jump*>(record)->_map, &(record->can_be_null), re_detail::mask_skip);
         if(record->type == re_detail::syntax_element_rep)
         {
            re_detail::re_repeat* rep = static_cast<re_detail::re_repeat*>(record);
            // set whether this is a singleton repeat or not:
            // (true exactly when the third record after rep is rep->alt.p)
            if(rep->next.p->next.p->next.p == rep->alt.p)
            {
               rep->singleton = true;
            }
            else
               rep->singleton = false;
         }
      }
      else
      {
         record->can_be_null = 0;
         // no map for this record type, only the can_be_null flag:
         compile_map(record, 0, &(record->can_be_null), re_detail::mask_all);
      }
      record = record->next.p;
   }
   record->can_be_null = re_detail::mask_all;
}

//
// probe_start:
// Returns true if the character cc may start a match of the state machine
// beginning at node, false otherwise (including for any node type that is
// not listed in the switch).
//  node     - the state record to examine.
//  cc       - the candidate character; it is passed through
//             traits_inst.translate() with the icase flag before being
//             compared or classified.
//  terminal - a backwards jump whose next.p equals terminal is reported as
//             a match; repeats pass their own alt.p as the terminal when
//             probing their body.
//
template <class charT, class traits, class Allocator>
bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start(
                        re_detail::re_syntax_base* node, charT cc, re_detail::re_syntax_base* terminal) const
{
   unsigned int c;

   switch(node->type)
   {
   case re_detail::syntax_element_startmark:
      if(static_cast<const re_detail::re_brace*>(node)->index == -1)
      {
         // index -1: both the path through next.p->next.p and the path through
         // the alt.p of the record that follows must accept cc:
         return probe_start(node->next.p->next.p, cc, terminal) && probe_start(static_cast<const re_detail::re_jump*>(node->next.p)->alt.p, cc, terminal);
      }
      // fall through:
   case re_detail::syntax_element_endmark:
   case re_detail::syntax_element_start_line:
   case re_detail::syntax_element_word_boundary:
   case re_detail::syntax_element_buffer_start:
   case re_detail::syntax_element_restart_continue:
      // doesn't tell us anything about the next character, so:
      return probe_start(node->next.p, cc, terminal);
   case re_detail::syntax_element_literal:
      // only the first character of the literal can match:
      // note these have already been translated:
      // (the characters are read from the storage directly after the re_literal record)
      if(*reinterpret_cast<charT*>(static_cast<re_detail::re_literal*>(node)+1) == traits_inst.translate(cc, (_flags & regbase::icase)))
         return true;
      return false;
   case re_detail::syntax_element_end_line:
      // next character (if there is one!) must be a newline:
      if(traits_inst.is_separator(traits_inst.translate(cc, (_flags & regbase::icase))))
         return true;
      return false;
   case re_detail::syntax_element_wild:
      return true;
   case re_detail::syntax_element_match:
      return true;
   case re_detail::syntax_element_within_word:
   case re_detail::syntax_element_word_start:
      return traits_inst.is_class(traits_inst.translate(cc, (_flags & regbase::icase)), traits_type::char_class_word);
   case re_detail::syntax_element_word_end:
      // what follows must not be a word character,
      return traits_inst.is_class(traits_inst.translate(cc, (_flags & regbase::icase)), traits_type::char_class_word) ? false : true;
   case re_detail::syntax_element_buffer_end:
      // we can be null, nothing must follow,
      // NB we assume that this is followed by
      // re_detail::syntax_element_match, if it's not then we can
      // never match anything anyway!!
      return false;
   case re_detail::syntax_element_soft_buffer_end:
      // we can be null, only newlines must follow,
      // NB we assume that this is followed by
      // re_detail::syntax_element_match, if it's not then we can
      // never match anything anyway!!
      return traits_inst.is_separator(traits_inst.translate(cc, (_flags & regbase::icase)));
   case re_detail::syntax_element_backref:
      // there's no easy way to determine this
      // which is not to say it can't be done!
      // for now:
      return true;
   case re_detail::syntax_element_long_set:
      // we can not be null,
      // we need to add already translated values in the set
      // to values in the _map
      // (re_is_set_member returning something other than &cc counts as a hit)
      return re_detail::re_maybe_set_member(cc, static_cast<const re_detail::re_set_long*>(node), *this) || (re_detail::re_is_set_member(static_cast<const charT*>(&cc), static_cast<const charT*>(&cc+1), static_cast<re_detail::re_set_long*>(node), *this) != &cc);
   case re_detail::syntax_element_set:
      // set all the elements that are set in corresponding set:
      // (the translated character, cast to unsigned, indexes the set's _map)
      c = (traits_size_type)(traits_uchar_type)traits_inst.translate(cc, (_flags & regbase::icase));
      return static_cast<re_detail::re_set*>(node)->_map[c] != 0;
   case re_detail::syntax_element_jump:
      if(static_cast<re_detail::re_jump*>(node)->alt.p < node)
      {
         // backwards jump,
         // caused only by end of repeat section, we'll treat this
         // the same as a match, because the sub-expression has matched.
         if(node->next.p == terminal)
            return true; // null repeat - we can always take this
         else
         {
            //
            // take the jump, add in fix for the fact that if the
            // repeat that we're jumping to has non-zero minimum count
            // then we need to add in the possibility that we could still
            // skip that repeat.
            re_detail::re_syntax_base* next = static_cast<re_detail::re_jump*>(node)->alt.p;
            bool b = probe_start(next, cc, terminal);
            if((next->type == re_detail::syntax_element_rep) && (static_cast<re_detail::re_repeat*>(next)->min != 0))
            {
               b = b || probe_start(static_cast<re_detail::re_jump*>(next)->alt.p, cc, terminal);
            }
            return b;
         }
      }
      else
         // take the jump and compile:
         return probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal);
   case re_detail::syntax_element_alt:
      // we need to take the OR of the two alternatives:
      return probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal) || probe_start(node->next.p, cc, terminal);
   case re_detail::syntax_element_rep:
      // we need to take the OR of the two alternatives
      // (the skip alternative is only considered when the minimum count is 0)
      if(static_cast<re_detail::re_repeat*>(node)->min == 0)
         return probe_start(node->next.p, cc, static_cast<re_detail::re_jump*>(node)->alt.p) || probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal);
      else
         return probe_start(node->next.p, cc, static_cast<re_detail::re_jump*>(node)->alt.p);
   case re_detail::syntax_element_combining:
      return !traits_inst.is_combining(traits_inst.translate(cc, (_flags & regbase::icase)));
   }
   return false;
}

//
// probe_start_null:
// Companion of probe_start() that takes no character; compile_map() uses
// its result to set bits in a record's can_be_null flag.
// Zero-width assertions and marks defer to the next record; match,
// buffer_end, soft_buffer_end and backref report true; jumps and
// alternatives follow their targets; a repeat only considers being skipped.
// Every other node type reports false.
//  node     - the state record to examine.
//  terminal - a backwards jump whose next.p equals terminal reports true.
//
template <class charT, class traits, class Allocator>
bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start_null(re_detail::re_syntax_base* node, re_detail::re_syntax_base* terminal)const
{
   switch(node->type)
   {
   case re_detail::syntax_element_startmark:
   case re_detail::syntax_element_endmark:
   case re_detail::syntax_element_start_line:
   case re_detail::syntax_element_word_boundary:
   case re_detail::syntax_element_buffer_start:
   case re_detail::syntax_element_restart_continue:
   case re_detail::syntax_element_end_line:
   case re_detail::syntax_element_word_end:
      // doesn't tell us anything about the next character, so:
      return probe_start_null(node->next.p, terminal);
   case re_detail::syntax_element_match:
   case re_detail::syntax_element_buffer_end:
   case re_detail::syntax_element_soft_buffer_end:
   case re_detail::syntax_element_backref:
      return true;
   case re_detail::syntax_element_jump:
      if(static_cast<re_detail::re_jump*>(node)->alt.p < node)
      {
         // backwards jump,
         // caused only by end of repeat section, we'll treat this
         // the same as a match, because the sub-expression has matched.
         // this is only caused by NULL repeats as in "(a*)*" or "(\<)*"
         // these are really nonsense and make the matching code much
         // harder, it would be nice to get rid of them altogether.
         if(node->next.p == terminal)
            return true;
         else
            return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);
      }
      else
         // take the jump and compile:
         return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);
   case re_detail::syntax_element_alt:
      // we need to take the OR of the two alternatives:
      return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal) || probe_start_null(node->next.p, terminal);
   case re_detail::syntax_element_rep:
      // only need to consider skipping the repeat:
      return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);
   default:
      break;
   }
   return false;
}

//
// compile_map:
// ORs mask into the outputs for the state machine starting at node.
//  _map     - if non-null, a 256 entry table; entry i receives mask when
//             probe_start(node, (charT)i, terminal) is true.
//  pnull    - if non-null, *pnull receives mask when
//             probe_start_null(node, terminal) is true.
//  mask     - the bit(s) to OR in; existing bits are never cleared here.
//  terminal - forwarded unchanged to both probe functions.
//
template <class charT, class traits, class Allocator>
void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_map(
                        re_detail::re_syntax_base* node, unsigned char* _map,
                        unsigned int* pnull, unsigned char mask, re_detail::re_syntax_base* terminal)const
{
   if(_map)
   {
      for(unsigned int i = 0; i < 256; ++i)
      {
         if(probe_start(node, (charT)i, terminal))
            _map[i] |= mask;
      }
   }
   if(pnull && probe_start_null(node, terminal))
      *pnull |= mask;
}

//
// move_offsets:
// Adds size to the stored offsets of every record that follows j in the
// chain. The first record visited is the one at byte offset j->next.i from
// data.data(); j itself is not modified. For rep, jump and alt records both
// alt.i and next.i are adjusted, for every other record only next.i.
// The walk follows the already adjusted next.i and stops at the record whose
// adjusted next.i equals size, i.e. whose next.i was 0 before adjustment.
//
template <class charT, class traits, class Allocator>
void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::move_offsets(re_detail::re_syntax_base* j, unsigned size)
{
# ifdef BOOST_MSVC
# pragma warning(push)
# pragma warning(disable: 4127)
#endif
   // move all offsets starting with j->link forward by size
   // called after an insert:
   j = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + j->next.i);
   while(true)
   {
      switch(j->type)
      {
      case re_detail::syntax_element_rep:
         static_cast<re_detail::re_jump*>(j)->alt.i += size;
         j->next.i += size;
         break;
      case re_detail::syntax_element_jump:
      case re_detail::syntax_element_alt:
         static_cast<re_detail::re_jump*>(j)->alt.i += size;
         j->next.i += size;
         break;
      default:
         j->next.i += size;
         break;
      }
      // a next.i that was 0 before the adjustment marks the last record:
      if(j->next.i == size)
         break;
      j = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + j->next.i);
   }
# ifdef BOOST_MSVC
# pragma warning(pop)
#endif
}

//
// compile_set_simple:
// Builds a set that consists of the single character class cls by calling
// compile_set_aux() with empty singles, ranges and equivalents stacks.
//  dat   - if non-null, data is aligned and dat->next.i is set to the
//          current data.size() before the set is compiled.
//  cls   - the class mask pushed onto the classes stack.
//  isnot - forwarded unchanged to compile_set_aux().
// Returns whatever compile_set_aux() returns.
//
template <class charT, class traits, class Allocator>
re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set_simple(re_detail::re_syntax_base* dat, unsigned long cls, bool isnot)
{
   typedef typename re_detail::is_byte<charT>::width_type width_type;
   re_detail::jstack<traits_string_type, Allocator> singles(64, data.allocator());
   re_detail::jstack<traits_string_type, Allocator> ranges(64, data.allocator());
   re_detail::jstack<boost::uint_fast32_t, Allocator> classes(64, data.allocator());
   re_detail::jstack<traits_string_type, Allocator> equivalents(64, data.allocator());
   classes.push(cls);
   if(dat)
   {
      data.align();
      dat->next.i = data.size();
   }
   // width_type() is passed as a tag argument:
   return compile_set_aux(singles, ranges, classes, equivalents, isnot, width_type());
}

//
// compile_set:
// Parses a set starting at *first, which must be the open-set character
// (checked with jm_assert), and advances first as it goes.
// NOTE(review): this definition continues past the end of this section;
// only the part visible here is shown and commented.
//
template <class charT, class traits, class Allocator>
re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set(const charT*& first, const charT* last)
{
   re_detail::jstack<traits_string_type, Allocator> singles(64, data.allocator());
   re_detail::jstack<traits_string_type, Allocator> ranges(64, data.allocator());
   re_detail::jstack<boost::uint_fast32_t, Allocator> classes(64, data.allocator());
   re_detail::jstack<traits_string_type, Allocator> equivalents(64, data.allocator());
   bool has_digraphs = false;
   jm_assert(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_open_set);
   ++first;
   bool started = false;
   bool done = false;
   bool isnot = false;

   enum last_type
   {
      last_single,
      last_none,
      last_dash
   };

   unsigned l = last_none;
   traits_string_type s;

   while((first != last) && !done)
   {
      traits_size_type c = (traits_size_type)(traits_uchar_type)*first;
      // this is only used for the switch(), but cannot be folded in
      // due to a bug in Comeau 4.2.44beta3
      traits_size_type syntax = traits_inst.syntax_type(c);
      switch(syntax)
      {
      case traits_type::syntax_caret:
         // a caret negates the set only if nothing has been started and
         // the set is not negated already, otherwise it is a literal:
         if(!started && !isnot)
         {
            isnot = true;
         }
         else
         {
            s = (charT)c;
            goto char_set_literal;
         }
         break;
      case traits_type::syntax_open_set:
      {
         // without the char_classes flag a nested '[' is just a literal:
         if((_flags & char_classes) == 0)
         {
            s = (charT)c;
            goto char_set_literal;
         }
         // check to see if we really have a class:
         const charT* base = first;
         // this is only used for the switch(), but cannot be folded in
         // due to a bug in Comeau 4.2.44beta3
         unsigned int inner_set = parse_inner_set(first, last);
         switch(inner_set)
         {
         case traits_type::syntax_colon:
            {
               // a class name can not be the end point of a range:
               if(l == last_dash)
               {
                  fail(REG_ERANGE);
                  return 0;
               }
               // the name passed for lookup excludes the first two and last two characters of the parsed span:
               boost::uint_fast32_t id = traits_inst.lookup_classname(base+2, first-2);
               if(_flags & regbase::icase)
               {
                  // when ignoring case, upper and lower are both replaced by alpha:
                  if((id == traits_type::char_class_upper) || (id == traits_type::char_class_lower))
                  {
                     id = traits_type::char_class_alpha;
                  }
               }
               // an id of 0 is reported as an invalid class name:
               if(id == 0)
               {
                  fail(REG_ECTYPE);
                  return 0;
               }
               classes.push(id);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?