basic_regex_parser.hpp
来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 2,075 行 · 第 1/5 页
HPP
2,075 行
/* * * Copyright (c) 2004 * John Maddock * * Use, modification and distribution are subject to the * Boost Software License, Version 1.0. (See accompanying file * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) * */ /* * LOCATION: see http://www.boost.org for most recent version. * FILE basic_regex_parser.cpp * VERSION see <boost/version.hpp> * DESCRIPTION: Declares template class basic_regex_parser. */#ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP#define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP#ifdef BOOST_MSVC#pragma warning(push)#pragma warning(disable: 4103)#endif#ifdef BOOST_HAS_ABI_HEADERS# include BOOST_ABI_PREFIX#endif#ifdef BOOST_MSVC#pragma warning(pop)#endifnamespace boost{namespace re_detail{#ifdef BOOST_MSVC#pragma warning(push)#pragma warning(disable:4244 4800)#endiftemplate <class charT, class traits>class basic_regex_parser : public basic_regex_creator<charT, traits>{public: basic_regex_parser(regex_data<charT, traits>* data); void parse(const charT* p1, const charT* p2, unsigned flags); void fail(regex_constants::error_type error_code, std::ptrdiff_t position); bool parse_all(); bool parse_basic(); bool parse_extended(); bool parse_literal(); bool parse_open_paren(); bool parse_basic_escape(); bool parse_extended_escape(); bool parse_match_any(); bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)()); bool parse_repeat_range(bool isbasic); bool parse_alt(); bool parse_set(); bool parse_backref(); void parse_set_literal(basic_char_set<charT, traits>& char_set); bool parse_inner_set(basic_char_set<charT, traits>& char_set); bool parse_QE(); bool parse_perl_extension(); bool add_emacs_code(bool negate); bool unwind_alts(std::ptrdiff_t last_paren_start); digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set); charT unescape_character(); regex_constants::syntax_option_type parse_options();private: typedef bool (basic_regex_parser::*parser_proc_type)(); typedef typename traits::string_type string_type; typedef typename traits::char_class_type char_class_type; parser_proc_type m_parser_proc; // the main parser to use const charT* m_base; // the start of the string being parsed const charT* m_end; // the end of the string being parsed const charT* m_position; // our current parser position unsigned m_mark_count; // how many sub-expressions we have std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted). std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative bool m_has_case_change; // true if somewhere in the current block the case has changed#if defined(BOOST_MSVC) && defined(_M_IX86) // This is an ugly warning suppression workaround (for warnings *inside* std::vector // that can not otherwise be suppressed)... BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*)); std::vector<long> m_alt_jumps; // list of alternative in the current scope.#else std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.#endif basic_regex_parser& operator=(const basic_regex_parser&); basic_regex_parser(const basic_regex_parser&);};template <class charT, class traits>basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data) : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false){}template <class charT, class traits>void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags){ // pass l_flags on to base class: this->init(l_flags); // set up pointers: m_position = m_base = p1; m_end = p2; // empty strings are errors: if(p1 == p2) { fail(regex_constants::error_empty, 0); return; } // select which parser to use: switch(l_flags & regbase::main_option_type) { case regbase::perl_syntax_group: m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended; break; case regbase::basic_syntax_group: m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic; break; case regbase::literal: m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal; break; } // parse all our characters: bool result = parse_all(); // // Unwind our alternatives: // unwind_alts(-1); // reset l_flags as a global scope (?imsx) may have altered them: this->flags(l_flags); // if we haven't gobbled up all the characters then we must // have had an unexpected ')' : if(!result) { fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_position)); return; } // if an error has been set then give up now: if(this->m_pdata->m_status) return; // fill in our sub-expression count: this->m_pdata->m_mark_count = 1 + m_mark_count; this->finalize(p1, p2);}template <class charT, class traits>void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position){ if(0 == this->m_pdata->m_status) // update the error code if not already set this->m_pdata->m_status = error_code; m_position = m_end; // don't bother parsing anything else // get the error message: std::string message = this->m_pdata->m_ptraits->error_string(error_code); // and raise the exception, this will do nothing if exceptions are disabled:#ifndef BOOST_NO_EXCEPTIONS if(0 == (this->flags() & regex_constants::no_except)) { boost::regex_error e(message, error_code, position); e.raise(); }#else (void)position; // suppress warnings.#endif}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_all(){ bool result = true; while(result && (m_position != m_end)) { result = (this->*m_parser_proc)(); } return result;}#ifdef BOOST_MSVC#pragma warning(push)#pragma warning(disable:4702)#endiftemplate <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_basic(){ switch(this->m_traits.syntax_type(*m_position)) { case regex_constants::syntax_escape: return parse_basic_escape(); case regex_constants::syntax_dot: return parse_match_any(); case regex_constants::syntax_caret: ++m_position; this->append_state(syntax_element_start_line); break; case regex_constants::syntax_dollar: ++m_position; this->append_state(syntax_element_end_line); break; case regex_constants::syntax_star: if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line)) return parse_literal(); else { ++m_position; return parse_repeat(); } case regex_constants::syntax_plus: if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex)) return parse_literal(); else { ++m_position; return parse_repeat(1); } case regex_constants::syntax_question: if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex)) return parse_literal(); else { ++m_position; return parse_repeat(0, 1); } case regex_constants::syntax_open_set: return parse_set(); case regex_constants::syntax_newline: if(this->flags() & regbase::newline_alt) return parse_alt(); else return parse_literal(); default: return parse_literal(); } return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_extended(){ bool result = true; switch(this->m_traits.syntax_type(*m_position)) { case regex_constants::syntax_open_mark: return parse_open_paren(); case regex_constants::syntax_close_mark: return false; case regex_constants::syntax_escape: return parse_extended_escape(); case regex_constants::syntax_dot: return parse_match_any(); case regex_constants::syntax_caret: ++m_position; this->append_state( (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line)); break; case regex_constants::syntax_dollar: ++m_position; this->append_state( (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line)); break; case regex_constants::syntax_star: if(m_position == this->m_base) { fail(regex_constants::error_badrepeat, 0); return false; } ++m_position; return parse_repeat(); case regex_constants::syntax_question: if(m_position == this->m_base) { fail(regex_constants::error_badrepeat, 0); return false; } ++m_position; return parse_repeat(0,1); case regex_constants::syntax_plus: if(m_position == this->m_base) { fail(regex_constants::error_badrepeat, 0); return false; } ++m_position; return parse_repeat(1); case regex_constants::syntax_open_brace: ++m_position; return parse_repeat_range(false); case regex_constants::syntax_close_brace: fail(regex_constants::error_brace, this->m_position - this->m_end); return false; case regex_constants::syntax_or: return parse_alt(); case regex_constants::syntax_open_set: return parse_set(); case regex_constants::syntax_newline: if(this->flags() & regbase::newline_alt) return parse_alt(); else return parse_literal(); case regex_constants::syntax_hash: // // If we have a mod_x flag set, then skip until // we get to a newline character: // if((this->flags() & (regbase::no_perl_ex|regbase::mod_x)) == regbase::mod_x) { while((m_position != m_end) && !is_separator(*m_position++)){} return true; } // Otherwise fall through: default: result = parse_literal(); break; } return result;}#ifdef BOOST_MSVC#pragma warning(pop)#endiftemplate <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_literal(){ // append this as a literal provided it's not a space character // or the perl option regbase::mod_x is not set: if( ((this->flags() & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex)) != regbase::mod_x) || !this->m_traits.isctype(*m_position, this->m_mask_space)) this->append_literal(*m_position); ++m_position; return true;}template <class charT, class traits>bool basic_regex_parser<charT, traits>::parse_open_paren(){ // // skip the '(' and error check: // if(++m_position == m_end) { fail(regex_constants::error_paren, m_position - m_base); return false; } // // begin by checking for a perl-style (?...) extension: // if( ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0) || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) ) { if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question) return parse_perl_extension(); } // // update our mark count, and append the required state: // unsigned markid = 0; if(0 == (this->flags() & regbase::nosubs)) markid = ++m_mark_count; re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace))); pb->index = markid; std::ptrdiff_t last_paren_start = this->getoffset(pb); // back up insertion point for alternations, and set new point: std::ptrdiff_t last_alt_point = m_alt_insert_point; this->m_pdata->m_data.align(); m_alt_insert_point = this->m_pdata->m_data.size(); // // back up the current flags in case we have a nested (?imsx) group: // regex_constants::syntax_option_type opts = this->flags(); bool old_case_change = m_has_case_change; m_has_case_change = false; // no changes to this scope as yet... // // now recursively add more states, this will terminate when we get to a // matching ')' : // parse_all(); // // Unwind pushed alternatives: // if(0 == unwind_alts(last_paren_start)) return false; // // restore flags: // if(m_has_case_change) { // the case has changed in one or more of the alternatives // within the scoped (...) block: we have to add a state // to reset the case sensitivity: static_cast<re_case*>( this->append_state(syntax_element_toggle_case, sizeof(re_case)) )->icase = opts & regbase::icase; } this->flags(opts); m_has_case_change = old_case_change; // // we either have a ')' or we have run out of characters prematurely: // if(m_position == m_end) { this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end)); return false;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?