📄 basic_regex_parser.hpp
字号:
/*
*
* Copyright (c) 2004
* John Maddock
*
* Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*
*/
/*
* LOCATION: see http://www.boost.org for most recent version.
* FILE basic_regex_parser.hpp
* VERSION see <boost/version.hpp>
* DESCRIPTION: Declares template class basic_regex_parser.
*/
#ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
#define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
#ifdef BOOST_HAS_ABI_HEADERS
# include BOOST_ABI_PREFIX
#endif
namespace boost{
namespace re_detail{
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable:4244)
#endif
//
// basic_regex_parser:
// Walks a pattern held in a range of charT and turns it into states by
// calling members reached through this-> (append_state, append_literal,
// finalize, ...) that are not declared in this class; it derives publicly
// from basic_regex_creator<charT, traits>.
//
template <class charT, class traits>
class basic_regex_parser : public basic_regex_creator<charT, traits>
{
public:
// forwards data to the basic_regex_creator base class:
basic_regex_parser(regex_data<charT, traits>* data);
// entry point: parses the characters in [p1, p2) using the syntax selected by flags:
void parse(const charT* p1, const charT* p2, unsigned flags);
// records error_code if no earlier error is stored, moves the parse position
// to the end of the input, and raises a regex_error unless exceptions are
// compiled out or the no_except flag is set:
void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
// calls the selected parser until the input is used up or the parser returns false:
bool parse_all();
// the three per-token parsers that parse() can select between:
bool parse_basic();
bool parse_extended();
bool parse_literal();
// helpers called from the parsers above; most of their definitions lie
// outside this excerpt:
bool parse_open_paren();
bool parse_basic_escape();
bool parse_extended_escape();
bool parse_match_any();
// with no arguments the bounds default to 0 and the largest std::size_t value:
bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
bool parse_repeat_range(bool isbasic);
bool parse_alt();
bool parse_set();
bool parse_backref();
void parse_set_literal(basic_char_set<charT, traits>& char_set);
bool parse_inner_set(basic_char_set<charT, traits>& char_set);
bool parse_QE();
bool parse_perl_extension();
bool add_emacs_code(bool negate);
bool unwind_alts(std::ptrdiff_t last_paren_start);
digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
charT unescape_character();
regex_constants::syntax_option_type parse_options();
private:
// signature shared by parse_basic, parse_extended and parse_literal:
typedef bool (basic_regex_parser::*parser_proc_type)();
typedef typename traits::string_type string_type;
typedef typename traits::char_class_type char_class_type;
parser_proc_type m_parser_proc; // the main parser to use
const charT* m_base; // the start of the string being parsed
const charT* m_end; // the end of the string being parsed
const charT* m_position; // our current parser position
unsigned m_mark_count; // how many sub-expressions we have
std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
bool m_has_case_change; // true if somewhere in the current block the case has changed
#if defined(BOOST_MSVC) && defined(_M_IX86)
// This is an ugly warning suppression workaround (for warnings *inside* std::vector
// that can not otherwise be suppressed)...
// The static assertion checks that long is at least as wide as a pointer on
// this target before it is used as the element type.
BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
std::vector<long> m_alt_jumps; // list of alternative in the current scope.
#else
std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
#endif
// copy assignment and copy construction are declared private; no definitions
// appear in this excerpt (presumably this makes the class non-copyable):
basic_regex_parser& operator=(const basic_regex_parser&);
basic_regex_parser(const basic_regex_parser&);
};
//
// Constructor:
// Hands data to the basic_regex_creator base class and puts every member of
// the parser into a known state.  The parser selector and the three input
// pointers are null until parse() assigns them, so they never hold
// indeterminate values.  Initialisers are listed in declaration order.
//
template <class charT, class traits>
basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
   : basic_regex_creator<charT, traits>(data),
     m_parser_proc(0),
     m_base(0),
     m_end(0),
     m_position(0),
     m_mark_count(0),
     m_paren_start(0),
     m_alt_insert_point(0),
     m_has_case_change(false)
{
}
//
// parse:
// Entry point.  Parses the characters in [p1, p2) using the syntax selected
// by flags.
//
// p1, p2 : start and one-past-the-end of the pattern text.
// flags  : syntax option flags; (flags & regbase::main_option_type) picks
//          the per-token parser.
//
// Errors are reported through fail(), which stores the error code and may
// raise an exception; in every error case the function returns without
// calling finalize().
//
template <class charT, class traits>
void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned flags)
{
   // pass flags on to base class:
   this->init(flags);
   // set up pointers:
   m_position = m_base = p1;
   m_end = p2;
   // empty strings are errors:
   if(p1 == p2)
   {
      fail(regex_constants::error_empty, 0);
      return;
   }
   // select which parser to use:
   switch(flags & regbase::main_option_type)
   {
   case regbase::perl_syntax_group:
      m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
      break;
   case regbase::basic_syntax_group:
      m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
      break;
   case regbase::literal:
      m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
      break;
   default:
      // Any other value means the main syntax options do not name one of the
      // parsers above.  Without this branch m_parser_proc would stay unset
      // and parse_all() would call through it, so report an error instead:
      fail(regex_constants::error_unknown, 0);
      return;
   }
   // parse all our characters:
   bool result = parse_all();
   //
   // Unwind our alternatives:
   //
   unwind_alts(-1);
   // reset flags as a global scope (?imsx) may have altered them:
   this->flags(flags);
   // if we haven't gobbled up all the characters then we must
   // have had an unexpected ')' :
   if(!result)
   {
      fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_position));
      return;
   }
   // if an error has been set then give up now:
   if(this->m_pdata->m_status)
      return;
   // fill in our sub-expression count:
   this->m_pdata->m_mark_count = 1 + m_mark_count;
   this->finalize(p1, p2);
}
//
// fail:
// Reports a parse error.
//
// error_code : the error to record; it is stored only when no error has
//              been stored before.
// position   : offset passed on to the regex_error object; unused when
//              BOOST_NO_EXCEPTIONS is defined.
//
template <class charT, class traits>
void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
{
   // the first error wins, later ones do not overwrite it:
   if(this->m_pdata->m_status == 0)
      this->m_pdata->m_status = error_code;
   // pretend we have consumed everything so that the parse loops stop:
   m_position = m_end;
   // look up the text for this error:
   std::string message = this->m_pdata->m_ptraits->error_string(error_code);
#ifndef BOOST_NO_EXCEPTIONS
   // callers that asked for no_except get the status code only:
   if(this->flags() & regex_constants::no_except)
      return;
   boost::regex_error e(message, error_code, position);
   e.raise();
#else
   (void)position; // suppress warnings.
#endif
}
//
// parse_all:
// Calls the selected parser repeatedly until the end of the input is
// reached.  Returns false as soon as one call returns false, otherwise
// true (including when there is nothing left to parse on entry).
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_all()
{
   while(m_position != m_end)
   {
      if(!(this->*m_parser_proc)())
         return false;
   }
   return true;
}
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable:4702)
#endif
//
// parse_basic:
// Handles the token at the current position for the basic syntax group,
// dispatching on the syntax type that the traits class reports for it.
// Returns whatever the chosen helper returns; the two anchor cases
// return true.
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_basic()
{
   switch(this->m_traits.syntax_type(*m_position))
   {
   // tokens that are handed straight to a helper:
   case regex_constants::syntax_escape:
      return parse_basic_escape();
   case regex_constants::syntax_dot:
      return parse_match_any();
   case regex_constants::syntax_open_set:
      return parse_set();
   // anchors: consume the character and emit the matching state:
   case regex_constants::syntax_caret:
      ++m_position;
      this->append_state(syntax_element_start_line);
      break;
   case regex_constants::syntax_dollar:
      ++m_position;
      this->append_state(syntax_element_end_line);
      break;
   // '*' is a repeat only when there is a previous state and that state
   // is not a start-of-line anchor; otherwise it is an ordinary character:
   case regex_constants::syntax_star:
      if(this->m_last_state && (this->m_last_state->type != syntax_element_start_line))
      {
         ++m_position;
         return parse_repeat();
      }
      return parse_literal();
   // '+' and '?' additionally need the emacs_ex flag to act as repeats:
   case regex_constants::syntax_plus:
      if(this->m_last_state && (this->m_last_state->type != syntax_element_start_line) && (this->flags() & regbase::emacs_ex))
      {
         ++m_position;
         return parse_repeat(1);
      }
      return parse_literal();
   case regex_constants::syntax_question:
      if(this->m_last_state && (this->m_last_state->type != syntax_element_start_line) && (this->flags() & regbase::emacs_ex))
      {
         ++m_position;
         return parse_repeat(0, 1);
      }
      return parse_literal();
   default:
      return parse_literal();
   }
   return true;
}
//
// parse_extended:
// Handles the token at the current position for the perl syntax group
// (the parser parse() selects for regbase::perl_syntax_group),
// dispatching on the syntax type that the traits class reports for it.
//
// Returns false for a closing parenthesis (left for the caller to deal
// with) and after any error reported through fail(); otherwise returns
// the result of the helper that handled the token, or true.
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_extended()
{
   bool result = true;
   switch(this->m_traits.syntax_type(*m_position))
   {
   case regex_constants::syntax_open_mark:
      return parse_open_paren();
   case regex_constants::syntax_close_mark:
      // not consumed here: returning false stops parse_all():
      return false;
   case regex_constants::syntax_escape:
      return parse_extended_escape();
   case regex_constants::syntax_dot:
      return parse_match_any();
   case regex_constants::syntax_caret:
      ++m_position;
      // with no_mod_m set the anchor refers to the buffer, not the line:
      this->append_state(
         (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
      break;
   case regex_constants::syntax_dollar:
      ++m_position;
      this->append_state(
         (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
      break;
   case regex_constants::syntax_star:
      // a repeat operator as the very first character has nothing to repeat:
      if(m_position == this->m_base)
      {
         fail(regex_constants::error_badrepeat, 0);
         return false;
      }
      ++m_position;
      return parse_repeat();
   case regex_constants::syntax_question:
      if(m_position == this->m_base)
      {
         fail(regex_constants::error_badrepeat, 0);
         return false;
      }
      ++m_position;
      return parse_repeat(0,1);
   case regex_constants::syntax_plus:
      if(m_position == this->m_base)
      {
         fail(regex_constants::error_badrepeat, 0);
         return false;
      }
      ++m_position;
      return parse_repeat(1);
   case regex_constants::syntax_open_brace:
      ++m_position;
      return parse_repeat_range(false);
   case regex_constants::syntax_close_brace:
      // report the offset of the stray '}' from the start of the pattern,
      // like every other error position in this parser (measuring from
      // m_end would give a zero or negative value):
      fail(regex_constants::error_brace, this->m_position - this->m_base);
      return false;
   case regex_constants::syntax_or:
      return parse_alt();
   case regex_constants::syntax_open_set:
      return parse_set();
   case regex_constants::syntax_hash:
      //
      // If we have a mod_x flag set, then skip until
      // we get to a newline character:
      //
      if((this->flags()
         & (regbase::no_perl_ex|regbase::mod_x))
         == regbase::mod_x)
      {
         // the separator itself is consumed by the post-increment:
         while((m_position != m_end) && !is_separator(*m_position++)){}
         return true;
      }
      // Otherwise fall through:
   default:
      result = parse_literal();
      break;
   }
   return result;
}
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif
//
// parse_literal:
// Consumes one character and, unless it is skipped, appends it as a
// literal.  A character is skipped only when mod_x is the sole bit set
// among main_option_type, mod_x and no_perl_ex and the traits class
// classifies the character under m_mask_space.  Always returns true.
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_literal()
{
   const bool drop_spaces =
      (this->flags() & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex)) == regbase::mod_x;
   // the character class is only queried when spaces are being dropped:
   if(!(drop_spaces && this->m_traits.isctype(*m_position, this->m_mask_space)))
      this->append_literal(*m_position);
   ++m_position;
   return true;
}
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_open_paren()
{
//
// skip the '(' and error check:
//
if(++m_position == m_end)
{
fail(regex_constants::error_paren, m_position - m_base);
return false;
}
//
// begin by checking for a perl-style (?...) extension:
//
if(
((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
|| ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
)
{
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
return parse_perl_extension();
}
//
// update our mark count, and append the required state:
//
unsigned markid = 0;
if(0 == (this->flags() & regbase::nosubs))
markid = ++m_mark_count;
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
pb->index = markid;
std::ptrdiff_t last_paren_start = this->getoffset(pb);
// back up insertion point for alternations, and set new point:
std::ptrdiff_t last_alt_point = m_alt_insert_point;
this->m_pdata->m_data.align();
m_alt_insert_point = this->m_pdata->m_data.size();
//
// back up the current flags in case we have a nested (?imsx) group:
//
regex_constants::syntax_option_type opts = this->flags();
bool old_case_change = m_has_case_change;
m_has_case_change = false; // no changes to this scope as yet...
//
// now recursively add more states, this will terminate when we get to a
// matching ')' :
//
parse_all();
//
// Unwind pushed alternatives:
//
if(0 == unwind_alts(last_paren_start))
return false;
//
// restore flags:
//
if(m_has_case_change)
{
// the case has changed in one or more of the alternatives
// within the scoped (...) block: we have to add a state
// to reset the case sensitivity:
static_cast<re_case*>(
this->append_state(syntax_element_toggle_case, sizeof(re_case))
)->icase = opts & regbase::icase;
}
this->flags(opts);
m_has_case_change = old_case_change;
//
// we either have a ')' or we have run out of characters prematurely:
//
if(m_position == m_end)
{
this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
return false;
}
BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
++m_position;
//
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -