regex_compiler.hpp
来自「support vector clustering for vc++」· HPP 代码 · 共 554 行 · 第 1/2 页
HPP
554 行
///////////////////////////////////////////////////////////////////////////////
/// \file regex_compiler.hpp
/// Contains the definition of regex_compiler, a factory for building regex objects
/// from strings.
//
// Copyright 2004 Eric Niebler. Distributed under the Boost
// Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
#define BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
// MS compatible compilers support #pragma once
#if defined(_MSC_VER) && (_MSC_VER >= 1020)
# pragma once
#endif
#include <boost/xpressive/basic_regex.hpp>
#include <boost/xpressive/detail/dynamic/parser.hpp>
#include <boost/xpressive/detail/dynamic/parse_charset.hpp>
#include <boost/xpressive/detail/dynamic/parser_enum.hpp>
#include <boost/xpressive/detail/dynamic/parser_traits.hpp>
#include <boost/xpressive/detail/core/linker.hpp>
#include <boost/xpressive/detail/core/optimize.hpp>
namespace boost { namespace xpressive
{
///////////////////////////////////////////////////////////////////////////////
// regex_compiler
//
/// \brief Class template regex_compiler is a factory for building basic_regex objects from a string.
///
/// Class template regex_compiler is used to construct a basic_regex object from a string. The string
/// should contain a valid regular expression. You can imbue a regex_compiler object with a locale,
/// after which all basic_regex objects created with that regex_compiler object will use that locale.
/// After creating a regex_compiler object, and optionally imbuing it with a locale, you can call the
/// compile() method to construct a basic_regex object, passing it the string representing the regular
/// expression. You can call compile() multiple times on the same regex_compiler object. Two basic_regex
/// objects compiled from the same string will have different regex_id's.
template<typename BidiIter, typename RegexTraits, typename CompilerTraits>
struct regex_compiler
{
// Public member typedefs derived from the template arguments.
// iterator_type: the BidiIter template argument.
typedef BidiIter iterator_type;
// char_type: the value type of BidiIter, as computed by iterator_value.
typedef typename iterator_value<BidiIter>::type char_type;
// string_type: std::basic_string of char_type; the pattern type accepted by compile().
typedef std::basic_string<char_type> string_type;
// flag_type: the syntax option bitmask type accepted by compile().
typedef regex_constants::syntax_option_type flag_type;
// traits_type: the RegexTraits template argument.
typedef RegexTraits traits_type;
// char_class_type: the character-class type defined by the traits.
typedef typename traits_type::char_class_type char_class_type;
// locale_type: the locale type defined by the traits; used by imbue() and getloc().
typedef typename traits_type::locale_type locale_type;
/// Construct a regex_compiler, optionally from an existing traits object.
///
/// \param traits The regex traits object used to initialize this compiler's
///        traits_ member. Defaults to a default-constructed RegexTraits.
explicit regex_compiler(RegexTraits const &traits = RegexTraits())
: mark_count_(0)
, hidden_mark_count_(0)
, traits_(traits)
, upper_(0)
{
// Look up the "upper" character class through the traits and cache it in
// upper_. This is done in the body rather than the initializer list because
// rxtraits() reads traits_, which must already be constructed.
this->upper_ = lookup_classname(this->rxtraits(), "upper");
// A zero result means the lookup failed; BOOST_ASSERT flags that here.
BOOST_ASSERT(0 != this->upper_);
}
///////////////////////////////////////////////////////////////////////////
// imbue
/// Specify the locale to be used by a regex_compiler.
///
/// The locale is forwarded to the compiler traits member, and the cached
/// "upper" character class is looked up again afterwards.
///
/// \param loc The locale that this regex_compiler should use.
/// \return The previous locale.
locale_type imbue(locale_type loc)
{
    // Install the new locale first so that the lookup below sees it.
    locale_type previous = this->traits_.imbue(loc);

    // Refresh the cached "upper" class (presumably its value can differ
    // between locales -- confirm against the traits implementation).
    this->upper_ = lookup_classname(this->rxtraits(), "upper");
    BOOST_ASSERT(0 != this->upper_);

    return previous;
}
///////////////////////////////////////////////////////////////////////////
// getloc
/// Get the locale used by a regex_compiler.
///
/// \return The locale that this regex_compiler uses, as reported by its
///         compiler traits member.
locale_type getloc() const
{
return this->traits_.getloc();
}
///////////////////////////////////////////////////////////////////////////
// compile
/// Builds a basic_regex object from a pattern string.
///
/// \param pat A string_type (std::basic_string<char_type>) containing the regular expression pattern.
/// \param flags Optional bitmask that determines how the pat string is interpreted. (See syntax_option_type.)
/// \return A basic_regex object corresponding to the regular expression represented by the string.
/// \pre pat contains a valid string-based representation of a regular expression.
/// \throw regex_error when the string has invalid regular expression syntax.
basic_regex<BidiIter> compile(string_type pat, flag_type flags = regex_constants::ECMAScript)
{
    // Clear the per-compile state, then apply the caller's syntax options.
    this->reset();
    this->traits_.flags(flags);

    string_iterator cur = pat.begin();
    string_iterator const last = pat.end();

    // At the top level a regex is a sequence of alternates. Anything left
    // unconsumed after parsing them is reported as a parenthesis mismatch.
    alternates_list branches;
    this->parse_alternates(cur, last, branches);
    detail::ensure(cur == last, regex_constants::error_paren, "mismatched parenthesis");

    // Turn the alternates into a single matcher sequence and terminate it.
    detail::sequence<BidiIter> xpr = detail::alternates_to_matchable(branches, alternates_factory());
    xpr += detail::make_dynamic_xpression<BidiIter>(detail::end_matcher());

    // Visit the parse tree to fill in the back-pointers.
    detail::xpression_linker<char_type> linker(this->rxtraits());
    xpr.first->link(linker);

    // Bundle everything the regex needs into a regex_impl object.
    detail::regex_impl<BidiIter> impl;
    impl.mark_count_ = this->mark_count_;
    impl.hidden_mark_count_ = this->hidden_mark_count_;
    impl.xpr_ = xpr.first;
    impl.traits_.reset(new RegexTraits(this->rxtraits()));

    // Optimization: get the peek chars OR the boyer-moore search string.
    detail::optimize_regex(impl, this->rxtraits(), detail::is_random<BidiIter>());

    return detail::core_access<BidiIter>::make_regex(impl);
}
private:
// Private helper typedefs used by the parsing routines.
// string_iterator: const iterator over the pattern string being parsed.
typedef typename string_type::const_iterator string_iterator;
// alternates_list: list of matcher sequences; parse_alternates() appends one
// entry per parsed sequence.
typedef std::list<detail::sequence<BidiIter> > alternates_list;
// escape_value: detail::escape_value specialized for this compiler's character
// and character-class types.
typedef detail::escape_value<char_type, char_class_type> escape_value;
// alternates_factory: factory type handed to detail::alternates_to_matchable()
// when a list of alternates is converted into a matcher.
typedef detail::alternates_factory_impl<BidiIter, traits_type> alternates_factory;
///////////////////////////////////////////////////////////////////////////
// reset
/// INTERNAL ONLY
/// Restores the per-compile state: both mark counters go back to zero and
/// the syntax flags on the compiler traits go back to ECMAScript.
void reset()
{
    // counters accumulated by the previous compile
    this->hidden_mark_count_ = 0;
    this->mark_count_ = 0;

    // default grammar
    this->traits_.flags(regex_constants::ECMAScript);
}
///////////////////////////////////////////////////////////////////////////
// rxtraits
/// INTERNAL ONLY
/// Mutable access to the regex traits object exposed by the compiler
/// traits member.
traits_type &rxtraits()
{
    traits_type &inner = this->traits_.traits();
    return inner;
}
///////////////////////////////////////////////////////////////////////////
// rxtraits (const)
/// INTERNAL ONLY
/// Read-only access to the regex traits object exposed by the compiler
/// traits member.
traits_type const &rxtraits() const
{
    traits_type const &inner = this->traits_.traits();
    return inner;
}
///////////////////////////////////////////////////////////////////////////
// parse_alternates
/// INTERNAL ONLY
/// Parses one or more sequences separated by the alternate token and
/// appends each parsed sequence to \c alternates. On return, \c begin
/// points just past the last parsed sequence; a token that turned out not
/// to be an alternate is left unconsumed.
void parse_alternates(string_iterator &begin, string_iterator end, alternates_list &alternates)
{
    using namespace regex_constants;

    for(;;)
    {
        alternates.push_back(this->parse_sequence(begin, end));

        // get_token() advances begin, so remember where this sequence
        // stopped in case the next token is not an alternate.
        string_iterator const resume_at = begin;

        bool const more = begin != end && token_alternate == this->traits_.get_token(begin, end);
        if(!more)
        {
            // put back whatever get_token() consumed
            begin = resume_at;
            return;
        }
    }
}
///////////////////////////////////////////////////////////////////////////
// parse_group
/// INTERNAL ONLY
detail::sequence<BidiIter> parse_group(string_iterator &begin, string_iterator end)
{
using namespace regex_constants;
int mark_nbr = 0;
bool keeper = false;
bool lookahead = false;
bool lookbehind = false;
bool negative = false;
std::size_t old_mark_count = this->mark_count_;
detail::sequence<BidiIter> seq, seq_end;
string_iterator tmp = string_iterator();
syntax_option_type old_flags = this->traits_.flags();
switch(this->traits_.get_group_type(begin, end))
{
case token_no_mark:
// Don't process empty groups like (?:) or (?i)
// BUGBUG this doesn't handle the degenerate (?:)+ correctly
if(token_group_end == this->traits_.get_token(tmp = begin, end))
{
return this->parse_atom(begin = tmp, end);
}
break;
case token_negative_lookahead:
negative = true; // fall-through
case token_positive_lookahead:
lookahead = true;
seq_end = detail::make_dynamic_xpression<BidiIter>(detail::true_matcher());
break;
case token_negative_lookbehind:
negative = true; // fall-through
case token_positive_lookbehind:
lookbehind = true;
seq_end = detail::make_dynamic_xpression<BidiIter>(detail::true_matcher());
break;
case token_independent_sub_expression:
keeper = true;
seq_end = detail::make_dynamic_xpression<BidiIter>(detail::true_matcher());
break;
case token_comment:
while(detail::ensure(begin != end, error_paren, "mismatched parenthesis"))
{
switch(this->traits_.get_token(begin, end))
{
case token_group_end: return this->parse_atom(begin, end);
case token_escape: detail::ensure(begin != end, error_escape, "incomplete escape sequence");
case token_literal: ++begin;
default:;
}
}
break;
default:
mark_nbr = static_cast<int>(++this->mark_count_);
seq = detail::make_dynamic_xpression<BidiIter>(detail::mark_begin_matcher(mark_nbr));
seq_end = detail::make_dynamic_xpression<BidiIter>(detail::mark_end_matcher(mark_nbr));
break;
}
// alternates
alternates_list alternates;
this->parse_alternates(begin, end, alternates);
detail::ensure
(
begin != end && token_group_end == this->traits_.get_token(begin, end)
, error_paren
, "mismatched parenthesis"
);
seq += detail::alternates_to_matchable(alternates, alternates_factory());
seq += seq_end;
typedef shared_ptr<detail::matchable<BidiIter> const> xpr_type;
bool do_save = (this->mark_count_ != old_mark_count);
if(lookahead)
{
detail::lookahead_matcher<xpr_type> lookahead(seq.first, negative, do_save);
seq = detail::make_dynamic_xpression<BidiIter>(lookahead);
}
else if(lookbehind)
{
detail::lookbehind_matcher<xpr_type> lookbehind(seq.first, negative, do_save);
seq = detail::make_dynamic_xpression<BidiIter>(lookbehind);
}
else if(keeper) // independent sub-expression
{
detail::keeper_matcher<xpr_type> keeper(seq.first, do_save);
seq = detail::make_dynamic_xpression<BidiIter>(keeper);
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?