📄 chset.hpp
字号:
/*=============================================================================
Character set
Spirit V1.3.1
Copyright (c) 2001, Joel de Guzman
This software is provided 'as-is', without any express or implied
warranty. In no event will the copyright holder be held liable for
any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute
it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product documentation
would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must
not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source
distribution.
Acknowledgements:
Special thanks to Dan Nuffer, John (EBo) David, Chris Uzdavinis,
and Doug Gregor. These people are most instrumental in steering
Spirit in the right direction.
Special thanks also to people who have contributed to the code base
and sample code, ported Spirit to various platforms and compilers,
gave suggestions, reported and provided bug fixes. Alexander
Hirner, Andy Elvey, Bogdan Kushnir, Brett Calcott, Bruce Florman,
Changzhe Han, Colin McPhail, Hakki Dogusan, Jan Bares, Joseph
Smith, Martijn W. van der Lee, Raghavendra Satish, Remi Delcos, Tom
Spilman, Vladimir Prus, W. Scott Dillman, David A. Greene, Bob
Bailey, Hartmut Kaiser.
Finally special thanks also to people who gave feedback and
valuable comments, particularly members of Spirit's Source Forge
mailing list and boost.org.
URL: http://spirit.sourceforge.net/
=============================================================================*/
#ifndef SPIRIT_CHSET_HPP
#define SPIRIT_CHSET_HPP
///////////////////////////////////////////////////////////////////////////////
#include "boost/spirit/MSVC/parser.hpp"
#include "boost/spirit/MSVC/primitives.hpp"
#include <cassert>
#include <vector>
#include <algorithm>
#include <functional>
#include "boost/limits.hpp"
///////////////////////////////////////////////////////////////////////////////
namespace spirit {
// Forward declaration of the character set class template.
//
// No default template argument is given here: the class definition later in
// this file already supplies `CharT = char`, and C++ does not allow a default
// template argument to be specified by two declarations in the same scope
// (conforming compilers reject it as a redefinition). `chset<>` therefore
// becomes usable once the class definition has been seen.
template <typename CharT>
class chset;
namespace impl {
struct chset_converter ;
namespace chset_converter_namespace
{
template <typename CharTA, typename CharTB>
void convert(chset<CharTA>& dest, chset<CharTB> const& src);
}
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
//
// range class
//
// Implements a closed range of values. This class is used in
// the implementation of the range_run class.
//
///////////////////////////////////////////////////////////////////////////////
// A closed range of character values, used as the building block of
// range_run. Only the interface is declared here; the member definitions are
// not in this part of the file, so the per-member notes below are inferred
// from the names and should be confirmed against those definitions.
template <typename CharT>
struct range {
    // End points of the range.
    CharT first;
    CharT last;

    // Builds a range from its two end points.
    range(CharT lo, CharT hi);

    // Presumably reports whether the end points form a well-ordered range.
    bool is_valid() const;

    // Presumably: does this range contain the single value / the whole of
    // `other`?
    bool includes(CharT value) const;
    bool includes(range const& other) const;

    // Presumably: can `other` be coalesced with this range?
    bool is_adjacent(range const& other) const;

    // Presumably widens this range so that it also covers `other`.
    void merge(range const& other);

    // Ordering against another range and against a single value.
    bool operator<(range const& other) const;
    bool operator<(CharT value) const;
};
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
//
// range_run
//
// An implementation of a sparse bit (boolean) set. The set uses a
// sorted vector of disjoint ranges. This class implements the bare
// minimum essentials from which the full range of set operators
// can be implemented. The set is constructed from ranges.
// Internally, adjacent or overlapping ranges are coalesced.
//
// range_runs are very space-economical in situations where there
// are lots of ranges and a few individual disjoint values.
// Searching is O(log n) where n is the number of ranges.
//
///////////////////////////////////////////////////////////////////////////////
// Sparse boolean set over CharT values, stored as a vector of ranges.
// Only the interface is declared here; member definitions are not in this
// part of the file.
template <typename CharT>
class range_run {
    // The converter helper is allowed to touch the range vector directly.
    friend struct chset_converter;

public:
    // Iterator types of the underlying range vector.
    typedef typename std::vector<range<CharT> >::const_iterator const_iterator;
    typedef typename std::vector<range<CharT> >::iterator iterator;

    // Read-only traversal of the stored ranges.
    const_iterator begin() const;
    const_iterator end() const;

    // Membership query for a single value.
    bool test(CharT value) const;

    // Add a range to / remove a range from the set.
    void set(range<CharT> const& r);
    void clear(range<CharT> const& r);

    // Overload taking no range; presumably empties the whole set -- confirm
    // against the definition.
    void clear();

private:
    // The stored ranges.
    std::vector<range<CharT> > run;

    // Internal helper used when inserting a range at `pos`; presumably
    // coalesces it with its neighbours -- confirm against the definition.
    void merge(iterator pos, range<CharT> const& r);
};
} //end namespace impl
///////////////////////////////////////////////////////////////////////////////
//
// chset free operators
//
// Where a and b are both chsets, implements:
//
// a | b, a & b, a - b, a ^ b
//
// Where a is a chset, implements:
//
// ~a
//
///////////////////////////////////////////////////////////////////////////////
// ~a : set inverse
template <typename CharT>
chset<CharT> operator~(chset<CharT> const& operand);

// a | b : set union
template <typename CharT>
chset<CharT> operator|(chset<CharT> const& lhs, chset<CharT> const& rhs);

// a & b : set intersection
template <typename CharT>
chset<CharT> operator&(chset<CharT> const& lhs, chset<CharT> const& rhs);

// a - b : set difference
template <typename CharT>
chset<CharT> operator-(chset<CharT> const& lhs, chset<CharT> const& rhs);

// a ^ b : set xor
template <typename CharT>
chset<CharT> operator^(chset<CharT> const& lhs, chset<CharT> const& rhs);
///////////////////////////////////////////////////////////////////////////////
//
// chset class
//
// Character set class. Matches a set of characters over a finite
// range bounded by the limits of its template parameter CharT.
// This class is an optimization of a parser that acts on a set
// of single characters. The template class is parameterized by the
// character type (CharT) and can work efficiently with 8, 16 and
// 32 bit characters.
//
// chsets are constructed from literals (e.g. 'x'), chlits, ranges,
// anychar and nothing (see primitives.hpp) or copy-constructed
// from another chset. The chset class uses a copy-on-write scheme
// that enables instances to be passed along easily by value.
// Examples:
//
// chset<> s1('x');
// chset<> s2(anychar - s1);
//
// Optionally, character sets may also be constructed using a
// definition string following a syntax that resembles posix style
// regular expression character sets, except that double quotes
// delimit the set elements instead of square brackets and there is
// no special negation '^' character.
//
// Character set definition strings follow the meta-syntax:
//
// range = anychar >> '-' >> anychar;
// set = *(range | anychar);
//
// Since we are defining the set using a string, the usual C/C++
// literal string syntax rules apply. Examples:
//
// chset<> s1("a-zA-Z"); // alphabetic characters
// chset<> s2("0-9a-fA-F"); // hexadecimal characters
// chset<> s3("actgACTG"); // DNA identifiers
// chset<> s4("\xff\xfe"); // Hexadecimal 0xFF and 0xFE
//
// The standard Spirit set operators apply (see operators.hpp) plus
// an additional character-set-specific inverse (negation) operator:
//
// ~a // Set inverse
// a | b // Set union
// a & b // Set intersection
// a - b // Set difference
// a ^ b // Set xor
//
// where operands a and b are both chsets or one of the operands is
// either a literal character, a chlit, a range, anychar or
// nothing. Special optimized overloads are provided for anychar or
// nothing operands. A nothing operand is converted to an empty
// set, while an anychar operand is converted to a set having
// elements of the full range of the character type used (e.g. 0-
// 255 for unsigned 8 bit chars).
//
// A special case is ~anychar which yields nothing, but ~nothing
// is illegal. Inversion of anychar is a one-way trip not unlike
// converting a T* to a void*.
//
// An assignment and a copy constructor are provided to allow mixed
// conversion from a chset<A> to a chset<B>. This is possible when
// type A is convertible to type B. For example if type A is a char
// and type B is a wchar_t.
//
///////////////////////////////////////////////////////////////////////////////
template <typename CharT = char>
class chset: public char_parser<chset<CharT> > {
public:
// Builds a character set from a zero-terminated definition string.
//
// The string is scanned left to right. "x-y" adds the range x..y, any other
// character adds itself, and a '-' that is immediately followed by the
// terminator adds both the preceding character and a literal '-'.
// After a range "x-y" has been consumed, scanning resumes with y as the
// current character (so y may itself start a further range).
template <typename CharTB>
explicit
chset(CharTB const* definition)
: ptr(new rep)
{
    for (CharTB cur = *definition++; cur != 0; )
    {
        CharTB const peek = *definition++;

        if (peek != '-')
        {
            // Plain character: add it on its own and move on.
            ptr->set(impl::range<CharT>(cur, cur));
            cur = peek;
            continue;
        }

        CharTB const upper = *definition++;
        if (upper == 0)
        {
            // Trailing '-': nothing follows it, so treat it literally.
            ptr->set(impl::range<CharT>(cur, cur));
            ptr->set(impl::range<CharT>('-', '-'));
            break;
        }

        // Proper range "cur-upper".
        ptr->set(impl::range<CharT>(cur, upper));
        cur = upper;
    }
}
// Converting constructor: builds a chset<CharT> from a chset<CharTB>.
// Starts from a freshly allocated rep and delegates the copy to
// impl::chset_converter_namespace::convert.
template <typename CharTB>
chset(chset<CharTB> const& arg)
: ptr(new rep)
{
    // Fixed: the qualified name was written with a single ':' before
    // `convert`, which is not a valid scope-resolution operator and does not
    // compile. It now matches the call in the converting operator=.
    impl::chset_converter_namespace::convert(*this, arg);
}
// Converting assignment from a chset over another character type.
// rep::detach_clear runs first (its definition is beyond the visible part of
// this file; presumably it leaves this chset with an unshared, emptied rep),
// then the contents of rhs are converted into this object.
template <typename CharTB>
chset&
operator=(chset<CharTB> const& rhs)
{
    chset& self = *this;
    rep::detach_clear(self.ptr);
    impl::chset_converter_namespace::convert(self, rhs);
    return self;
}
// --- construction / destruction ---------------------------------------------
// Declarations only; the definitions are not in this part of the file.
chset();
chset(chset const& other);
explicit chset(CharT ch);                   // from a single character
explicit chset(chlit<CharT> const& lit);    // from a chlit
explicit chset(range<CharT> const& r);      // from a range parser
explicit chset(anychar_ any);               // from anychar
explicit chset(nothing_ none);              // from nothing
~chset();

// --- assignment (same source kinds as the constructors) ----------------------
chset& operator=(chset const& rhs);
chset& operator=(CharT rhs);
chset& operator=(chlit<CharT> const& rhs);
chset& operator=(range<CharT> const& rhs);
chset& operator=(anychar_ rhs);
chset& operator=(nothing_ rhs);

// --- element-level operations ------------------------------------------------
// Presumably: membership test, add one character, add a range, remove a
// range -- confirm against the definitions.
bool test(CharT ch) const;
void add(CharT ch);
void set(range<CharT> const& r);
void clear(range<CharT> const& r);
class rep
{
public:
// Iterator types over a vector of impl::range<CharT>.
typedef typename std::vector<impl::range<CharT> >::iterator iterator;
typedef typename std::vector<impl::range<CharT> >::const_iterator
const_iterator;
// A new rep starts with a use count (uc) of 1.
rep() : uc(1) {}
// Copying duplicates the source's rr; the copy gets its own use count of 1.
rep(rep const& arg) : uc(1), rr(arg.rr) {}
// The following five members simply forward to the rr member.
bool test(CharT v) const { return rr.test(v); }
void set(impl::range<CharT> const& r) { rr.set(r); }
void clear(impl::range<CharT> const& r) { rr.clear(r); }
const_iterator begin() const { return rr.begin() ;}
const_iterator end() const { return rr.end() ; }
// Registers one more user of *ptr and hands the same pointer back.
static rep* ref(rep* ptr) { ptr->uc++; return ptr; }
// Drops one user of *ptr; deletes it when the count reaches zero.
static void deref(rep* ptr) { if (--ptr->uc == 0) delete ptr; }
static void detach(rep*& ptr)
{
if (ptr->uc > 1)
{
rep* t = new rep(*ptr);
--ptr->uc;
ptr = t;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -