cpp_regex_traits.hpp
来自「support vector clustering for vc++」· HPP 代码 · 共 677 行 · 第 1/2 页
HPP
677 行
///////////////////////////////////////////////////////////////////////////////
/// \file cpp_regex_traits.hpp
/// Contains the definition of the cpp_regex_traits\<\> template, which is a
/// wrapper for std::locale that can be used to customize the behavior of
/// static and dynamic regexes.
//
// Copyright 2004 Eric Niebler. Distributed under the Boost
// Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef BOOST_XPRESSIVE_TRAITS_CPP_REGEX_TRAITS_HPP_EAN_10_04_2005
#define BOOST_XPRESSIVE_TRAITS_CPP_REGEX_TRAITS_HPP_EAN_10_04_2005
// MS compatible compilers support #pragma once
#if defined(_MSC_VER) && (_MSC_VER >= 1020)
# pragma once
#endif
#include <climits>
#include <locale>
#include <sstream>
#include <string>
#include <boost/config.hpp>
#include <boost/assert.hpp>
#include <boost/integer.hpp>
#include <boost/mpl/assert.hpp>
#include <boost/type_traits/is_same.hpp>
#include <boost/xpressive/detail/detail_fwd.hpp>
#include <boost/xpressive/detail/utility/literals.hpp>
// From John Maddock:
// Fix for gcc prior to 3.4: std::ctype<wchar_t> doesn't allow masks to be combined, for example:
// std::use_facet<std::ctype<wchar_t> >(locale()).is(std::ctype_base::lower|std::ctype_base::upper, L'a');
// incorrectly returns false.
// NOTE: later versions of gcc define __GLIBCXX__, not __GLIBCPP__
#if BOOST_WORKAROUND(__GLIBCPP__, != 0)
# define BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
#endif
namespace boost { namespace xpressive
{
namespace detail
{
// umask_t
// An unsigned integral typedef of the same size as std::ctype_base::mask.
// The type is selected through boost::uint_t<>::least, using the width of
// std::ctype_base::mask in bits (its sizeof multiplied by CHAR_BIT).
typedef boost::uint_t<sizeof(std::ctype_base::mask) * CHAR_BIT>::least umask_t;
// Compile-time check that the selected type really has the same size as the mask type.
BOOST_MPL_ASSERT_RELATION(sizeof(std::ctype_base::mask), ==, sizeof(umask_t));
// Calculate what the size of the umaskex_t type should be to fit the 3 extra bitmasks
// 11 char categories in ctype_base
// + 3 extra categories for xpressive
// = 14 total bits needed
// umaskex_bits is therefore the larger of 14 and the width of umask_t in bits.
int const umaskex_bits = (14 > (sizeof(umask_t) * CHAR_BIT)) ? 14 : sizeof(umask_t) * CHAR_BIT;
// define an unsigned integral type with at least umaskex_bits
// (selected through boost::uint_t<>::fast)
typedef boost::uint_t<umaskex_bits>::fast umaskex_t;
// Compile-time check that a umask_t value always fits in a umaskex_t.
BOOST_MPL_ASSERT_RELATION(sizeof(umask_t), <=, sizeof(umaskex_t));
// mask_cast
// cast a ctype mask to a umaskex_t
// mask_cast<Mask>::value is the compile-time constant obtained by converting
// Mask to umask_t and storing the result as a umaskex_t.
template<std::ctype_base::mask Mask>
struct mask_cast
{
BOOST_STATIC_CONSTANT(umaskex_t, value = static_cast<umask_t>(Mask));
};
#ifdef __CYGWIN__
// Work around a gcc warning on cygwin
// For std::ctype_base::print the value is spelled out as the octal literal
// 0227 instead of being produced by the cast; the assertion checks that
// '\227' still compares equal to std::ctype_base::print.
template<>
struct mask_cast<std::ctype_base::print>
{
BOOST_MPL_ASSERT_RELATION('\227', ==, std::ctype_base::print);
BOOST_STATIC_CONSTANT(umaskex_t, value = 0227);
};
#endif
#ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION
// Namespace-scope definition of the static data member declared in the
// primary template; only emitted when BOOST_NO_INCLASS_MEMBER_INITIALIZATION
// is not defined.
template<std::ctype_base::mask Mask>
umaskex_t const mask_cast<Mask>::value;
#endif
#ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
// an unsigned integer with the highest bit set
// The literal 1 is converted to umaskex_t *before* the shift. Shifting a
// plain int by (width of umaskex_t - 1) moves the bit into or past the int's
// sign bit whenever umaskex_t is as wide as or wider than int; that is
// undefined behavior and not a valid constant expression, yet highest_bit is
// used as a default template argument of unused_mask.
umaskex_t const highest_bit = static_cast<umaskex_t>(1) << (sizeof(umaskex_t) * CHAR_BIT - 1);
///////////////////////////////////////////////////////////////////////////////
// unused_mask
// find a bit in an int that isn't set
//
// In   - the bits that are already taken.
// Out  - the single-bit candidate currently being tested; the search starts
//        at highest_bit by default.
// Done - true when the candidate bit Out is not set in In.
//
// Primary template (Done == false): the candidate is taken, so try the next
// lower bit by recursing with Out >> 1. The assertion rejects the case where
// Out is already 1, i.e. there is no lower bit left to try.
template<umaskex_t In, umaskex_t Out = highest_bit, bool Done = (0 == (Out & In))>
struct unused_mask
{
BOOST_MPL_ASSERT_RELATION(1, !=, Out);
BOOST_STATIC_CONSTANT(umaskex_t, value = (unused_mask<In, (Out >> 1)>::value));
};
// Partial specialization (Done == true): the candidate bit is free, so it is
// the result.
template<umaskex_t In, umaskex_t Out>
struct unused_mask<In, Out, true>
{
BOOST_STATIC_CONSTANT(umaskex_t, value = Out);
};
#ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION
// Namespace-scope definition of the static data member; only emitted when
// BOOST_NO_INCLASS_MEMBER_INITIALIZATION is not defined.
template<umaskex_t In, umaskex_t Out, bool Done>
umaskex_t const unused_mask<In, Out, Done>::value;
#endif
// The eleven std::ctype_base character categories, each converted to a
// umaskex_t through mask_cast so they can be combined with xpressive's
// extra category bits.
umaskex_t const std_ctype_alnum = mask_cast<std::ctype_base::alnum>::value;
umaskex_t const std_ctype_alpha = mask_cast<std::ctype_base::alpha>::value;
umaskex_t const std_ctype_cntrl = mask_cast<std::ctype_base::cntrl>::value;
umaskex_t const std_ctype_digit = mask_cast<std::ctype_base::digit>::value;
umaskex_t const std_ctype_graph = mask_cast<std::ctype_base::graph>::value;
umaskex_t const std_ctype_lower = mask_cast<std::ctype_base::lower>::value;
umaskex_t const std_ctype_print = mask_cast<std::ctype_base::print>::value;
umaskex_t const std_ctype_punct = mask_cast<std::ctype_base::punct>::value;
umaskex_t const std_ctype_space = mask_cast<std::ctype_base::space>::value;
umaskex_t const std_ctype_upper = mask_cast<std::ctype_base::upper>::value;
umaskex_t const std_ctype_xdigit = mask_cast<std::ctype_base::xdigit>::value;
// Reserve some bits for the implementation
// When __GLIBCXX__ is defined and is at least 20050209, bit 0x8000 is marked
// as reserved; otherwise no bit is reserved. The reserved bits are OR-ed
// into all_ctype_masks, so unused_mask never hands them out.
// NOTE(review): presumably that libstdc++ release uses 0x8000 internally -- confirm.
#if defined(__GLIBCXX__) && __GLIBCXX__ >= 20050209
umaskex_t const std_ctype_reserved = 0x8000;
#else
umaskex_t const std_ctype_reserved = 0;
#endif
// Bitwise-or all the ctype masks together
// The result is the set of bits that are already taken (standard categories
// plus reserved bits) and therefore unavailable for xpressive's extra
// categories.
umaskex_t const all_ctype_masks = std_ctype_reserved
| std_ctype_alnum | std_ctype_alpha | std_ctype_cntrl | std_ctype_digit
| std_ctype_graph | std_ctype_lower | std_ctype_print | std_ctype_punct
| std_ctype_space | std_ctype_upper | std_ctype_xdigit;
// Each extra category gets one bit chosen by unused_mask. Every bit picked
// so far is OR-ed into the argument of the next lookup, so the three bits
// are distinct from each other and from all_ctype_masks.
// define a new mask for "underscore" ("word" == alnum | underscore)
umaskex_t const non_std_ctype_underscore = unused_mask<all_ctype_masks>::value;
// define a new mask for "blank"
umaskex_t const non_std_ctype_blank = unused_mask<all_ctype_masks | non_std_ctype_underscore>::value;
// define a new mask for "newline"
umaskex_t const non_std_ctype_newline = unused_mask<all_ctype_masks | non_std_ctype_underscore | non_std_ctype_blank>::value;
#else
///////////////////////////////////////////////////////////////////////////////
// Ugly work-around for buggy ctype facets.
// This branch is compiled when BOOST_XPRESSIVE_BUGGY_CTYPE_FACET is defined.
// Instead of reusing the numeric values of std::ctype_base, every category
// is given its own bit: bits 0 through 10 for the standard categories and
// bits 11 through 13 for xpressive's extra categories.
umaskex_t const std_ctype_alnum = 1 << 0;
umaskex_t const std_ctype_alpha = 1 << 1;
umaskex_t const std_ctype_cntrl = 1 << 2;
umaskex_t const std_ctype_digit = 1 << 3;
umaskex_t const std_ctype_graph = 1 << 4;
umaskex_t const std_ctype_lower = 1 << 5;
umaskex_t const std_ctype_print = 1 << 6;
umaskex_t const std_ctype_punct = 1 << 7;
umaskex_t const std_ctype_space = 1 << 8;
umaskex_t const std_ctype_upper = 1 << 9;
umaskex_t const std_ctype_xdigit = 1 << 10;
umaskex_t const non_std_ctype_underscore = 1 << 11;
umaskex_t const non_std_ctype_blank = 1 << 12;
umaskex_t const non_std_ctype_newline = 1 << 13;
// std_masks
// Table of the real std::ctype_base masks (converted through mask_cast).
// The entries are listed in the same order as the bit positions of the
// std_ctype_* constants of this branch (alnum first, xdigit last), so the
// entry at index N is the std::ctype_base mask for the category whose
// xpressive bit is 1 << N.
static umaskex_t const std_masks[] =
{
mask_cast<std::ctype_base::alnum>::value
, mask_cast<std::ctype_base::alpha>::value
, mask_cast<std::ctype_base::cntrl>::value
, mask_cast<std::ctype_base::digit>::value
, mask_cast<std::ctype_base::graph>::value
, mask_cast<std::ctype_base::lower>::value
, mask_cast<std::ctype_base::print>::value
, mask_cast<std::ctype_base::punct>::value
, mask_cast<std::ctype_base::space>::value
, mask_cast<std::ctype_base::upper>::value
, mask_cast<std::ctype_base::xdigit>::value
};
// mylog2
// Table-driven bit-index lookup. For a value with exactly one bit set among
// bits 0 through 11, the result is the index of that bit; for 0 the result
// is 0. Each of the three low nibbles is looked up in its own string table,
// whose entries at positions 1, 2, 4 and 8 hold the index of the
// corresponding bit, and the three lookups are added together.
inline int mylog2(umaskex_t i)
{
    // Isolate the three nibbles that can carry a category bit.
    umaskex_t const nibble0 = i & 0xf;
    umaskex_t const nibble1 = (i & 0xf0) >> 4;
    umaskex_t const nibble2 = (i & 0xf00) >> 8;

    // At most one of these is non-zero when a single bit is set.
    int const from_nibble0 = "\0\0\1\0\2\0\0\0\3"[nibble0];
    int const from_nibble1 = "\0\4\5\0\6\0\0\0\7"[nibble1];
    int const from_nibble2 = "\0\10\11\0\12\0\0\0\13"[nibble2];

    return from_nibble0 + from_nibble1 + from_nibble2;
}
#endif
// convenient constant for the extra masks
// (the union of the underscore, blank and newline bits, i.e. every bit that
// does not come from std::ctype_base)
umaskex_t const non_std_ctype_masks = non_std_ctype_underscore | non_std_ctype_blank | non_std_ctype_newline;
///////////////////////////////////////////////////////////////////////////////
// cpp_regex_traits_base
// BUGBUG this should be replaced with a regex facet that lets you query for
// an array of underscore characters and an array of line separator characters.
template<typename Char, std::size_t SizeOfChar = sizeof(Char)>
struct cpp_regex_traits_base
{
protected:
void imbue(std::locale const &)
{
}
static bool is(std::ctype<Char> const &ct, Char ch, umaskex_t mask)
{
#ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
if(ct.is((std::ctype_base::mask)(umask_t)mask, ch))
{
return true;
}
#else
umaskex_t tmp = mask & ~non_std_ctype_masks;
for(umaskex_t i; 0 != (i = (tmp & (~tmp+1))); tmp &= ~i)
{
std::ctype_base::mask m = (std::ctype_base::mask)(umask_t)std_masks[mylog2(i)];
if(ct.is(m, ch))
{
return true;
}
}
#endif
return ((mask & non_std_ctype_blank) && cpp_regex_traits_base::is_blank(ch))
|| ((mask & non_std_ctype_underscore) && cpp_regex_traits_base::is_underscore(ch))
|| ((mask & non_std_ctype_newline) && cpp_regex_traits_base::is_newline(ch));
}
private:
static bool is_blank(Char ch)
{
BOOST_MPL_ASSERT_RELATION('\t', ==, L'\t');
return L'\t' == ch;
}
static bool is_underscore(Char ch)
{
BOOST_MPL_ASSERT_RELATION('_', ==, L'_');
return L'_' == ch;
}
static bool is_newline(Char ch)
{
BOOST_MPL_ASSERT_RELATION('\r', ==, L'\r');
BOOST_MPL_ASSERT_RELATION('\n', ==, L'\n');
BOOST_MPL_ASSERT_RELATION('\f', ==, L'\f');
return L'\r' == ch || L'\n' == ch || L'\f' == ch
|| (1 < SizeOfChar && (0x2028u == ch || 0x2029u == ch || 0x85u == ch));
}
};
#ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
template<typename Char>
struct cpp_regex_traits_base<Char, 1>
{
protected:
void imbue(std::locale const &loc)
{
int i = 0;
Char allchars[UCHAR_MAX + 1];
for(i = 0; i <= UCHAR_MAX; ++i)
{
allchars[i] = static_cast<Char>(i);
}
std::ctype<Char> const &ct = BOOST_USE_FACET(std::ctype<Char>, loc);
std::ctype_base::mask tmp[UCHAR_MAX + 1];
ct.is(allchars, allchars + UCHAR_MAX + 1, tmp);
for(i = 0; i <= UCHAR_MAX; ++i)
{
this->masks_[i] = static_cast<umask_t>(tmp[i]);
BOOST_ASSERT(0 == (this->masks_[i] & non_std_ctype_masks));
}
this->masks_[static_cast<unsigned char>('_')] |= non_std_ctype_underscore;
this->masks_[static_cast<unsigned char>(' ')] |= non_std_ctype_blank;
this->masks_[static_cast<unsigned char>('\t')] |= non_std_ctype_blank;
this->masks_[static_cast<unsigned char>('\n')] |= non_std_ctype_newline;
this->masks_[static_cast<unsigned char>('\r')] |= non_std_ctype_newline;
this->masks_[static_cast<unsigned char>('\f')] |= non_std_ctype_newline;
}
bool is(std::ctype<Char> const &, Char ch, umaskex_t mask) const
{
return 0 != (this->masks_[static_cast<unsigned char>(ch)] & mask);
}
private:
umaskex_t masks_[UCHAR_MAX + 1];
};
#endif
// version_tag
// Maps a character type to the traits version tag exposed by
// cpp_regex_traits. Every character type gets regex_traits_version_1_tag ...
template<typename Char>
struct version_tag
{
typedef regex_traits_version_1_tag type;
};
// ... except char, which gets regex_traits_version_1_case_fold_tag.
// NOTE(review): the tag types are declared elsewhere; going by the name, the
// char traits presumably advertise case-folding support -- confirm.
template<>
struct version_tag<char>
{
typedef regex_traits_version_1_case_fold_tag type;
};
} // namespace detail
///////////////////////////////////////////////////////////////////////////////
// cpp_regex_traits
//
/// \brief Encapsulates a std::locale for use by the
/// basic_regex\<\> class template.
template<typename Char>
struct cpp_regex_traits
: detail::cpp_regex_traits_base<Char>
{
typedef Char char_type;
typedef std::basic_string<char_type> string_type;
typedef std::locale locale_type;
typedef detail::umaskex_t char_class_type;
typedef typename detail::version_tag<Char>::type version_tag;
typedef detail::cpp_regex_traits_base<Char> base_type;
/// Initialize a cpp_regex_traits object to use the specified std::locale,
/// or the global std::locale if none is specified.
///
cpp_regex_traits(locale_type const &loc = locale_type())
: base_type()
, loc_()
{
this->imbue(loc);
}
/// Checks two cpp_regex_traits objects for equality
///
/// \return this->getloc() == that.getloc().
bool operator ==(cpp_regex_traits<char_type> const &that) const
{
return this->loc_ == that.loc_;
}
/// Checks two cpp_regex_traits objects for inequality
///
/// \return this->getloc() != that.getloc().
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?