cpp_regex_traits.hpp
来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 695 行 · 第 1/2 页
HPP
695 行
////////////////////////////////////////////////////////////////////////////////// \file cpp_regex_traits.hpp/// Contains the definition of the cpp_regex_traits\<\> template, which is a/// wrapper for std::locale that can be used to customize the behavior of/// static and dynamic regexes.//// Copyright 2008 Eric Niebler. Distributed under the Boost// Software License, Version 1.0. (See accompanying file// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)#ifndef BOOST_XPRESSIVE_TRAITS_CPP_REGEX_TRAITS_HPP_EAN_10_04_2005#define BOOST_XPRESSIVE_TRAITS_CPP_REGEX_TRAITS_HPP_EAN_10_04_2005// MS compatible compilers support #pragma once#if defined(_MSC_VER) && (_MSC_VER >= 1020)# pragma once#endif#include <ios>#include <string>#include <locale>#include <sstream>#include <boost/config.hpp>#include <boost/assert.hpp>#include <boost/integer.hpp>#include <boost/mpl/assert.hpp>#include <boost/detail/workaround.hpp>#include <boost/type_traits/is_same.hpp>#include <boost/xpressive/detail/detail_fwd.hpp>#include <boost/xpressive/detail/utility/literals.hpp>// From John Maddock:// Fix for gcc prior to 3.4: std::ctype<wchar_t> doesn't allow masks to be combined, for example:// std::use_facet<std::ctype<wchar_t> >(locale()).is(std::ctype_base::lower|std::ctype_base::upper, L'a');// incorrectly returns false.// NOTE: later version of the gcc define __GLIBCXX__, not __GLIBCPP__#if BOOST_WORKAROUND(__GLIBCPP__, != 0)# define BOOST_XPRESSIVE_BUGGY_CTYPE_FACET#endifnamespace boost { namespace xpressive{namespace detail{ // define an unsigned integral typedef of the same size as std::ctype_base::mask typedef boost::uint_t<sizeof(std::ctype_base::mask) * CHAR_BIT>::least umask_t; BOOST_MPL_ASSERT_RELATION(sizeof(std::ctype_base::mask), ==, sizeof(umask_t)); // Calculate what the size of the umaskex_t type should be to fix the 3 extra bitmasks // 11 char categories in ctype_base // + 3 extra categories for xpressive // = 14 total bits needed int const umaskex_bits = (14 > 
(sizeof(umask_t) * CHAR_BIT)) ? 14 : sizeof(umask_t) * CHAR_BIT; // define an unsigned integral type with at least umaskex_bits typedef boost::uint_t<umaskex_bits>::fast umaskex_t; BOOST_MPL_ASSERT_RELATION(sizeof(umask_t), <=, sizeof(umaskex_t)); // cast a ctype mask to a umaskex_t template<std::ctype_base::mask Mask> struct mask_cast { BOOST_STATIC_CONSTANT(umaskex_t, value = static_cast<umask_t>(Mask)); }; #ifdef __CYGWIN__ // Work around a gcc warning on cygwin template<> struct mask_cast<std::ctype_base::print> { BOOST_MPL_ASSERT_RELATION('\227', ==, std::ctype_base::print); BOOST_STATIC_CONSTANT(umaskex_t, value = 0227); }; #endif #ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION template<std::ctype_base::mask Mask> umaskex_t const mask_cast<Mask>::value; #endif #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET // an unsigned integer with the highest bit set umaskex_t const highest_bit = 1 << (sizeof(umaskex_t) * CHAR_BIT - 1); /////////////////////////////////////////////////////////////////////////////// // unused_mask // find a bit in an int that isn't set template<umaskex_t In, umaskex_t Out = highest_bit, bool Done = (0 == (Out & In))> struct unused_mask { BOOST_MPL_ASSERT_RELATION(1, !=, Out); BOOST_STATIC_CONSTANT(umaskex_t, value = (unused_mask<In, (Out >> 1)>::value)); }; template<umaskex_t In, umaskex_t Out> struct unused_mask<In, Out, true> { BOOST_STATIC_CONSTANT(umaskex_t, value = Out); }; #ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION template<umaskex_t In, umaskex_t Out, bool Done> umaskex_t const unused_mask<In, Out, Done>::value; #endif umaskex_t const std_ctype_alnum = mask_cast<std::ctype_base::alnum>::value; umaskex_t const std_ctype_alpha = mask_cast<std::ctype_base::alpha>::value; umaskex_t const std_ctype_cntrl = mask_cast<std::ctype_base::cntrl>::value; umaskex_t const std_ctype_digit = mask_cast<std::ctype_base::digit>::value; umaskex_t const std_ctype_graph = mask_cast<std::ctype_base::graph>::value; umaskex_t const std_ctype_lower = 
mask_cast<std::ctype_base::lower>::value; umaskex_t const std_ctype_print = mask_cast<std::ctype_base::print>::value; umaskex_t const std_ctype_punct = mask_cast<std::ctype_base::punct>::value; umaskex_t const std_ctype_space = mask_cast<std::ctype_base::space>::value; umaskex_t const std_ctype_upper = mask_cast<std::ctype_base::upper>::value; umaskex_t const std_ctype_xdigit = mask_cast<std::ctype_base::xdigit>::value; // Reserve some bits for the implementation #if defined(__GLIBCXX__) umaskex_t const std_ctype_reserved = 0x8000; #elif defined(_CPPLIB_VER) && defined(BOOST_WINDOWS) umaskex_t const std_ctype_reserved = 0x8200; #else umaskex_t const std_ctype_reserved = 0; #endif // Bitwise-or all the ctype masks together umaskex_t const all_ctype_masks = std_ctype_reserved | std_ctype_alnum | std_ctype_alpha | std_ctype_cntrl | std_ctype_digit | std_ctype_graph | std_ctype_lower | std_ctype_print | std_ctype_punct | std_ctype_space | std_ctype_upper | std_ctype_xdigit; // define a new mask for "underscore" ("word" == alnum | underscore) umaskex_t const non_std_ctype_underscore = unused_mask<all_ctype_masks>::value; // define a new mask for "blank" umaskex_t const non_std_ctype_blank = unused_mask<all_ctype_masks | non_std_ctype_underscore>::value; // define a new mask for "newline" umaskex_t const non_std_ctype_newline = unused_mask<all_ctype_masks | non_std_ctype_underscore | non_std_ctype_blank>::value; #else /////////////////////////////////////////////////////////////////////////////// // Ugly work-around for buggy ctype facets. 
umaskex_t const std_ctype_alnum = 1 << 0; umaskex_t const std_ctype_alpha = 1 << 1; umaskex_t const std_ctype_cntrl = 1 << 2; umaskex_t const std_ctype_digit = 1 << 3; umaskex_t const std_ctype_graph = 1 << 4; umaskex_t const std_ctype_lower = 1 << 5; umaskex_t const std_ctype_print = 1 << 6; umaskex_t const std_ctype_punct = 1 << 7; umaskex_t const std_ctype_space = 1 << 8; umaskex_t const std_ctype_upper = 1 << 9; umaskex_t const std_ctype_xdigit = 1 << 10; umaskex_t const non_std_ctype_underscore = 1 << 11; umaskex_t const non_std_ctype_blank = 1 << 12; umaskex_t const non_std_ctype_newline = 1 << 13; static umaskex_t const std_masks[] = { mask_cast<std::ctype_base::alnum>::value , mask_cast<std::ctype_base::alpha>::value , mask_cast<std::ctype_base::cntrl>::value , mask_cast<std::ctype_base::digit>::value , mask_cast<std::ctype_base::graph>::value , mask_cast<std::ctype_base::lower>::value , mask_cast<std::ctype_base::print>::value , mask_cast<std::ctype_base::punct>::value , mask_cast<std::ctype_base::space>::value , mask_cast<std::ctype_base::upper>::value , mask_cast<std::ctype_base::xdigit>::value }; inline int mylog2(umaskex_t i) { return "\0\0\1\0\2\0\0\0\3"[i & 0xf] + "\0\4\5\0\6\0\0\0\7"[(i & 0xf0) >> 04] + "\0\10\11\0\12\0\0\0\13"[(i & 0xf00) >> 010]; } #endif // convenient constant for the extra masks umaskex_t const non_std_ctype_masks = non_std_ctype_underscore | non_std_ctype_blank | non_std_ctype_newline; /////////////////////////////////////////////////////////////////////////////// // cpp_regex_traits_base // BUGBUG this should be replaced with a regex facet that lets you query for // an array of underscore characters and an array of line separator characters. 
template<typename Char, std::size_t SizeOfChar = sizeof(Char)> struct cpp_regex_traits_base { protected: void imbue(std::locale const &) { } static bool is(std::ctype<Char> const &ct, Char ch, umaskex_t mask) { #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET if(ct.is((std::ctype_base::mask)(umask_t)mask, ch)) { return true; } #else umaskex_t tmp = mask & ~non_std_ctype_masks; for(umaskex_t i; 0 != (i = (tmp & (~tmp+1))); tmp &= ~i) { std::ctype_base::mask m = (std::ctype_base::mask)(umask_t)std_masks[mylog2(i)]; if(ct.is(m, ch)) { return true; } } #endif return ((mask & non_std_ctype_blank) && cpp_regex_traits_base::is_blank(ch)) || ((mask & non_std_ctype_underscore) && cpp_regex_traits_base::is_underscore(ch)) || ((mask & non_std_ctype_newline) && cpp_regex_traits_base::is_newline(ch)); } private: static bool is_blank(Char ch) { BOOST_MPL_ASSERT_RELATION('\t', ==, L'\t'); BOOST_MPL_ASSERT_RELATION(' ', ==, L' '); return L' ' == ch || L'\t' == ch; } static bool is_underscore(Char ch) { BOOST_MPL_ASSERT_RELATION('_', ==, L'_'); return L'_' == ch; } static bool is_newline(Char ch) { BOOST_MPL_ASSERT_RELATION('\r', ==, L'\r'); BOOST_MPL_ASSERT_RELATION('\n', ==, L'\n'); BOOST_MPL_ASSERT_RELATION('\f', ==, L'\f'); return L'\r' == ch || L'\n' == ch || L'\f' == ch || (1 < SizeOfChar && (0x2028u == ch || 0x2029u == ch || 0x85u == ch)); } }; #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET template<typename Char> struct cpp_regex_traits_base<Char, 1> { protected: void imbue(std::locale const &loc) { int i = 0; Char allchars[UCHAR_MAX + 1]; for(i = 0; i <= UCHAR_MAX; ++i) { allchars[i] = static_cast<Char>(i); } std::ctype<Char> const &ct = BOOST_USE_FACET(std::ctype<Char>, loc); std::ctype_base::mask tmp[UCHAR_MAX + 1]; ct.is(allchars, allchars + UCHAR_MAX + 1, tmp); for(i = 0; i <= UCHAR_MAX; ++i) { this->masks_[i] = static_cast<umask_t>(tmp[i]); BOOST_ASSERT(0 == (this->masks_[i] & non_std_ctype_masks)); } this->masks_[static_cast<unsigned char>('_')] |= non_std_ctype_underscore; 
this->masks_[static_cast<unsigned char>(' ')] |= non_std_ctype_blank; this->masks_[static_cast<unsigned char>('\t')] |= non_std_ctype_blank; this->masks_[static_cast<unsigned char>('\n')] |= non_std_ctype_newline; this->masks_[static_cast<unsigned char>('\r')] |= non_std_ctype_newline; this->masks_[static_cast<unsigned char>('\f')] |= non_std_ctype_newline; } bool is(std::ctype<Char> const &, Char ch, umaskex_t mask) const { return 0 != (this->masks_[static_cast<unsigned char>(ch)] & mask); } private: umaskex_t masks_[UCHAR_MAX + 1]; }; #endif} // namespace detail///////////////////////////////////////////////////////////////////////////////// cpp_regex_traits///// \brief Encapsaulates a std::locale for use by the/// basic_regex\<\> class template.template<typename Char>struct cpp_regex_traits : detail::cpp_regex_traits_base<Char>{ typedef Char char_type; typedef std::basic_string<char_type> string_type; typedef std::locale locale_type; typedef detail::umaskex_t char_class_type; typedef regex_traits_version_2_tag version_tag; typedef detail::cpp_regex_traits_base<Char> base_type; /// Initialize a cpp_regex_traits object to use the specified std::locale, /// or the global std::locale if none is specified. /// cpp_regex_traits(locale_type const &loc = locale_type()) : base_type() , loc_() { this->imbue(loc); } /// Checks two cpp_regex_traits objects for equality /// /// \return this->getloc() == that.getloc(). bool operator ==(cpp_regex_traits<char_type> const &that) const { return this->loc_ == that.loc_; } /// Checks two cpp_regex_traits objects for inequality /// /// \return this->getloc() != that.getloc(). bool operator !=(cpp_regex_traits<char_type> const &that) const { return this->loc_ != that.loc_; } /// Convert a char to a Char /// /// \param ch The source character. /// \return std::use_facet\<std::ctype\<char_type\> \>(this->getloc()).widen(ch). 
// Forwards to widen() on the object that ctype_ points to.
// NOTE(review): ctype_ is declared outside this view; presumably it is the
// std::ctype<char_type> facet cached for the imbued locale -- confirm.
char_type widen(char ch) const { return this->ctype_->widen(ch); }
/// Returns a hash value for a Char in the range [0, UCHAR_MAX]
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?