📄 regexpr2.cpp
字号:
return false;\
}\
return ( m_posixcharsoff.end() !=\
std::find_if( m_posixcharsoff.begin(), m_posixcharsoff.end(),\
std::not1( std::bind1st( std::ptr_fun( CTYPE ), ch ) ) ) )\
|| ( m_nestedcharsets.end() !=\
std::find_if( m_nestedcharsets.begin(), m_nestedcharsets.end(),\
std::bind2nd( regex::detail::mem_fun( PMF ), ch ) ) );\
}
// Stamp out the three extended-check members used by in_narrow(),
// in_wide_with_case() and in_wide_no_case(). The first argument is the name of
// the generated function. The last two arguments appear to feed the CTYPE
// (character-classification function) and PMF (charset member function applied
// to nested charsets) slots of the macro body.
// NOTE(review): the second argument looks like the character type of the
// generated function's parameter; the macro head is not visible here - confirm.
DECLARE_EXTENDED_CHECK(extended_check_narrow,char,_isctype,&charset::in_narrow)
DECLARE_EXTENDED_CHECK(extended_check_wide_with_case,wchar_t,iswctype,&charset::in_wide_with_case)
DECLARE_EXTENDED_CHECK(extended_check_wide_no_case,wchar_t,iswctype,&charset::in_wide_no_case)
// The helper macro is not needed past the three expansions above.
#undef DECLARE_EXTENDED_CHECK
// Note overloading based on parameter
// Tests whether the narrow character ch belongs to this set.
// The per-byte bit vector is consulted first; extended_check_narrow() is only
// evaluated when the bit vector says "no". A complemented set inverts the result.
bool in_narrow( char ch ) const
{
    // A non-zero m_posixcharson here means optimize() was never called on
    // this charset.
    assert( 0 == m_posixcharson );

    bool const matched =
        m_ascii_bitvector[ static_cast<unsigned char>( ch ) ]
        || extended_check_narrow( ch );

    return m_fcompliment != matched;
}
inline bool in_range_vector_with_case( wchar_t ch ) const
{
return std::binary_search( m_range_vector.begin(), m_range_vector.end(),
range_type( ch, ch ), range_less() );
}
inline bool in_range_vector_no_case( wchar_t ch ) const
{
wchar_t const chup = regex_toupper( ch );
if( std::binary_search( m_range_vector.begin(), m_range_vector.end(),
range_type( chup, chup ), range_less() ) )
return true;
wchar_t const chlo = regex_tolower( ch );
if( chup != chlo &&
std::binary_search( m_range_vector.begin(), m_range_vector.end(),
range_type( chlo, chlo ), range_less() ) )
return true;
return false;
}
// Note overloading based on parameter
// Tests whether the wide character ch belongs to this set, matching
// case-sensitively. A complemented set inverts the result.
bool in_wide_with_case( wchar_t ch ) const
{
    bool matched;
    if( UCHAR_MAX >= ch )
    {
        // Characters not above UCHAR_MAX are answered by the bit vector.
        matched = m_ascii_bitvector[ static_cast<unsigned char>( ch ) ];
    }
    else
    {
        // Larger characters: search the range vector (skipped when empty),
        // then test the m_posixcharson class mask with iswctype().
        matched = ( ! m_range_vector.empty() && in_range_vector_with_case( ch ) )
               || ( m_posixcharson && iswctype( ch, m_posixcharson ) );
    }

    // The extended check runs only when nothing above matched.
    if( ! matched )
    {
        matched = extended_check_wide_with_case( ch );
    }

    return m_fcompliment != matched;
}
// Note overloading based on parameter
// Tests whether the wide character ch belongs to this set, using the
// case-insensitive range lookup. A complemented set inverts the result.
bool in_wide_no_case( wchar_t ch ) const
{
    bool matched;
    if( UCHAR_MAX >= ch )
    {
        // Characters not above UCHAR_MAX are answered by the bit vector.
        matched = m_ascii_bitvector[ static_cast<unsigned char>( ch ) ];
    }
    else
    {
        // Larger characters: search the range vector (skipped when empty),
        // then test the m_posixcharson class mask with iswctype().
        matched = ( ! m_range_vector.empty() && in_range_vector_no_case( ch ) )
               || ( m_posixcharson && iswctype( ch, m_posixcharson ) );
    }

    // The extended check runs only when nothing above matched.
    if( ! matched )
    {
        matched = extended_check_wide_no_case( ch );
    }

    return m_fcompliment != matched;
}
// Tag-dispatched membership test. The unnamed second parameter selects the
// flavour: true_t routes wide characters to the case-sensitive test, false_t
// to the case-insensitive one. Narrow characters go to in_narrow() for either tag.
bool in( char ch, true_t ) const { return in_narrow( ch ); }
bool in( char ch, false_t ) const { return in_narrow( ch ); }
bool in( wchar_t ch, true_t ) const { return in_wide_with_case( ch ); }
bool in( wchar_t ch, false_t ) const { return in_wide_no_case( ch ); }
// Copy assignment and copy construction are declared private, so code outside
// charset_t cannot copy one of these objects.
// NOTE(review): no definitions are visible in this part of the file - presumably
// they are intentionally left undefined; confirm.
private:
charset_t & operator=( charset_t const & that );
charset_t( charset_t const & that );
};
// Intrinsic character sets are allocated on the heap with the standard allocator.
// They are either the built-in character sets, or the user-defined ones.
struct charset
    : public charset_t< std::allocator<range_type>,
                        std::allocator<wctype_t>,
                        std::allocator<charset const *> >
{
    // Nothing to set up here; the base class is default-constructed.
    charset() {}

private:
    // Declared private to forbid copying and assignment.
    charset & operator=( charset const & );
    charset( charset const & );
};
// charset is no longer an incomplete type so we now
// know how to destroy one. free_charset() is used in syntax2.h
REGEXPR_H_INLINE void free_charset( charset const * pcharset )
{
    // Plain delete; deleting a null pointer is a no-op, so no check is needed.
    delete pcharset;
}
// Custom character sets are the ones that appear in patterns between
// square brackets. They are allocated in a regex_arena to speed up
// pattern compilation and to make rpattern clean-up faster.
struct custom_charset
    : public charset_t< REGEX_ALLOCATOR< range_type >,
                        REGEX_ALLOCATOR< wctype_t >,
                        REGEX_ALLOCATOR< charset const * > >
{
    // Shorthand for the three allocator types handed to the base class.
    typedef REGEX_ALLOCATOR< range_type > A1;
    typedef REGEX_ALLOCATOR< wctype_t > A2;
    typedef REGEX_ALLOCATOR< charset const * > A3;

    // Constructs the base class with one allocator per container, each made
    // from the supplied arena.
    custom_charset( regex_arena & arena )
        : charset_t<A1, A2, A3>( MAKE_ALLOCATOR(range_type,arena),
                                 MAKE_ALLOCATOR(wctype_t,arena),
                                 MAKE_ALLOCATOR(charset const*,arena) )
    {
    }

    // Placement form: storage for the object itself is taken from the arena.
    static void * operator new( size_t size, regex_arena & arena )
    {
        return arena.allocate( size );
    }

    // Both deallocation functions deliberately do nothing.
    // NOTE(review): presumably the arena reclaims the storage in bulk - confirm.
    static void operator delete( void *, regex_arena & ) {}
    static void operator delete( void * ) {}

private:
    // Declared private to forbid copying and assignment.
    custom_charset & operator=( custom_charset const & );
    custom_charset( custom_charset const & );
};
template< typename CH >
class intrinsic_charsets
{
    // A charset built from a character-class mask plus a list of extra
    // narrow characters.
    struct intrinsic_charset : public charset
    {
        intrinsic_charset( bool fcompliment, wctype_t desc, char const * sz )
        {
            reset( fcompliment, desc, sz );
        }

        // Rebuilds the set from scratch.
        //   fcompliment - stored in m_fcompliment
        //   desc        - class mask applied through _set()
        //   sz          - NUL-terminated list of extra characters to include
        void reset( bool fcompliment, wctype_t desc, char const * sz )
        {
            clear();
            m_fcompliment = fcompliment;
            m_fskip_extended_check = true;
            _set( desc, type2type<CH>() );

            // Switch on the bit of every extra character.
            while( *sz )
            {
                m_ascii_bitvector.set( static_cast<unsigned char>( *sz ) );
                ++sz;
            }
        }

    protected:
        // Narrow flavour: fill the bit vector with every value in
        // [0, UCHAR_MAX] that _isctype() accepts for desc.
        void _set( wctype_t desc, type2type<char> )
        {
            m_ascii_bitvector.zero();
            for( unsigned int code = 0; code <= UCHAR_MAX; ++code )
            {
                if( _isctype( code, desc ) )
                {
                    m_ascii_bitvector.set( static_cast<unsigned char>( code ) );
                }
            }
        }

        // Wide flavour: same bit vector as the narrow flavour, and the mask
        // is additionally remembered in m_posixcharson.
        void _set( wctype_t desc, type2type<wchar_t> )
        {
            _set( desc, type2type<char>() );
            m_posixcharson = desc;
        }

    private:
        // Declared private to forbid copying and assignment.
        intrinsic_charset( intrinsic_charset const & );
        intrinsic_charset & operator=( intrinsic_charset const & );
    };

    // Each accessor below owns one function-local static set, constructed on
    // first use.
    static intrinsic_charset & _get_word_charset()
    {
        static intrinsic_charset s_word_charset( false, _ALPHA|_DIGIT, "_" );
        return s_word_charset;
    }

    static intrinsic_charset & _get_digit_charset()
    {
        static intrinsic_charset s_digit_charset( false, _DIGIT, "" );
        return s_digit_charset;
    }

    static intrinsic_charset & _get_space_charset()
    {
        static intrinsic_charset s_space_charset( false, _SPACE, "" );
        return s_space_charset;
    }

    static intrinsic_charset & _get_not_word_charset()
    {
        static intrinsic_charset s_not_word_charset( true, _ALPHA|_DIGIT, "_" );
        return s_not_word_charset;
    }

    static intrinsic_charset & _get_not_digit_charset()
    {
        static intrinsic_charset s_not_digit_charset( true, _DIGIT, "" );
        return s_not_digit_charset;
    }

    static intrinsic_charset & _get_not_space_charset()
    {
        static intrinsic_charset s_not_space_charset( true, _SPACE, "" );
        return s_not_space_charset;
    }

public:
    // Read-only views of the sets above.
    static charset const & get_word_charset() { return _get_word_charset(); }
    static charset const & get_digit_charset() { return _get_digit_charset(); }
    static charset const & get_space_charset() { return _get_space_charset(); }
    static charset const & get_not_word_charset() { return _get_not_word_charset(); }
    static charset const & get_not_digit_charset() { return _get_not_digit_charset(); }
    static charset const & get_not_space_charset() { return _get_not_space_charset(); }

    // Rebuilds all six sets with the same arguments they are constructed with.
    // NOTE(review): presumably meant to be called after a locale change, since
    // _isctype() is re-evaluated - confirm against callers.
    static void reset()
    {
        _get_word_charset().reset( false, _ALPHA|_DIGIT, "_" );
        _get_digit_charset().reset( false, _DIGIT, "" );
        _get_space_charset().reset( false, _SPACE, "" );
        _get_not_word_charset().reset( true, _ALPHA|_DIGIT, "_" );
        _get_not_digit_charset().reset( true, _DIGIT, "" );
        _get_not_space_charset().reset( true, _SPACE, "" );
    }
};
//
// Operator implementations
//
// Evaluates the beginning-of-string condition
template< typename CSTRINGS >
struct bos_t
{
    // Yields the same condition parameterised on a different tag type.
    template< typename U > struct rebind { typedef bos_t<U> other; };

    // True exactly when iter sits at param.ibegin.
    template< typename CI >
    static bool eval( match_param<CI> const & param, CI iter )
    {
        bool const at_begin = ( param.ibegin == iter );
        return at_begin;
    }
};
// Find the beginning of a line, either beginning of a string, or the character
// immediately following a newline
template< typename CSTRINGS >
struct bol_t
{
    // Yields the same condition parameterised on a different tag type.
    template< typename U > struct rebind { typedef bol_t<U> other; };

    // True when iter is at param.ibegin, or when the character just before
    // iter is a newline. iter is taken by value, so stepping it back here
    // does not affect the caller.
    template< typename CI >
    static bool eval( match_param<CI> const & param, CI iter )
    {
        typedef typename std::iterator_traits<CI>::value_type CH;
        typedef std::char_traits<CH> traits_type;

        if( param.ibegin == iter )
        {
            return true;
        }

        // Not at the start, so there is a preceding character to inspect.
        --iter;
        return traits_type::eq( REGEX_CHAR(CH,'\n'), *iter );
    }
};
// Evaluates the end-of-string condition for strings
template< typename CSTRINGS >
struct eos_t
{
    // Yields the same condition parameterised on a different tag type.
    template< typename U > struct rebind { typedef eos_t<U> other; };

    // True exactly when iter has reached param.istop.
    template< typename CI >
    static bool eval( match_param<CI> const & param, CI iter )
    {
        bool const at_stop = ( param.istop == iter );
        return at_stop;
    }
};
template<>
struct eos_t<true_t>
{
    // Yields the same condition parameterised on a different tag type.
    template< typename U > struct rebind { typedef eos_t<U> other; };

    // Specialisation for the true_t tag: the match parameters are ignored and
    // the end is recognised by the character at iter being NUL.
    // NOTE(review): presumably used for NUL-terminated input, going by the
    // CSTRINGS parameter name of the primary template - confirm.
    template< typename CI >
    static bool eval( match_param<CI> const &, CI iter )
    {
        typedef typename std::iterator_traits<CI>::value_type CH;
        typedef std::char_traits<CH> traits_type;

        CH const terminator = REGEX_CHAR(CH,'\0');
        return traits_type::eq( terminator, *iter );
    }
};
// Evaluates end-of-line conditions, either the end of the string, or a
// newline character.
template< typename CSTRINGS >
struct eol_t
{
    // Yields the same condition parameterised on a different tag type.
    template< typename U > struct rebind { typedef eol_t<U> other; };

    // True when iter has reached param.istop, or when the character at iter
    // is a newline. iter is dereferenced only if it is not at param.istop.
    template< typename CI >
    static bool eval( match_param<CI> const & param, CI iter )
    {
        typedef typename std::iterator_traits<CI>::value_type CH;
        typedef std::char_traits<CH> traits_type;

        if( param.istop == iter )
        {
            return true;
        }
        return traits_type::eq( REGEX_CHAR(CH,'\n'), *iter );
    }
};
template<>
struct eol_t<true_t>
{
template< typename CI >
static bool eval( match_param<CI> const &, CI iter )
{
typedef typename std::iterator_traits<CI>::value_type CH;
typedef std::char_traits<CH> traits_type;
return traits_type::eq( REGEX_CHAR(CH,'\0'), *iter )
|| traits_type::eq( REGEX_CHAR(CH,'\n'), *iter );
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -