📄 syntax2.h
字号:
class charset_map
{
std::map<CharT, charset_map_node<CharT> > m_map;
public:
typedef typename std::map<CharT, charset_map_node<CharT> >::iterator iterator;
~charset_map()
{
for( iterator iter = m_map.begin(); m_map.end() != iter; ++iter )
iter->second.clear();
}
charset_map_node<CharT> & operator[]( CharT ch ) { return m_map[ ch ]; }
iterator begin() { return m_map.begin(); }
iterator end() { return m_map.end(); }
iterator find( CharT ch ) { return m_map.find( ch ); }
void erase( iterator iter ) { m_map.erase( iter ); }
};
// Accessor for the single charset_map<char> instance associated with the Perl
// syntax. The unnamed char argument is never read; it only selects this
// overload over the wchar_t one. The map is a function-local static, so it is
// constructed the first time this function is called.
inline detail::charset_map<char> & get_perl_charset_map( char )
{
    static detail::charset_map<char> s_perl_map_narrow;
    return s_perl_map_narrow;
}
// Accessor for the single charset_map<wchar_t> instance associated with the
// Perl syntax. The unnamed wchar_t argument is never read; it only selects
// this overload over the char one. The map is a function-local static, so it
// is constructed the first time this function is called.
inline detail::charset_map<wchar_t> & get_perl_charset_map( wchar_t )
{
    static detail::charset_map<wchar_t> s_perl_map_wide;
    return s_perl_map_wide;
}
// Accessor for the single charset_map<char> instance associated with the
// POSIX syntax. The unnamed char argument is never read; it only selects this
// overload over the wchar_t one. The map is a function-local static, so it is
// constructed the first time this function is called.
inline detail::charset_map<char> & get_posix_charset_map( char )
{
    static detail::charset_map<char> s_posix_map_narrow;
    return s_posix_map_narrow;
}
// Accessor for the single charset_map<wchar_t> instance associated with the
// POSIX syntax. The unnamed wchar_t argument is never read; it only selects
// this overload over the char one. The map is a function-local static, so it
// is constructed the first time this function is called.
inline detail::charset_map<wchar_t> & get_posix_charset_map( wchar_t )
{
    static detail::charset_map<wchar_t> s_posix_map_wide;
    return s_posix_map_wide;
}
// Returns true when ch is classified as whitespace by the C library's
// isspace() under the current C locale.
//
// isspace() is only defined for arguments representable as unsigned char (or
// EOF). Where plain char is signed, characters with the high bit set are
// negative, so ch is converted to unsigned char before the call.
inline bool regex_isspace( char ch )
{
    // NOTE(review): the using-directive is presumably here so the call
    // resolves whether the library declares isspace in ::std or globally.
    using namespace std;
    return 0 != isspace( static_cast<unsigned char>( ch ) );
}
// Wide-character overload: returns true when wch is classified as whitespace
// by the C library's iswspace() under the current C locale.
inline bool regex_isspace( wchar_t wch )
{
    using namespace std;
    if( iswspace( wch ) )
        return true;
    return false;
}
// Returns a reference to the larger of the two arguments, compared with
// operator>. When neither is greater than the other, rhs is returned.
template< typename T >
T const & regex_max( T const & lhs, T const & rhs )
{
    if( lhs > rhs )
        return lhs;
    return rhs;
}
// Returns a reference to the smaller of the two arguments, compared with
// operator<. When neither is less than the other, rhs is returned.
template< typename T >
T const & regex_min( T const & lhs, T const & rhs )
{
    if( lhs < rhs )
        return lhs;
    return rhs;
}
} // namespace detail
//
// The perl_syntax class encapsulates the Perl 5 regular expression syntax. It is
// used as a template parameter to basic_rpattern. To customize regex syntax, create
// your own syntax class and use it as a template parameter instead.
//
// Character-type-independent part of perl_syntax: the token lookup tables and
// the helpers that index them. Everything is protected, so the class is only
// usable as a base.
class perl_syntax_base
{
protected:
    perl_syntax_base()
    {
    }

    // Lookup tables with one TOKEN entry per unsigned char value.
    // perl_syntax::reg_token consults s_rgreg for the current pattern
    // character and s_rgescape for the character that follows an ESCAPE.
    static TOKEN const s_rgreg[ UCHAR_MAX + 1 ];
    static TOKEN const s_rgescape[ UCHAR_MAX + 1 ];

    // Returns the entry of rg for the narrow character ch. The cast keeps the
    // index in [0, UCHAR_MAX] even where plain char is signed.
    static TOKEN look_up( char ch, TOKEN const rg[] )
    {
        return rg[ static_cast<unsigned char>( ch ) ];
    }

    // Returns the entry of rg for the wide character ch, or NO_TOKEN when ch
    // lies outside [0, UCHAR_MAX] and so has no table entry.
    static TOKEN look_up( wchar_t ch, TOKEN const rg[] )
    {
        // Do the range test on an unsigned value. Where wchar_t is a signed
        // type, a negative ch would pass a plain "UCHAR_MAX < ch" test and
        // then be truncated into an unrelated table slot; converted to
        // unsigned it becomes a large value and is rejected here.
        unsigned long const code = static_cast<unsigned long>( ch );
        return UCHAR_MAX < code ? NO_TOKEN : rg[ code ];
    }
};
// --------------------------------------------------------------------------
//
// Class: perl_syntax
//
// Description: Module that encapsulates the Perl syntax
//
// Methods: eat_whitespace -
// min_quant -
// perl_syntax -
// perl_syntax -
// set_flags -
// get_flags -
// reg_token -
// quant_token -
// charset_token -
// subst_token -
// ext_token -
// get_charset_map -
// invalid_charset -
// register_intrinsic_charset -
// _invalid_charset -
// _invalid_charset -
//
// Members: m_flags -
// s_charset_map -
//
// Typedefs: iterator -
// const_iterator -
// char_type -
//
// History: 11/16/2001 - ericne - Created
//
// --------------------------------------------------------------------------
template< typename CharT >
class perl_syntax : protected perl_syntax_base
{
public:
typedef typename std::basic_string<CharT>::iterator iterator;
typedef typename std::basic_string<CharT>::const_iterator const_iterator;
typedef CharT char_type;
template< typename OtherT > struct rebind { typedef perl_syntax<OtherT> other; };
private:
REGEX_FLAGS m_flags;
// When the EXTENDED flag is set, advances icur past any run of whitespace and
// '#' comments. A comment runs from '#' up to and including the next '\n',
// or to iend if there is no newline. Without EXTENDED, icur is left alone.
// Returns the (possibly advanced) position so callers can compare it to iend.
const_iterator eat_whitespace( iterator & icur, const_iterator iend )
{
    if( !( m_flags & EXTENDED ) )
        return icur;

    while( iend != icur )
    {
        bool const is_comment = ( REGEX_CHAR(CharT,'#') == *icur );
        if( !is_comment && !detail::regex_isspace( *icur ) )
            break;

        // Step over the '#' or the first whitespace character.
        ++icur;

        if( is_comment )
        {
            // Discard the rest of the line, newline included.
            while( iend != icur )
            {
                if( REGEX_CHAR(CharT,'\n') == *icur++ )
                    break;
            }
        }
        else
        {
            // Discard the remainder of this whitespace run.
            while( iend != icur && detail::regex_isspace( *icur ) )
                ++icur;
        }
    }
    return icur;
}
// Steps over the quantifier character at icur, skips whitespace/comments (in
// EXTENDED mode only, via eat_whitespace), and then consumes a '?' if one
// follows. Returns true when a '?' was consumed, which callers use to select
// the *_MIN variant of the quantifier token; returns false otherwise.
bool min_quant( iterator & icur, const_iterator iend )
{
    ++icur;
    if( iend == eat_whitespace( icur, iend ) )
        return false;
    if( REGEX_CHAR(CharT,'?') != *icur )
        return false;
    ++icur;
    return true;
}
public:
// Creates a syntax object that tokenizes according to the given flags.
// Not marked explicit, so a REGEX_FLAGS value converts implicitly.
perl_syntax( REGEX_FLAGS flags )
    : m_flags( flags )
{
}
// Copy constructor: the new object takes over the flags of sy, which is the
// only per-instance state visible in this class.
perl_syntax( perl_syntax const & sy )
    : m_flags( sy.m_flags )
{
}
// Replaces the stored flags. Subsequent tokenizing calls see the new value
// (for example, eat_whitespace tests the EXTENDED bit on every call).
void set_flags( REGEX_FLAGS flags )
{
    this->m_flags = flags;
}
// Returns the flags currently stored in this syntax object.
REGEX_FLAGS get_flags() const
{
    return this->m_flags;
}
// Reads one token at icur in ordinary (non-charset, non-quantifier) context.
//
// Precondition (asserted): icur != iend.
// After skipping EXTENDED-mode whitespace, the current character is looked up
// in s_rgreg. If the result tests true, the character is consumed. If that
// token is ESCAPE and another character follows, that character is looked up
// in s_rgescape: a hit consumes it and becomes the result, a miss leaves it
// in place and the result stays ESCAPE.
// Returns NO_TOKEN when only whitespace/comments remained; otherwise the
// token found (icur is not advanced when the s_rgreg entry tests false).
TOKEN reg_token( iterator & icur, const_iterator iend )
{
    REGEX_ASSERT( iend != icur );
    if( iend == eat_whitespace( icur, iend ) )
        return NO_TOKEN;

    TOKEN const first = look_up( *icur, s_rgreg );
    if( first )
        ++icur;

    // Only an ESCAPE with something after it needs a second lookup.
    if( ESCAPE != first || iend == icur )
        return first;

    TOKEN const escaped = look_up( *icur, s_rgescape );
    if( !escaped )
        return ESCAPE;

    ++icur;
    return escaped;
}
// Reads one quantifier-related token at icur.
//
// Precondition (asserted): icur != iend.
// After skipping EXTENDED-mode whitespace:
//   '{'  -> BEGIN_RANGE      (consumed)
//   ','  -> RANGE_SEPARATOR  (consumed)
//   '*', '+', '?', '}' -> the matching quantifier token; min_quant consumes
//        the character and, if a '?' follows, that too, in which case the
//        *_MIN variant is returned.
// Any other character yields NO_TOKEN and is not consumed. NO_TOKEN is also
// returned when only whitespace/comments remained.
TOKEN quant_token( iterator & icur, const_iterator iend )
{
    REGEX_ASSERT( iend != icur );
    if( iend == eat_whitespace( icur, iend ) )
        return NO_TOKEN;

    // Token to return without / with a trailing '?'.
    TOKEN plain = NO_TOKEN;
    TOKEN minimal = NO_TOKEN;

    switch( *icur )
    {
    case REGEX_CHAR(CharT,'{'):
        ++icur;
        return BEGIN_RANGE;

    case REGEX_CHAR(CharT,','):
        ++icur;
        return RANGE_SEPARATOR;

    case REGEX_CHAR(CharT,'*'):
        plain = ZERO_OR_MORE;
        minimal = ZERO_OR_MORE_MIN;
        break;

    case REGEX_CHAR(CharT,'+'):
        plain = ONE_OR_MORE;
        minimal = ONE_OR_MORE_MIN;
        break;

    case REGEX_CHAR(CharT,'?'):
        plain = ZERO_OR_ONE;
        minimal = ZERO_OR_ONE_MIN;
        break;

    case REGEX_CHAR(CharT,'}'):
        plain = END_RANGE;
        minimal = END_RANGE_MIN;
        break;

    default:
        return NO_TOKEN;
    }

    return min_quant( icur, iend ) ? minimal : plain;
}
// Reads one token at icur while inside a character set ("[...]").
//
// Precondition (asserted): icur != iend. No whitespace skipping is done here.
//   '-'  -> CHARSET_RANGE, '^' -> CHARSET_NEGATE, ']' -> CHARSET_END
//           (each consumed)
//   '\\' -> consumed; if followed by b/d/D/s/S/w/W that character is consumed
//           too and the corresponding token is returned, otherwise
//           CHARSET_ESCAPE is returned with icur on the following character
//           (or at iend).
//   '['  -> if it starts one of the names in detail::g_rgposix_charsets
//           (e.g. "[:...:]"), the whole name is consumed and the matching
//           CHARSET_* token returned; otherwise NO_TOKEN, nothing consumed.
// Any other character yields NO_TOKEN and is not consumed.
TOKEN charset_token( iterator & icur, const_iterator iend )
{
    REGEX_ASSERT( iend != icur );
    TOKEN tok = NO_TOKEN;
    switch( *icur )
    {
    case REGEX_CHAR(CharT,'-'):
        tok = CHARSET_RANGE;
        ++icur;
        break;
    case REGEX_CHAR(CharT,'^'):
        tok = CHARSET_NEGATE;
        ++icur;
        break;
    case REGEX_CHAR(CharT,']'):
        tok = CHARSET_END;
        ++icur;
        break;
    case REGEX_CHAR(CharT,'\\'):
        tok = CHARSET_ESCAPE;
        if( iend == ++icur )
            break; // trailing backslash: nothing left to classify
        switch( *icur )
        {
        case REGEX_CHAR(CharT,'b'):
            tok = CHARSET_BACKSPACE;
            ++icur;
            break;
        case REGEX_CHAR(CharT,'d'):
            tok = ESC_DIGIT;
            ++icur;
            break;
        case REGEX_CHAR(CharT,'D'):
            tok = ESC_NOT_DIGIT;
            ++icur;
            break;
        case REGEX_CHAR(CharT,'s'):
            tok = ESC_SPACE;
            ++icur;
            break;
        case REGEX_CHAR(CharT,'S'):
            tok = ESC_NOT_SPACE;
            ++icur;
            break;
        case REGEX_CHAR(CharT,'w'):
            tok = ESC_WORD;
            ++icur;
            break;
        case REGEX_CHAR(CharT,'W'):
            tok = ESC_NOT_WORD;
            ++icur;
            break;
        }
        break;
    case REGEX_CHAR(CharT,'['):
        {
            // Peek at the character after '[' without moving icur. The
            // position must be checked against iend first: when '[' is the
            // last character, the next position is the end iterator and must
            // not be dereferenced.
            const_iterator inext = icur;
            ++inext;
            if( iend != inext && REGEX_CHAR(CharT,':') == *inext )
            {
                // Try each known POSIX charset name; stop at the first match.
                for( size_t i=0; !tok && i < detail::g_cposix_charsets; ++i )
                {
                    if( detail::is_posix_charset<const_iterator>( icur, iend, detail::g_rgposix_charsets[i].m_szcharset ) )
                    {
                        // NOTE(review): presumably the CHARSET_* tokens from
                        // CHARSET_ALNUM onward are declared in the same order
                        // as g_rgposix_charsets - confirm against the enum.
                        tok = TOKEN( CHARSET_ALNUM + i );
                        std::advance( icur, detail::g_rgposix_charsets[i].cchars );
                    }
                }
            }
        }
        break;
    }
    return tok;
}
// Reads one token at icur while parsing a substitution string.
//
// Precondition (asserted): icur != iend.
//   '\\' -> consumed. If followed by Q/U/u/L/l/E, that character is consumed
//           as well and the corresponding SUBST_* token is returned;
//           otherwise SUBST_ESCAPE is returned with icur on the following
//           character (or at iend).
//   '$'  -> consumed. If followed by '&', '`' or '\'', that character is
//           consumed as well and SUBST_MATCH / SUBST_PREMATCH /
//           SUBST_POSTMATCH is returned; otherwise SUBST_BACKREF is returned
//           with icur on the following character (or at iend).
// Any other character yields NO_TOKEN and is not consumed.
TOKEN subst_token( iterator & icur, const_iterator iend )
{
    REGEX_ASSERT( iend != icur );

    switch( *icur )
    {
    case REGEX_CHAR(CharT,'\\'):
        {
            TOKEN esc = SUBST_ESCAPE;
            ++icur;
            if( iend == icur )
                return esc;

            switch( *icur )
            {
            case REGEX_CHAR(CharT,'Q'): esc = SUBST_QUOTE_META_ON; break;
            case REGEX_CHAR(CharT,'U'): esc = SUBST_UPPER_ON;      break;
            case REGEX_CHAR(CharT,'u'): esc = SUBST_UPPER_NEXT;    break;
            case REGEX_CHAR(CharT,'L'): esc = SUBST_LOWER_ON;      break;
            case REGEX_CHAR(CharT,'l'): esc = SUBST_LOWER_NEXT;    break;
            case REGEX_CHAR(CharT,'E'): esc = SUBST_ALL_OFF;       break;
            default:
                // Not a recognized escape letter: leave it unconsumed.
                return esc;
            }
            ++icur;
            return esc;
        }

    case REGEX_CHAR(CharT,'$'):
        {
            TOKEN ref = SUBST_BACKREF;
            ++icur;
            if( iend == icur )
                return ref;

            switch( *icur )
            {
            case REGEX_CHAR(CharT,'&'):  ref = SUBST_MATCH;     break;
            case REGEX_CHAR(CharT,'`'):  ref = SUBST_PREMATCH;  break;
            case REGEX_CHAR(CharT,'\''): ref = SUBST_POSTMATCH; break;
            default:
                // Not one of the special match variables: leave it unconsumed.
                return ref;
            }
            ++icur;
            return ref;
        }

    default:
        break;
    }
    return NO_TOKEN;
}
TOKEN ext_token( iterator & icur, const_iterator iend )
{
REGEX_ASSERT( iend != icur );
if( iend == eat_whitespace( icur, iend ) )
return NO_TOKEN;
bool finclude;
TOKEN tok = NO_TOKEN;
if( REGEX_CHAR(CharT,'?') == *icur )
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -