📄 syntax2.h
字号:
{
case REGEX_CHAR(CH,':'):
tok = EXT_NOBACKREF;
++icur;
break;
case REGEX_CHAR(CH,'='):
tok = EXT_POS_LOOKAHEAD;
++icur;
break;
case REGEX_CHAR(CH,'!'):
tok = EXT_NEG_LOOKAHEAD;
++icur;
break;
case REGEX_CHAR(CH,'#'):
tok = EXT_COMMENT;
++icur;
break;
case REGEX_CHAR(CH,'('):
tok = EXT_CONDITION;
++icur;
break;
case REGEX_CHAR(CH,'R'):
tok = EXT_RECURSE;
++icur;
break;
case REGEX_CHAR(CH,'<'):
if( iend == eat_whitespace( ++icur, iend ) )
break;
switch( *icur )
{
case REGEX_CHAR(CH,'='):
tok = EXT_POS_LOOKBEHIND;
++icur;
break;
case REGEX_CHAR(CH,'!'):
tok = EXT_NEG_LOOKBEHIND;
++icur;
break;
}
break;
case REGEX_CHAR(CH,'>'):
tok = EXT_INDEPENDENT;
++icur;
break;
default:
finclude = true;
do
{
if( REGEX_CHAR(CH,':') == *icur )
{
tok = EXT_NOBACKREF;
++icur;
break;
}
if( REGEX_CHAR(CH,')') == *icur )
{
tok = EXT_NOBACKREF;
break;
}
if( REGEX_CHAR(CH,'-') == *icur && finclude )
finclude = false;
else if( REGEX_CHAR(CH,'i') == *icur )
m_flags = ( REGEX_FLAGS ) ( finclude ? ( m_flags | NOCASE ) : ( m_flags & ~NOCASE ) );
else if( REGEX_CHAR(CH,'m') == *icur )
m_flags = ( REGEX_FLAGS ) ( finclude ? ( m_flags | MULTILINE ) : ( m_flags & ~MULTILINE ) );
else if( REGEX_CHAR(CH,'s') == *icur )
m_flags = ( REGEX_FLAGS ) ( finclude ? ( m_flags | SINGLELINE ) : ( m_flags & ~SINGLELINE ) );
else if( REGEX_CHAR(CH,'x') == *icur )
m_flags = ( REGEX_FLAGS ) ( finclude ? ( m_flags | EXTENDED ) : ( m_flags & ~EXTENDED ) );
else
break;
} while( iend != eat_whitespace( ++icur, iend ) );
break;
}
}
}
return tok;
}
// Functions used for making user-defined intrinsic character sets
// Storage for user-defined intrinsic charsets, shared by every instance of this
// syntax class for a given CH. register_intrinsic_charset() looks up the entry
// for a character with operator[] and stores the charset text in it.
static detail::charset_map<CH> s_charset_map;
// Reports whether `ch` is rejected as the key of a user-defined intrinsic
// charset. The decision is delegated to the private _invalid_charset overload
// (char or wchar_t) that overload resolution picks for CH.
static bool invalid_charset( CH ch )
{
    bool const fRejected = _invalid_charset( ch );
    return fRejected;
}
// Registers `str` as the definition of a user-defined intrinsic charset keyed
// by the character `ch`.
//   ch  - key character; rejected when invalid_charset( ch ) returns true.
//   str - charset definition. Its first token must be classified by reg_token
//         as BEGIN_CHARSET; the text following that token is what gets stored.
// Throws bad_regexpr when `ch` is rejected, or when `str` is empty or does not
// start with a charset opener.
static void register_intrinsic_charset( CH ch, std::basic_string<CH> const & str ) //throw( bad_regexpr, std::bad_alloc )
{
    perl_syntax sy( NOFLAGS );
    if( invalid_charset( ch ) )
        throw bad_regexpr( "invalid character specified to register_intrinsic_charset" );

    // reg_token advances a mutable iterator, so tokenize a private copy of the text.
    std::basic_string<CH> pat = str;
    typename std::basic_string<CH>::iterator istart = pat.begin();

    // An empty definition has no first character to tokenize. Reject it here
    // instead of handing reg_token an iterator that already equals end().
    if( pat.empty() || BEGIN_CHARSET != sy.reg_token( istart, pat.end() ) )
        throw bad_regexpr( "expecting beginning of charset" );

    // Whatever lies between istart (as left by reg_token) and the end of the
    // copy is recorded in the map entry for `ch`.
    regex::detail::charset_map<CH> & registry = s_charset_map;
    regex::detail::charset_map_node<CH> & map_node = registry[ ch ];
    map_node.set( std::basic_string<CH>( istart, pat.end() ) );
}
private:
// Reports whether the narrow character `ch` is rejected as the key of a
// user-defined intrinsic charset. A character is rejected when s_rgescape maps
// it to something other than NO_TOKEN, when it is a decimal digit, or when it
// is one of 'e', 'x' or 'c'.
static bool _invalid_charset( char ch )
{
    // Plain char may be signed. Both the table index and the <cctype>
    // classifier need a non-negative value (passing a negative char to isdigit
    // is undefined behaviour), so convert once and use the converted value.
    unsigned char const uch = static_cast<unsigned char>( ch );
    return NO_TOKEN != s_rgescape[ uch ]
        || isdigit( uch ) || 'e' == ch || 'x' == ch || 'c' == ch;
}
// Wide-character counterpart of the narrow check: a value above UCHAR_MAX is
// never rejected; any other value is narrowed to char and judged by the
// narrow overload.
static bool _invalid_charset( wchar_t ch )
{
    if( UCHAR_MAX < ch )
        return false;
    return _invalid_charset( static_cast<char>( ch ) );
}
};
// Out-of-class definition of the static charset registry declared inside
// perl_syntax; each instantiation of the template gets its own object.
template< typename CH >
detail::charset_map<CH> perl_syntax<CH>::s_charset_map;
// --------------------------------------------------------------------------
//
// Class: posix_syntax
//
// Description: Implements the basic POSIX regular expression syntax
//
// Methods: posix_syntax -
// posix_syntax -
// get_flags -
// set_flags -
// reg_token -
// quant_token -
// charset_token -
// subst_token -
// ext_token -
// invalid_charset -
// register_intrinsic_charset -
//
// Members: m_flags -
// s_charset_map -
//
// Typedefs: iterator -
// const_iterator -
// char_type -
//
// History: 11/16/2001 - ericne - Created
//
// --------------------------------------------------------------------------
// Tokenizer for the basic POSIX regular-expression dialect over character type
// CH. Each *_token member looks at the text starting at `icur` and returns the
// TOKEN it recognises, or NO_TOKEN. The reg_, quant_, charset_ and subst_
// members dereference `icur` without testing it, so callers must guarantee
// icur != iend.
template< typename CH >
class posix_syntax
{
    REGEX_FLAGS m_flags; // flags supplied at construction or through set_flags()

public:
    typedef typename std::basic_string<CH>::iterator iterator;
    typedef typename std::basic_string<CH>::const_iterator const_iterator;
    typedef CH char_type;

    // Names this syntax class instantiated for a different character type.
    template< typename CH2 > struct rebind { typedef posix_syntax<CH2> other; };

    posix_syntax( REGEX_FLAGS flags )
        : m_flags( flags )
    {
    }

    posix_syntax( posix_syntax<CH> const & sy )
        : m_flags( sy.m_flags )
    {
    }

    REGEX_FLAGS get_flags() const
    {
        return m_flags;
    }

    void set_flags( REGEX_FLAGS flags )
    {
        m_flags = flags;
    }

    // Classifies the pattern text at `icur`.
    //   '.'  '^'  '$'  '['  -> MATCH_ANY, BEGIN_LINE, END_LINE, BEGIN_CHARSET
    //   "\(" "\)" "\|"      -> BEGIN_GROUP, END_GROUP, ALTERNATION
    //   '\' before anything else (or at the end) -> ESCAPE
    // Recognised characters are consumed; for ESCAPE only the backslash is.
    // Any other character returns NO_TOKEN and leaves `icur` untouched.
    TOKEN reg_token( iterator & icur, const_iterator iend )
    {
        TOKEN tok = NO_TOKEN;
        switch( *icur )
        {
        case REGEX_CHAR(CH,'.'):
            tok = MATCH_ANY;
            ++icur;
            break;
        case REGEX_CHAR(CH,'^'):
            tok = BEGIN_LINE;
            ++icur;
            break;
        case REGEX_CHAR(CH,'$'):
            tok = END_LINE;
            ++icur;
            break;
        case REGEX_CHAR(CH,'['):
            tok = BEGIN_CHARSET;
            ++icur;
            break;
        case REGEX_CHAR(CH,'\\'):
            // Provisionally a plain escape; upgraded below when the next
            // character forms one of the backslash-introduced operators.
            tok = ESCAPE;
            ++icur;
            if( iend != icur )
            {
                switch( *icur )
                {
                case REGEX_CHAR(CH,'('):
                    tok = BEGIN_GROUP;
                    ++icur;
                    break;
                case REGEX_CHAR(CH,')'):
                    tok = END_GROUP;
                    ++icur;
                    break;
                case REGEX_CHAR(CH,'|'):
                    tok = ALTERNATION;
                    ++icur;
                    break;
                }
            }
            break;
        }
        return tok;
    }

    // Classifies a quantifier at `icur`.
    //   '*'                 -> ZERO_OR_MORE
    //   ','                 -> RANGE_SEPARATOR
    //   "\?" "\+" "\{" "\}" -> ZERO_OR_ONE, ONE_OR_MORE, BEGIN_RANGE, END_RANGE
    // A backslash that does not introduce one of those is not consumed: `icur`
    // is put back on the backslash and NO_TOKEN is returned.
    TOKEN quant_token( iterator & icur, const_iterator iend )
    {
        TOKEN tok = NO_TOKEN;
        switch( *icur )
        {
        case REGEX_CHAR(CH,'*'):
            tok = ZERO_OR_MORE;
            ++icur;
            break;
        case REGEX_CHAR(CH,','):
            tok = RANGE_SEPARATOR;
            ++icur;
            break;
        case REGEX_CHAR(CH,'\\'):
            ++icur;
            if( iend != icur )
            {
                switch( *icur )
                {
                case REGEX_CHAR(CH,'?'):
                    tok = ZERO_OR_ONE;
                    ++icur;
                    break;
                case REGEX_CHAR(CH,'+'):
                    tok = ONE_OR_MORE;
                    ++icur;
                    break;
                case REGEX_CHAR(CH,'{'):
                    tok = BEGIN_RANGE;
                    ++icur;
                    break;
                case REGEX_CHAR(CH,'}'):
                    tok = END_RANGE;
                    ++icur;
                    break;
                default:
                    // Not a quantifier escape: undo the step over the backslash.
                    --icur;
                    break;
                }
            }
            else
            {
                // Backslash was the last character: undo the step as well.
                --icur;
            }
        }
        return tok;
    }

    // Classifies the text at `icur` while inside a bracket expression.
    //   '^' '-' ']' -> CHARSET_NEGATE, CHARSET_RANGE, CHARSET_END (consumed)
    //   "[:"        -> each entry of detail::g_rgposix_charsets is tried; a match
    //                  at index i yields TOKEN( CHARSET_ALNUM + i ) and advances
    //                  `icur` by that entry's cchars.
    // Everything else returns NO_TOKEN with `icur` unchanged.
    TOKEN charset_token( iterator & icur, const_iterator iend )
    {
        TOKEN tok = NO_TOKEN;
        switch( *icur )
        {
        case REGEX_CHAR(CH,'^'):
            tok = CHARSET_NEGATE;
            ++icur;
            break;
        case REGEX_CHAR(CH,'-'):
            tok = CHARSET_RANGE;
            ++icur;
            break;
        case REGEX_CHAR(CH,']'):
            tok = CHARSET_END;
            ++icur;
            break;
        case REGEX_CHAR(CH,'['):
            {
                // Look one character ahead without consuming the '['. The
                // bounds test keeps a '[' that ends the input from causing a
                // read through the end iterator.
                iterator inext = icur;
                ++inext;
                if( iend != inext && REGEX_CHAR(CH,':') == *inext )
                {
                    for( size_t i=0; !tok && i < detail::g_cposix_charsets; ++i )
                    {
                        if( detail::is_posix_charset<const_iterator>( icur, iend, detail::g_rgposix_charsets[i].szcharset ) )
                        {
                            tok = TOKEN( CHARSET_ALNUM + i );
                            std::advance( icur, detail::g_rgposix_charsets[i].cchars );
                        }
                    }
                }
            }
            break;
        }
        return tok;
    }

    // Classifies the text at `icur` inside a substitution string. A backslash
    // is consumed and yields SUBST_ESCAPE, or SUBST_BACKREF when a decimal
    // digit follows it; the digit itself is left for the caller to read.
    // Any other character returns NO_TOKEN.
    TOKEN subst_token( iterator & icur, const_iterator iend )
    {
        TOKEN tok = NO_TOKEN;
        if( REGEX_CHAR(CH,'\\') == *icur )
        {
            tok = SUBST_ESCAPE;
            ++icur;
            if( iend != icur && REGEX_CHAR(CH,'0') <= *icur && REGEX_CHAR(CH,'9') >= *icur )
            {
                tok = SUBST_BACKREF;
            }
        }
        return tok;
    }

    // This syntax recognises no extension tokens: always NO_TOKEN, nothing consumed.
    TOKEN ext_token( iterator &, const_iterator )
    {
        return NO_TOKEN;
    }

    // Functions for making user-defined intrinsic character sets

    // Storage for the registered charsets, shared by all instances for a given CH.
    static detail::charset_map<CH> s_charset_map;

    // Reports whether `ch` is rejected as the key of a user-defined charset.
    static bool invalid_charset( CH ch )
    {
        return _invalid_charset( ch );
    }

    // Registers `str` as the definition of a user-defined intrinsic charset
    // keyed by the character `ch`.
    //   ch  - key character; rejected when invalid_charset( ch ) returns true.
    //   str - charset definition; must begin with '[' (BEGIN_CHARSET). The
    //         text after that opener is what gets stored.
    // Throws bad_regexpr when `ch` is rejected, or when `str` is empty or does
    // not start with a charset opener.
    static void register_intrinsic_charset( CH ch, std::basic_string<CH> const & str ) //throw( bad_regexpr, std::bad_alloc )
    {
        posix_syntax sy( NOFLAGS );
        if( invalid_charset( ch ) )
            throw bad_regexpr( "invalid character specified to register_intrinsic_charset" );

        // reg_token advances a mutable iterator, so tokenize a private copy.
        std::basic_string<CH> pat = str;
        typename std::basic_string<CH>::iterator istart = pat.begin();

        // reg_token dereferences its iterator unconditionally, so an empty
        // definition must be rejected before it is called.
        if( pat.empty() || BEGIN_CHARSET != sy.reg_token( istart, pat.end() ) )
            throw bad_regexpr( "expecting beginning of charset" );

        // istart now points just past the '['; store the remainder under `ch`.
        regex::detail::charset_map<CH> & charset_map = s_charset_map;
        regex::detail::charset_map_node<CH> & map_node = charset_map[ ch ];
        map_node.set( std::basic_string<CH>( istart, pat.end() ) );
    }

private:
    // Narrow check: `ch` is rejected when it appears in the list of characters
    // below (digits, the operator characters, backslash, 'e', 'x', 'c').
    static bool _invalid_charset( char ch )
    {
        static char const s_invalid[] = "0123456789()|?+{}\\exc";
        // The table is a char array whatever CH is, so the search must use
        // char_traits<char>. char_traits<CH> does not compile for CH = wchar_t,
        // and the wchar_t overload below forwards here.
        return NULL != std::char_traits<char>::find( s_invalid, ARRAYSIZE( s_invalid ) - 1, ch );
    }

    // Wide check: values above UCHAR_MAX are never rejected; anything else is
    // narrowed to char and judged by the narrow overload.
    static bool _invalid_charset( wchar_t ch )
    {
        return UCHAR_MAX >= ch && _invalid_charset( static_cast<char>( ch ) );
    }
};
// Out-of-class definition of the static charset registry declared inside
// posix_syntax; each instantiation of the template gets its own object.
template< typename CH >
detail::charset_map<CH> posix_syntax<CH>::s_charset_map;
} // namespace regex
#ifdef _MSC_VER
#pragma warning( pop )
#endif
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -