📄 syntax2.h
字号:
//+---------------------------------------------------------------------------
//
// Copyright ( C ) Microsoft, 1994 - 2002.
//
// File: syntax2.h
//
// Contents: syntax modules for regexpr
//
// Classes: perl_syntax, posix_syntax
//
// Author: Eric Niebler ( ericne@microsoft.com )
//
// History: 3-29-00 ericne Created
//
//----------------------------------------------------------------------------
#ifndef SYNTAX_H
#define SYNTAX_H
#ifdef _MSC_VER
#pragma warning( push )
// warning C4786: identifier was truncated to '255' characters in the debug information
#pragma warning( disable : 4786 )
#endif
#include <map>
#include <iosfwd>
#include <string>
#include <cctype>
#include <cwctype>
#include <cassert>
#include <iterator>
#include <stdexcept>
// Fallback definitions for helper macros that platform headers may already
// supply. Each one is defined only when it is still missing at this point.

// Number of elements in the built-in array `a`. The argument must be a real
// array: a pointer would silently yield sizeof(pointer)/sizeof(element).
#ifndef ARRAYSIZE
# define ARRAYSIZE( a ) (sizeof(a)/sizeof((a)[0]))
#endif
// Largest value of unsigned char. The fallback assumes 8-bit bytes.
#ifndef UCHAR_MAX
# define UCHAR_MAX 0xff
#endif
// Largest value of wchar_t. A plain (wchar_t)-1 is only the maximum when
// wchar_t is unsigned; where it is signed that expression is -1. The fallback
// therefore checks the signedness and, for a signed wchar_t, builds the
// all-ones-except-sign-bit pattern without overflowing ((2^(N-2) - 1) * 2 + 1).
// Like UCHAR_MAX above it assumes 8-bit bytes. Both arms have type wchar_t and
// the whole expression is an integral constant expression.
#ifndef WCHAR_MAX
# define WCHAR_MAX ( (wchar_t)-1 < 0 \
    ? (wchar_t)( ( ( (wchar_t)1 << ( sizeof( wchar_t ) * 8 - 2 ) ) - 1 ) * 2 + 1 ) \
    : (wchar_t)-1 )
#endif
// Compiler abstraction layer: every REGEX_* macro below is defined exactly
// once, either in the Microsoft-compiler branch or in the generic branch.
#ifdef _MSC_VER
// --- Microsoft compiler: map each macro onto the vendor keyword/intrinsic ---
# include <crtdbg.h>
// Debug assertion, routed to the CRT's _ASSERTE.
# define REGEX_ASSERT(x) _ASSERTE(x)
# define REGEX_FORCEINLINE __forceinline
# define REGEX_SELECTANY __declspec(selectany)
# define REGEX_CDECL __cdecl
// Structured-exception-handling wrappers: expand to __try / __except( x ).
# define REGEX_SEH_TRY __try
# define REGEX_SEH_EXCEPT(x) __except( x )
# define REGEX_RESET_STK_OFLW() _resetstkoflw()
// Compilers newer than _MSC_VER 1200 get noinline/deprecated and the
// `typename` keyword; for 1200 and older all three expand to nothing.
# if 1200 < _MSC_VER
# define REGEX_NOINLINE __declspec(noinline)
# define REGEX_DEPRECATED __declspec(deprecated)
# define REGEX_DEPENDENT_TYPENAME typename
# else
# define REGEX_NOINLINE
# define REGEX_DEPRECATED
# define REGEX_DEPENDENT_TYPENAME
# endif
#else
// --- Any other compiler: standard equivalents, or nothing at all ---
# include <cassert>
# define REGEX_ASSERT(x) assert(x)
# define REGEX_NOINLINE
# define REGEX_FORCEINLINE inline
# define REGEX_SELECTANY
# define REGEX_CDECL
// No SEH here: REGEX_SEH_TRY vanishes and REGEX_SEH_EXCEPT(x) becomes
// `if( false )`, so the handler statement written after it still compiles
// but is never executed. The filter expression x is discarded.
# define REGEX_SEH_TRY
# define REGEX_SEH_EXCEPT(x) if( false )
// Expands to a no-op expression so it can still be used as a statement.
# define REGEX_RESET_STK_OFLW() ((void)0)
# define REGEX_DEPRECATED
# define REGEX_DEPENDENT_TYPENAME typename
#endif
// REGEX_STRING( CharT, sz ): yields the string literal `sz` in the character
// width of CharT. Both the narrow literal and its wide twin (formed by pasting
// an L prefix onto sz) are handed to ::regex::detail::literal<CharT>::string,
// so sz has to be a literal token, not a variable.
#define REGEX_STRING(CharT,sz) (::regex::detail::literal<CharT>::string( sz, L##sz ))
// REGEX_CHAR( CharT, ch ): the character-literal counterpart. Instantiates
// literal<CharT>::character with both spellings of ch and casts its `value`
// to CharT. As above, ch must be a literal token.
#define REGEX_CHAR(CharT,ch) (static_cast<CharT>(::regex::detail::literal<CharT>::template character<ch,L##ch>::value))
// Explicit std::iterator_traits specializations for raw character pointers.
// Only compiled for the Microsoft compiler when the library version macro
// _CPPLIB_VER is at most 310 (an undefined _CPPLIB_VER counts as 0 here).
// NOTE(review): presumably those library versions lack the generic pointer
// specialization -- confirm against the targeted toolchains.
//
// The pointer-to-const specializations expose `char const *` / `char const &`
// (and the wchar_t equivalents) as `pointer` / `reference`, matching what the
// standard traits report for T const *; value_type stays unqualified.
#if defined(_MSC_VER) && _CPPLIB_VER <= 310
namespace std
{
    // Traits for mutable narrow-character pointers.
    template<>
    struct iterator_traits< char * >
    {
        typedef random_access_iterator_tag iterator_category;
        typedef char value_type;
        typedef ptrdiff_t difference_type;
        typedef difference_type distance_type; // retained
        typedef char * pointer;
        typedef char & reference;
    };

    // Traits for read-only narrow-character pointers.
    template<>
    struct iterator_traits< char const * >
    {
        typedef random_access_iterator_tag iterator_category;
        typedef char value_type;
        typedef ptrdiff_t difference_type;
        typedef difference_type distance_type; // retained
        typedef char const * pointer;
        typedef char const & reference;
    };

    // Traits for mutable wide-character pointers.
    template<>
    struct iterator_traits< wchar_t * >
    {
        typedef random_access_iterator_tag iterator_category;
        typedef wchar_t value_type;
        typedef ptrdiff_t difference_type;
        typedef difference_type distance_type; // retained
        typedef wchar_t * pointer;
        typedef wchar_t & reference;
    };

    // Traits for read-only wide-character pointers.
    template<>
    struct iterator_traits< wchar_t const * >
    {
        typedef random_access_iterator_tag iterator_category;
        typedef wchar_t value_type;
        typedef ptrdiff_t difference_type;
        typedef difference_type distance_type; // retained
        typedef wchar_t const * pointer;
        typedef wchar_t const & reference;
    };
}
#endif
namespace regex
{
// Exception type of this library, derived from std::invalid_argument so that
// handlers for std::invalid_argument / std::logic_error / std::exception also
// catch it.
// NOTE(review): presumably thrown when a pattern cannot be compiled -- the
// throw sites are outside this part of the file.
class bad_regexpr
    : public std::invalid_argument
{
public:
    // Stores the message `s` in the std::invalid_argument base, from where
    // what() returns it.
    explicit bad_regexpr( std::string const & s )
        : std::invalid_argument( s )
    {
    }

    // Non-throwing destructor, spelled out explicitly.
    virtual ~bad_regexpr() throw()
    {
    }
};
//
// Flags to control how matching occurs.
// Every enumerator except NOFLAGS is a distinct single bit, so flags can be
// combined with the bitwise operators declared for REGEX_FLAGS.
// The enumerator list ends without a trailing comma so that the header is
// also accepted by compilers that follow the pre-C++11 grammar.
//
enum REGEX_FLAGS
{
    NOFLAGS       = 0x0000, // no option selected
    NOCASE        = 0x0001, // ignore case
    GLOBAL        = 0x0002, // match everywhere in the string
    MULTILINE     = 0x0004, // ^ and $ can match internal line breaks
    SINGLELINE    = 0x0008, // . can match newline character
    RIGHTMOST     = 0x0010, // start matching at the right of the string
    NOBACKREFS    = 0x0020, // only meaningful when used with GLOBAL and substitute
    FIRSTBACKREFS = 0x0040, // only meaningful when used with GLOBAL
    ALLBACKREFS   = 0x0080, // only meaningful when used with GLOBAL
    NORMALIZE     = 0x0100, // Preprocess patterns: "\\n" => "\n", etc.
    EXTENDED      = 0x0200  // ignore whitespace in pattern
};
// For backwards compatibility: lower-case alias of NOFLAGS.
REGEX_FLAGS const noflags = NOFLAGS;
// Helper operators that let REGEX_FLAGS values be combined without the
// caller having to cast back from an integer type.
//
// operator| : returns the union of the bits set in f1 and f2.
inline REGEX_FLAGS operator|( REGEX_FLAGS f1, REGEX_FLAGS f2 )
{
    unsigned const combined = static_cast<unsigned>( f1 ) | static_cast<unsigned>( f2 );
    return static_cast<REGEX_FLAGS>( combined );
}
// operator|= : adds the bits of f2 to f1 in place and returns f1 by reference
// so the expression can be chained.
inline REGEX_FLAGS & operator|=( REGEX_FLAGS & f1, REGEX_FLAGS f2 )
{
    f1 = f1 | f2;
    return f1;
}
// operator& : returns only the bits that are set in both f1 and f2.
inline REGEX_FLAGS operator&( REGEX_FLAGS f1, REGEX_FLAGS f2 )
{
    unsigned const common = static_cast<unsigned>( f1 ) & static_cast<unsigned>( f2 );
    return static_cast<REGEX_FLAGS>( common );
}
// operator&= : keeps in f1 only the bits also present in f2 and returns f1 by
// reference so the expression can be chained.
inline REGEX_FLAGS & operator&=( REGEX_FLAGS & f1, REGEX_FLAGS f2 )
{
    f1 = f1 & f2;
    return f1;
}
// operator~ : bitwise complement of a flag set, typically used as a mask
// together with operator& to clear flags.
// Compiled for every non-Microsoft compiler and for Microsoft compilers newer
// than _MSC_VER 1200.
// NOTE(review): the complement sets bits that no REGEX_FLAGS enumerator names;
// whether such a value is representable in the enumeration depends on the
// language revision -- confirm before using the result for anything other
// than masking.
#if !defined(_MSC_VER) || 1200 < _MSC_VER
inline REGEX_FLAGS operator~( REGEX_FLAGS f )
{
    unsigned const inverted = ~static_cast<unsigned>( f );
    return static_cast<REGEX_FLAGS>( inverted );
}
#endif
//
// The following are the tokens that can be emitted by the syntax module.
// Don't reorder this list!!!
// Only NO_TOKEN carries an explicit value (0); every later enumerator takes
// the next integer, so inserting, removing or moving an entry changes the
// numeric value of everything that follows it.
// NOTE(review): presumably other code relies on these values or on the
// relative order of the groups -- confirm before touching the list.
//
enum TOKEN
{
// Value 0: no token.
NO_TOKEN = 0,
// REGULAR TOKENS
BEGIN_GROUP,
END_GROUP,
ALTERNATION,
BEGIN_LINE,
END_LINE,
BEGIN_CHARSET,
MATCH_ANY,
ESCAPE,
// QUANTIFICATION TOKENS
// NOTE(review): the *_MIN variants are presumably the minimal (non-greedy)
// forms of the token with the same base name -- confirm.
ONE_OR_MORE,
ZERO_OR_MORE,
ZERO_OR_ONE,
ONE_OR_MORE_MIN,
ZERO_OR_MORE_MIN,
ZERO_OR_ONE_MIN,
BEGIN_RANGE,
RANGE_SEPARATOR,
END_RANGE,
END_RANGE_MIN,
// ESCAPE SEQUENCES
ESC_DIGIT,
ESC_NOT_DIGIT,
ESC_SPACE,
ESC_NOT_SPACE,
ESC_WORD,
ESC_NOT_WORD,
ESC_BEGIN_STRING,
ESC_END_STRING,
ESC_END_STRING_z,
ESC_WORD_BOUNDARY,
ESC_NOT_WORD_BOUNDARY,
ESC_WORD_START,
ESC_WORD_STOP,
ESC_QUOTE_META_ON,
ESC_QUOTE_META_OFF,
// SUBSTITUTION TOKENS
SUBST_BACKREF,
SUBST_PREMATCH,
SUBST_POSTMATCH,
SUBST_MATCH,
SUBST_ESCAPE,
SUBST_QUOTE_META_ON,
SUBST_UPPER_ON,
SUBST_UPPER_NEXT,
SUBST_LOWER_ON,
SUBST_LOWER_NEXT,
SUBST_ALL_OFF,
// CHARSET TOKENS
// From CHARSET_ALNUM onwards the entries come in pairs: each class name is
// immediately followed by its CHARSET_NOT_* counterpart.
CHARSET_NEGATE,
CHARSET_ESCAPE,
CHARSET_RANGE,
CHARSET_BACKSPACE,
CHARSET_END,
CHARSET_ALNUM,
CHARSET_NOT_ALNUM,
CHARSET_ALPHA,
CHARSET_NOT_ALPHA,
CHARSET_BLANK,
CHARSET_NOT_BLANK,
CHARSET_CNTRL,
CHARSET_NOT_CNTRL,
CHARSET_DIGIT,
CHARSET_NOT_DIGIT,
CHARSET_GRAPH,
CHARSET_NOT_GRAPH,
CHARSET_LOWER,
CHARSET_NOT_LOWER,
CHARSET_PRINT,
CHARSET_NOT_PRINT,
CHARSET_PUNCT,
CHARSET_NOT_PUNCT,
CHARSET_SPACE,
CHARSET_NOT_SPACE,
CHARSET_UPPER,
CHARSET_NOT_UPPER,
CHARSET_XDIGIT,
CHARSET_NOT_XDIGIT,
// EXTENSION TOKENS
EXT_NOBACKREF,
EXT_POS_LOOKAHEAD,
EXT_NEG_LOOKAHEAD,
EXT_POS_LOOKBEHIND,
EXT_NEG_LOOKBEHIND,
EXT_INDEPENDENT,
EXT_COMMENT,
EXT_CONDITION,
EXT_RECURSE,
EXT_UNKNOWN
};
namespace detail
{
// literal<CharT> selects between the narrow and the wide spelling of the same
// literal. Callers always supply both spellings (the REGEX_STRING and
// REGEX_CHAR macros do so); the specialization for CharT returns the matching
// one and ignores the other. Only char and wchar_t are specialized; the
// primary template is declared but never defined.
template< typename CharT > struct literal;

// Narrow characters: keep the first (char) argument.
template<>
struct literal<char>
{
    static char const * string( char const * narrow, wchar_t const * )
    {
        return narrow;
    }

    template< char narrow_ch, wchar_t >
    struct character
    {
        enum { value = narrow_ch };
    };
};

// Wide characters: keep the second (wchar_t) argument.
template<>
struct literal<wchar_t>
{
    static wchar_t const * string( char const *, wchar_t const * wide )
    {
        return wide;
    }

    template< char, wchar_t wide_ch >
    struct character
    {
        enum { value = wide_ch };
    };
};
// Plain record pairing a narrow string with a count.
// NOTE(review): judging by the names, m_szcharset is the name of a POSIX
// character class and cchars its length in characters -- confirm against the
// definition of g_rgposix_charsets, which is not in this part of the file.
struct posix_charset_type
{
char const * m_szcharset;
size_t cchars;
};
// Table of posix_charset_type records and a size_t that presumably holds its
// number of entries; both are only declared here and defined elsewhere.
extern posix_charset_type const g_rgposix_charsets[];
extern size_t const g_cposix_charsets;
// Tests whether the character range [icur, iend) begins with the
// null-terminated narrow string szcharset.
//   icur, iend - the range to inspect; it may be longer than szcharset
//   szcharset  - the name to look for
// Returns true when every character of szcharset was matched before the
// range ran out (an empty szcharset always matches); false on the first
// differing character or when the range is shorter than szcharset.
template< typename IterT >
bool is_posix_charset( IterT icur, IterT iend, char const * szcharset )
{
    while( char() != *szcharset )
    {
        // Characters of the name remain: the input must supply the same one.
        bool const exhausted = !( iend != icur );
        if( exhausted || *icur != *szcharset )
            return false;
        ++icur;
        ++szcharset;
    }
    // The whole name was consumed.
    return true;
}
// Forward-declare the class that holds all the information
// about the set of characters that can be matched by a charset
struct charset;
// Releases a charset; only declared here, the definition lives elsewhere.
void free_charset( charset const * );

// Associates a string with up to two charset objects, one per case mode.
// Copying a node is shallow: the copy refers to the same charset objects as
// the original. The node has no destructor, so the charsets are released only
// through clear() (which set() also calls).
// NOTE(review): clear() hands null pointers and, after a shallow copy,
// possibly already-released pointers to free_charset; whether that is safe
// depends on free_charset, which is not visible here.
template< typename CharT >
struct charset_map_node
{
    std::basic_string<CharT> m_str;
    charset const * m_rgcharsets[2]; // 0==case, 1==nocase

    // Starts with an empty string and no charsets.
    charset_map_node()
    {
        m_rgcharsets[0] = m_rgcharsets[1] = 0;
    }

    // Shallow copy: duplicates the string and copies both charset pointers.
    charset_map_node( charset_map_node const & node )
        : m_str( node.m_str )
    {
        m_rgcharsets[0] = node.m_rgcharsets[0];
        m_rgcharsets[1] = node.m_rgcharsets[1];
    }

    // Shallow assignment; the charsets previously referenced by this node
    // are NOT released.
    charset_map_node & operator=( charset_map_node const & node )
    {
        m_str = node.m_str;
        m_rgcharsets[0] = node.m_rgcharsets[0];
        m_rgcharsets[1] = node.m_rgcharsets[1];
        return *this;
    }

    // Releases the current charsets and stores a copy of str.
    // The copy is taken before clear() runs: str may refer to this node's own
    // m_str, which clear() empties, and a failed allocation must not leave
    // the node half-reset.
    void set( std::basic_string<CharT> const & str )
    {
        std::basic_string<CharT> fresh( str );
        clear();
        m_str.swap( fresh );
    }

    // Empties the string (swapping with a temporary also gives up its
    // storage), passes both charset pointers to free_charset and nulls them.
    void clear()
    {
        std::basic_string<CharT>().swap( m_str );
        free_charset( m_rgcharsets[0] );
        free_charset( m_rgcharsets[1] );
        m_rgcharsets[0] = m_rgcharsets[1] = 0;
    }
};
template< typename CharT >
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -