📄 regex
字号:
// regex TR1 header
#pragma once
#ifndef _REGEX_
#define _REGEX_
#ifndef RC_INVOKED
#ifndef _DISABLE_TRIGRAPH_WARNINGS
#pragma warning(default: 4837)
#endif /* _DISABLE_TRIGRAPH_WARNINGS */
#include <algorithm>
#include <iterator>
#include <limits>
#include <locale>
#include <memory>
#include <string>
#include <stdexcept>
#include <utility>
#include <vector>
#include <ctype.h>
#include <wchar.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#pragma pack(push,_CRT_PACKING)
#pragma warning(push,3)
#if _HAS_EXCEPTIONS
#else /* _HAS_EXCEPTIONS */
#error regex without exception handling not supported
#endif /* _HAS_EXCEPTIONS */
// Supply a default for _ENHANCED_REGEX_VISUALIZER when the build has not
// already defined it: 1 when _DEBUG is defined, 0 otherwise.
#ifndef _ENHANCED_REGEX_VISUALIZER
#ifdef _DEBUG
#define _ENHANCED_REGEX_VISUALIZER 1
#else /* _DEBUG */
#define _ENHANCED_REGEX_VISUALIZER 0
#endif /* _DEBUG */
#endif /* _ENHANCED_REGEX_VISUALIZER */
// _REGEX_DIFFT(iter) expands to
// `typename iterator_traits<iter>::difference_type`.
#define _REGEX_DIFFT(iter) \
typename iterator_traits<iter>::difference_type
// _REGEX_VALT(iter) expands to
// `typename iterator_traits<iter>::value_type`.
#define _REGEX_VALT(iter) \
typename iterator_traits<iter>::value_type
_STD_BEGIN
namespace tr1 { // TR1 additions
// NAMED CONSTANTS
// Character codes the parser treats specially. Every enumerator except
// _Meta_eos and _Meta_chr is numerically equal to the character literal it
// is initialised from.
enum _Meta_type
{ // meta character representations for parser
// punctuation with syntactic meaning
_Meta_lpar = '(',
_Meta_rpar = ')',
_Meta_dlr = '$',
_Meta_caret = '^',
_Meta_dot = '.',
_Meta_star = '*',
_Meta_plus = '+',
_Meta_query = '?',
_Meta_lsq = '[',
_Meta_rsq = ']',
_Meta_bar = '|',
_Meta_esc = '\\',
_Meta_dash = '-',
_Meta_lbr = '{',
_Meta_rbr = '}',
_Meta_comma = ',',
_Meta_colon = ':',
_Meta_equal = '=',
_Meta_exc = '!',
// non-character sentinels
_Meta_eos = -1, // presumably "end of sequence" -- confirm against the parser
// control characters
_Meta_nl = '\n',
_Meta_cr = '\r',
_Meta_bsp = '\b',
_Meta_chr = 0, // presumably "ordinary character" -- confirm against the parser
// characters that may follow a backslash
_Esc_bsl = '\\', // same value as _Meta_esc
_Esc_word = 'b', // NOTE: shares the value 'b' with _Esc_ctrl_b below
_Esc_not_word = 'B',
_Esc_ctrl_a = 'a',
_Esc_ctrl_b = 'b',
_Esc_ctrl_f = 'f',
_Esc_ctrl_n = 'n',
_Esc_ctrl_r = 'r',
_Esc_ctrl_t = 't',
_Esc_ctrl_v = 'v',
_Esc_ctrl = 'c',
_Esc_hex = 'x',
_Esc_uni = 'u'
};
// NAMESPACE regex_constants
// _REGEX_BITMASK_OPS(Ty) defines seven inline operators for the type Ty:
// the compound assignments &=, |=, ^= (which store into and return _Left)
// and the value-returning &, |, ^ and unary ~. Each operator converts its
// operands to int, applies the built-in bitwise operation, and casts the
// result back to Ty. The operators are emitted into whatever scope the
// macro is invoked in.
#define _REGEX_BITMASK_OPS(Ty) \
inline Ty& operator&=(Ty& _Left, Ty _Right) \
{ /* return _Left &= _Right */ \
_Left = (Ty)((int)_Left & (int)_Right); return (_Left); \
} \
\
inline Ty& operator|=(Ty& _Left, Ty _Right) \
{ /* return _Left |= _Right */ \
_Left = (Ty)((int)_Left | (int)_Right); return (_Left); \
} \
\
inline Ty& operator^=(Ty& _Left, Ty _Right) \
{ /* return _Left ^= _Right */ \
_Left = (Ty)((int)_Left ^ (int)_Right); return (_Left); \
} \
\
inline Ty operator&(Ty _Left, Ty _Right) \
{ /* return _Left & _Right */ \
return ((Ty)((int)_Left & (int)_Right)); \
} \
\
inline Ty operator|(Ty _Left, Ty _Right) \
{ /* return _Left | _Right */ \
return ((Ty)((int)_Left | (int)_Right)); \
} \
\
inline Ty operator^(Ty _Left, Ty _Right) \
{ /* return _Left ^ _Right */ \
return ((Ty)((int)_Left ^ (int)_Right)); \
} \
\
inline Ty operator~(Ty _Left) \
{ /* return ~_Left */ \
return ((Ty)~(int)_Left); \
}
// Flag and error enumerations for regular expressions. The two flag enums
// get bitwise operators from _REGEX_BITMASK_OPS so values can be combined.
namespace regex_constants {
// constants used in regular expressions
enum syntax_option_type
{ // specify RE syntax rules
// grammar selection: one bit per grammar
ECMAScript = 0x01,
basic = 0x02,
extended = 0x04,
awk = 0x08,
grep = 0x10,
egrep = 0x20,
_Gmask = 0x3F, // union of the six grammar bits above
// modifier bits; none of them overlaps _Gmask
icase = 0x0100,
nosubs = 0x0200,
optimize = 0x0400,
collate = 0x0800
};
// defines &=, |=, ^=, &, |, ^ and ~ for syntax_option_type
_REGEX_BITMASK_OPS(syntax_option_type)
enum match_flag_type
{ // specify matching and formatting rules
// match_* bits (match_default is the empty set)
match_default = 0x0000,
match_not_bol = 0x0001,
match_not_eol = 0x0002,
match_not_bow = 0x0004,
match_not_eow = 0x0008,
match_any = 0x0010,
match_not_null = 0x0020,
match_continuous = 0x0040,
_Match_partial = 0x0080, // underscore-prefixed: presumably internal -- confirm
match_prev_avail = 0x0100,
// format_* bits (format_default is also 0, i.e. equal to match_default)
format_default = 0x0000,
format_sed = 0x0400,
format_no_copy = 0x0800,
format_first_only = 0x1000,
_Match_not_null = 0x2000 // distinct bit from match_not_null (0x0020); presumably internal -- confirm
};
// defines &=, |=, ^=, &, |, ^ and ~ for match_flag_type
_REGEX_BITMASK_OPS(match_flag_type)
// Error codes; values are implicit, counting up from error_collate == 0
// to error_syntax == 14.
enum error_type
{ // identify error
error_collate,
error_ctype,
error_escape,
error_backref,
error_brack,
error_paren,
error_brace,
error_badbrace,
error_range,
error_space,
error_badrepeat,
error_complexity,
error_stack,
error_parse,
error_syntax
};
} // namespace regex_constants
_CRTIMP2_PURE void __CLRCALL_PURE_OR_CDECL _Xbad(regex_constants::error_type);
_CRTIMP2_PURE void __CLRCALL_PURE_OR_CDECL _Xmem();
// TEMPLATE CLASS regex_traits
template<class _Elem>
class regex_traits;
// One row of a character-class name table: a name, its length, and the
// ctype mask the name stands for.
template<class _Elem>
struct _Cl_names
{ // structure to associate class name with mask value
const _Elem *_Ptr; // class name text; a null pointer marks the end of a table
unsigned _Len; // number of characters in the name
_STD ctype_base::mask _Ctype; // mask value associated with the name
};
// Case-sensitive character comparison predicate.
// _RxTraits is used only to obtain the character type (char_type).
template<class _RxTraits>
    struct _Cmp_cs
    {   // functor to compare two character values for equality
    typedef typename _RxTraits::char_type _Elem;

    // Returns true when _E1 and _E2 compare equal with operator==.
    // const-qualified: the functor has no state, so it must also be
    // callable through a const object or a const reference.
    bool operator()(_Elem _E1, _Elem _E2) const
        {   // return true if equal
        return (_E1 == _E2);
        }
    };
// Case-insensitive character comparison predicate.
// Holds a reference to the traits object it was built from, so that object
// must outlive the functor.
template<class _RxTraits>
    struct _Cmp_icase
    {   // functor to compare for case-insensitive equality
    typedef typename _RxTraits::char_type _Elem;

    _Cmp_icase(const _RxTraits &_Tr)
        : _Traits(_Tr)
        {   // construct, remembering the traits to translate with
        }

    // Returns true when both characters map to the same value under
    // _Traits.translate_nocase(). const-qualified: it only reads through
    // the const traits reference, so it must also be callable on a const
    // functor.
    bool operator()(_Elem _E1, _Elem _E2) const
        {   // return true if equal
        return (_Traits.translate_nocase(_E1)
            == _Traits.translate_nocase(_E2));
        }

    const _RxTraits &_Traits;

private:
    // declared but not defined: the reference member makes the functor
    // non-assignable
    _Cmp_icase& operator=(const _Cmp_icase&);
    };
// Locale-sensitive character comparison predicate.
// Holds a reference to the traits object it was built from, so that object
// must outlive the functor.
template<class _RxTraits>
    struct _Cmp_collate
    {   // functor to compare for locale-specific equality
    typedef typename _RxTraits::char_type _Elem;

    _Cmp_collate(const _RxTraits &_Tr)
        : _Traits(_Tr)
        {   // construct, remembering the traits to translate with
        }

    // Returns true when both characters map to the same value under
    // _Traits.translate(). const-qualified: it only reads through the
    // const traits reference, so it must also be callable on a const
    // functor.
    bool operator()(_Elem _E1, _Elem _E2) const
        {   // return true if equal
        return (_Traits.translate(_E1) == _Traits.translate(_E2));
        }

    const _RxTraits &_Traits;

private:
    // declared but not defined: the reference member makes the functor
    // non-assignable
    _Cmp_collate& operator=(const _Cmp_collate&);
    };
// Compare [_First1, _Last1) with [_First2, _Last2) element by element.
// Returns true only when both ranges have the same length and _Pred accepts
// every corresponding pair of elements.
template<class _InIt1,
    class _InIt2,
    class _Pr>
    bool _Same(_InIt1 _First1, _InIt1 _Last1,
        _InIt2 _First2, _InIt2 _Last2, _Pr _Pred)
    {   // return true if two sequences match using _Pred
    for (; ; ++_First1, ++_First2)
        {   // advance through both ranges in lock step
        const bool _Done1 = _First1 == _Last1;
        const bool _Done2 = _First2 == _Last2;
        if (_Done1 || _Done2)
            return (_Done1 && _Done2);  // match only if both ended together
        if (!_Pred(*_First1, *_First2))
            return (false); // first mismatching pair decides
        }
    }
// Non-template base for the traits classes: gives the character-class
// masks short names and defines the mask type used to pass them around.
struct _Regex_traits_base
{ // base of all regular expression traits
// Each enumerator other than _Ch_none is initialised from the
// ctype_base mask of the same name.
enum _Char_class_type {
_Ch_none = 0,
_Ch_alnum = _STD ctype_base::alnum,
_Ch_alpha = _STD ctype_base::alpha,
_Ch_cntrl = _STD ctype_base::cntrl,
_Ch_digit = _STD ctype_base::digit,
_Ch_graph = _STD ctype_base::graph,
_Ch_lower = _STD ctype_base::lower,
_Ch_print = _STD ctype_base::print,
_Ch_punct = _STD ctype_base::punct,
_Ch_space = _STD ctype_base::space,
_Ch_upper = _STD ctype_base::upper,
_Ch_xdigit = _STD ctype_base::xdigit,
// NOTE: _Ch_blank has the same value as _Ch_space, so the two classes
// are indistinguishable through this mask.
_Ch_blank = _STD ctype_base::space
};
// type used for character-class masks
typedef _STD ctype_base::mask char_class_type;
};
template<class _Elem>
class _Regex_traits
: public _Regex_traits_base
{ // base class for regular expression traits
public:
typedef _Regex_traits<_Elem> _Myt;
typedef _Elem char_type;
typedef size_t size_type;
typedef _STD basic_string<_Elem> string_type;
typedef _STD locale locale_type;
static size_type length(const _Elem *_Str)
{ // return length of _Str
if (_Str != 0)
return (char_traits<_Elem>::length(_Str));
_Xbad(regex_constants::error_parse); // to quiet diagnostics
return (0);
}
_Regex_traits()
: _Pcoll(0), _Pctype(0)
{ // default construct
}
_Regex_traits(const _Myt&)
: _Pcoll(0), _Pctype(0)
{ // copy construct
}
_Myt& operator=(const _Myt&)
{ // assign
return (*this);
}
_Elem translate(_Elem _Ch) const
{ // provide locale-sensitive mapping
string_type _Res = _Getcoll()->transform(&_Ch, &_Ch + 1);
return (_Res.length() == 1 ? _Res[0] : _Ch);
}
_Elem translate_nocase(_Elem _Ch) const
{ // provide case-insensitive mapping
return (_Getctype()->tolower(_Ch));
}
template<class _FwdIt>
string_type transform(_FwdIt _First, _FwdIt _Last) const
{ // apply locale-specific transformation
return (_Getcoll()->transform(_First, _Last));
}
template<class _FwdIt>
string_type transform_primary(_FwdIt _First, _FwdIt _Last) const
{ // apply locale-specific case-insensitive transformation
string_type _Res;
if (_First != _Last)
{ // non-empty string, transform it
_STD vector<_Elem> _Temp(_First, _Last);
_Getctype()->tolower(&*_Temp.begin(),
&*_Temp.begin() + _Temp.size());
_Res = _Getcoll()->transform(&*_Temp.begin(),
&*_Temp.begin() + _Temp.size());
}
return (_Res);
}
bool isctype(_Elem _Ch, char_class_type _Fx) const
{ // return true if _Ch is in character class _Fx
if (_Fx != (char_class_type)(-1))
return (_Getctype()->is(_Fx, _Ch));
else
return (_Ch == '_' // assumes L'_' == '_'
|| _Getctype()->is(_Ch_alnum, _Ch));
}
template<class _Iter>
char_class_type lookup_classname(_Iter _First, _Iter _Last,
bool _Icase = false) const
{ // map [_First, _Last) to character class mask value
_DEBUG_RANGE(_First, _Last);
int _Ix = 0;
for (; _Names[_Ix]._Ptr; ++_Ix)
if (_Same(_First, _Last,
_Names[_Ix]._Ptr, _Names[_Ix]._Ptr + _Names[_Ix]._Len,
_Cmp_icase<_Regex_traits<_Elem> >(*this)))
break;
char_class_type _Mask = (char_class_type)0;
if (_Names[_Ix]._Ptr != 0)
_Mask = _Names[_Ix]._Ctype;
if (_Icase && _Mask & (_Ch_lower | _Ch_upper))
_Mask |= _Ch_lower | _Ch_upper;
return (_Mask);
}
template<class _FwdIt>
string_type lookup_collatename(_FwdIt _First, _FwdIt _Last) const
{ // map [_First, _Last) to collation element
return (string_type(_First, _Last));
}
locale_type imbue(locale_type _Lx)
{ // store locale object
locale_type _Tmp = _Loc;
_Loc = _Lx;
return (_Tmp);
}
locale_type getloc() const
{ // return locale object
return (_Loc);
}
const _STD collate<_Elem> *_Getcoll() const
{ // get collate facet pointer
if (_Pcoll == 0)
*((const _STD collate<_Elem> **)&_Pcoll) =
&_USE(getloc(), _STD collate<_Elem>);
return (_Pcoll);
}
const _STD ctype<_Elem> *_Getctype() const
{ // get ctype facet pointer
if (_Pctype == 0)
*((const _STD ctype<_Elem> **)&_Pctype) =
&_USE(getloc(), _STD ctype<_Elem>);
return (_Pctype);
}
private:
const _STD collate<_Elem> *_Pcoll;
const _STD ctype<_Elem> *_Pctype;
locale_type _Loc;
static const _Cl_names<_Elem> _Names[];
};
// CHARACTER CLASS NAMES
// Builds one _Cl_names row from a string literal n and mask c. The length
// is the literal's element count minus one, i.e. it excludes the
// terminating null character.
#define _REGEX_CHAR_CLASS_NAME(n, c) { n, sizeof(n)/sizeof(n[0]) - 1, c }
// Name table for char. Besides the long names it contains the one-letter
// names "d", "s" and "w"; "w" carries the all-ones mask instead of a ctype
// mask. "blank" maps to _Ch_blank, which has the same value as _Ch_space.
// The final all-zero row terminates the table (its _Ptr is null).
template<>
const _Cl_names<char> _Regex_traits<char>::_Names[] =
{ // map class names to numeric constants
_REGEX_CHAR_CLASS_NAME("alnum", _Regex_traits<char>::_Ch_alnum),
_REGEX_CHAR_CLASS_NAME("alpha", _Regex_traits<char>::_Ch_alpha),
_REGEX_CHAR_CLASS_NAME("blank", _Regex_traits<char>::_Ch_blank),
_REGEX_CHAR_CLASS_NAME("cntrl", _Regex_traits<char>::_Ch_cntrl),
_REGEX_CHAR_CLASS_NAME("d", _Regex_traits<char>::_Ch_digit),
_REGEX_CHAR_CLASS_NAME("digit", _Regex_traits<char>::_Ch_digit),
_REGEX_CHAR_CLASS_NAME("graph", _Regex_traits<char>::_Ch_graph),
_REGEX_CHAR_CLASS_NAME("lower", _Regex_traits<char>::_Ch_lower),
_REGEX_CHAR_CLASS_NAME("print", _Regex_traits<char>::_Ch_print),
_REGEX_CHAR_CLASS_NAME("punct", _Regex_traits<char>::_Ch_punct),
_REGEX_CHAR_CLASS_NAME("space", _Regex_traits<char>::_Ch_space),
_REGEX_CHAR_CLASS_NAME("s", _Regex_traits<char>::_Ch_space),
_REGEX_CHAR_CLASS_NAME("upper", _Regex_traits<char>::_Ch_upper),
_REGEX_CHAR_CLASS_NAME("w", (_STD ctype_base::mask)(-1)),
_REGEX_CHAR_CLASS_NAME("xdigit", _Regex_traits<char>::_Ch_xdigit),
{0, 0, 0},
};
// Name table for wchar_t: the same names and masks as the char table,
// spelled with wide string literals, and terminated the same way.
template<>
const _Cl_names<wchar_t> _Regex_traits<wchar_t>::_Names[] =
{ // map class names to numeric constants
_REGEX_CHAR_CLASS_NAME(L"alnum", _Regex_traits<wchar_t>::_Ch_alnum),
_REGEX_CHAR_CLASS_NAME(L"alpha", _Regex_traits<wchar_t>::_Ch_alpha),
_REGEX_CHAR_CLASS_NAME(L"blank", _Regex_traits<wchar_t>::_Ch_blank),
_REGEX_CHAR_CLASS_NAME(L"cntrl", _Regex_traits<wchar_t>::_Ch_cntrl),
_REGEX_CHAR_CLASS_NAME(L"d", _Regex_traits<wchar_t>::_Ch_digit),
_REGEX_CHAR_CLASS_NAME(L"digit", _Regex_traits<wchar_t>::_Ch_digit),
_REGEX_CHAR_CLASS_NAME(L"graph", _Regex_traits<wchar_t>::_Ch_graph),
_REGEX_CHAR_CLASS_NAME(L"lower", _Regex_traits<wchar_t>::_Ch_lower),
_REGEX_CHAR_CLASS_NAME(L"print", _Regex_traits<wchar_t>::_Ch_print),
_REGEX_CHAR_CLASS_NAME(L"punct", _Regex_traits<wchar_t>::_Ch_punct),
_REGEX_CHAR_CLASS_NAME(L"space", _Regex_traits<wchar_t>::_Ch_space),
_REGEX_CHAR_CLASS_NAME(L"s", _Regex_traits<wchar_t>::_Ch_space),
_REGEX_CHAR_CLASS_NAME(L"upper", _Regex_traits<wchar_t>::_Ch_upper),
_REGEX_CHAR_CLASS_NAME(L"w", (_STD ctype_base::mask)(-1)),
_REGEX_CHAR_CLASS_NAME(L"xdigit", _Regex_traits<wchar_t>::_Ch_xdigit),
{0, 0, 0},
};
// the row-building macro is local to the two tables above
#undef _REGEX_CHAR_CLASS_NAME
// CLASS regex_traits<char>
// Traits for narrow characters: everything from _Regex_traits<char> plus
// conversion of a digit character to its numeric value.
template<>
    class regex_traits<char>
        : public _Regex_traits<char>
    {   // specialization for char
public:
    // Numeric value of the digit ch in the given base, or -1 when ch is
    // not a digit of that base. Base 8 accepts '0'-'7'; every other base
    // accepts '0'-'9'; base 16 additionally accepts 'a'-'f' and 'A'-'F'.
    int value(char ch, int base) const
        {   // map character value to numeric value
        const char _Top_digit = base == 8 ? '7' : '9';  // octal stops at '7'
        if ('0' <= ch && ch <= _Top_digit)
            return (ch - '0');

        if (base == 16)
            {   // letters are digits only in hexadecimal
            if ('a' <= ch && ch <= 'f')
                return (ch - 'a' + 10);
            if ('A' <= ch && ch <= 'F')
                return (ch - 'A' + 10);
            }
        return (-1);
        }
    };
// CLASS regex_traits<wchar_t>
template<>
class regex_traits<wchar_t>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -