📄 regex
字号:
// TEMPLATE CLASS _Node_class
//
// NFA node that holds a character class (POSIX bracket expression).
//
// The node owns whatever its pointer members refer to: the destructor
// deletes _Small, _Large and _Ranges and walks both _Sequence lists.
// Because of that ownership the class is made non-copyable; an implicit
// member-wise copy would leave two nodes deleting the same objects.
template<class _Elem, class _RxTraits>
class _Node_class
    : public _Node_base
{
public:
    // Construct an empty class node: every pointer member starts out null
    // and _Classes starts out as a zero mask.
    _Node_class(_Node_type _Ty = _N_class, _Node_flags _Fl = _Fl_none)
        : _Node_base(_Ty, _Fl), _Coll(0), _Small(0), _Large(0), _Ranges(0),
          _Classes((typename _RxTraits::char_class_type)0), _Equiv(0)
    {
    }

    // Release everything the node owns.
    ~_Node_class()
    {
        _Tidy(_Coll);
        delete _Small;
        delete _Large;
        delete _Ranges;
        _Tidy(_Equiv);
    }

    // Delete every element of the singly linked list that starts at _Head.
    // A null _Head is accepted and does nothing.
    void _Tidy(_Sequence<_Elem> *_Head)
    {
        while (_Head) {
            _Sequence<_Elem> *_Temp = _Head;
            _Head = _Head->_Next;   // step forward before the element is freed
            delete _Temp;
        }
    }

    // NOTE(review): the roles below are inferred from the member names and
    // from the _Builder::_Add_* declarations; confirm against the builder.
    _Sequence<_Elem> *_Coll;    // list head; presumably collating elements
    _Bitmap *_Small;            // presumably a bitmap for small character values
    _Buf<_Elem> *_Large;        // presumably characters that do not fit the bitmap
    _Buf<_Elem> *_Ranges;       // presumably character range endpoints
    typename _RxTraits::char_class_type _Classes;   // named-class mask
    _Sequence<_Elem> *_Equiv;   // list head; presumably equivalence classes

private:
    // Declared but intentionally not defined: copying would duplicate the
    // owning pointers and lead to a double delete.
    _Node_class(const _Node_class&);
    _Node_class& operator=(const _Node_class&);
};
// CLASS _Node_endif
//
// NFA node that marks the end of an alternative.  It adds no data members
// to _Node_base; only the node kind distinguishes it.
class _Node_endif
    : public _Node_base
{
public:
    // Tag the base node with kind _N_endif and flag value _Fl_none.
    _Node_endif()
        : _Node_base(_N_endif, _Fl_none)
    {
    }
};
// CLASS _Node_if
//
// NFA node that marks the beginning of an alternative.
//
// Further branches hang off this node through the _Child chain, and the
// destructor tears that chain down.  Because of that the class is made
// non-copyable: an implicit member-wise copy would leave two nodes
// destroying the same branches.
class _Node_if
    : public _Node_base
{
public:
    // _End is the node that closes the alternative.  It is stored as a
    // _Node_endif*, so the caller must pass a node of that dynamic type.
    _Node_if(_Node_base *_End)
        : _Node_base(_N_if, _Fl_none),
          _Endif(static_cast<_Node_endif *>(_End)), _Child(0)
    {
    }

    // Destroy every branch reachable through _Child.
    ~_Node_if()
    {
        _Node_if *_Cur = _Child;
        while (_Cur) {
            _Node_if *_Tmp = _Cur;
            _Cur = _Cur->_Child;
            // Detach the branch from the rest of the chain first.
            // NOTE(review): presumably this keeps the destructor of _Tmp
            // from walking the remaining branches a second time when
            // _Destroy_node releases it; confirm against _Destroy_node.
            _Tmp->_Child = 0;
            _Destroy_node(_Tmp, _Endif);
        }
    }

    _Node_endif *_Endif;    // node that ends this alternative
    _Node_if *_Child;       // next branch in the chain, or null

private:
    // Declared but intentionally not defined: copying would share _Child
    // between two nodes and the branches would be destroyed twice.
    _Node_if(const _Node_if&);
    _Node_if& operator=(const _Node_if&);
};
// CLASS _Node_end_rep
//
// NFA node that marks the end of a repetition.
class _Node_end_rep
    : public _Node_base
{
public:
    // Record the index handed in by the caller; it never changes afterwards.
    _Node_end_rep(int _New_idx)
        : _Node_base(_N_end_rep), _Idx(_New_idx)
    {
    }

    const int _Idx;     // index supplied at construction

private:
    // Assignment is declared private and left undefined here; the const
    // member could not be reassigned anyway.
    _Node_end_rep& operator=(const _Node_end_rep&);
};
// CLASS _Node_rep
//
// NFA node that marks the beginning of a repetition.
class _Node_rep
    : public _Node_base
{
public:
    // _Greedy selects the _Fl_greedy flag (otherwise _Fl_none); _Mn and _Mx
    // are stored as the repetition bounds; _End is the matching end node.
    //
    // The initializers are written in member declaration order, which is
    // the order in which the members are actually initialized.
    _Node_rep(bool _Greedy, int _Mn, int _Mx, _Node_end_rep *_End)
        : _Node_base(_N_rep, _Greedy ? _Fl_greedy : _Fl_none),
          _End_rep(_End), _Min(_Mn), _Max(_Mx)
    {
    }

    _Node_end_rep *_End_rep;    // node that ends this repetition
    const int _Min;             // lower bound passed to the constructor
    const int _Max;             // upper bound passed to the constructor

private:
    // Assignment is declared private and left undefined here; the const
    // members could not be reassigned anyway.
    _Node_rep& operator=(const _Node_rep&);
};
// TEMPLATE CLASS _Builder
//
// Provides the operations used by _Parser to build the NFA.  Only the
// declarations live in this class body; the member definitions are
// elsewhere in the file.
template<class _FwdIt,
    class _Elem,
    class _RxTraits>
class _Builder
{
public:
    typedef _REGEX_DIFFT(_FwdIt) _Difft;

    _Builder(const _RxTraits& _Tr, regex_constants::syntax_option_type);

    // ---- state queries and whole-pattern operations ----
    bool _Beg_expr() const;
    void _Setlong();
    void _Discard_pattern();
    _Node_base *_Getmark() const;

    // ---- _Add_* family: simple nodes ----
    void _Add_nop();
    void _Add_bol();
    void _Add_eol();
    void _Add_wbound();
    void _Add_dot();
    void _Add_char(_Elem _Ch);

    // ---- _Add_* family: character classes ----
    void _Add_class();
    void _Add_char_to_class(_Elem _Ch);
    void _Add_range(_Elem _E0, _Elem _E1);
    void _Add_named_class(_Regex_traits_base::char_class_type);
    void _Add_equiv(_FwdIt, _FwdIt, _Difft);
    void _Add_coll(_FwdIt, _FwdIt, _Difft);

    // ---- groups, back-references, alternation, repetition ----
    _Node_base *_Begin_group();
    void _End_group(_Node_base *_Back);
    _Node_base *_Begin_assert_group(bool);
    void _End_assert_group(_Node_base *);
    _Node_base *_Begin_capture_group(unsigned _Idx);
    void _Add_backreference(unsigned _Idx);
    _Node_base *_Begin_if(_Node_base *_Start);
    void _Else_if(_Node_base *, _Node_base *);
    void _Add_rep(int _Min, int _Max, bool _Greedy);

    // ---- finishing touches ----
    void _Negate();
    void _Mark_final();
    _Root_node *_End_pattern();

private:
    // ---- internal helpers ----
    _Node_base *_Link_node(_Node_base *);
    _Node_base *_New_node(_Node_type _Type);
    void _Add_str_node();
    bool _Beg_expr(_Node_base *) const;
    void _Add_char_to_bitmap(_Elem _Ch);
    void _Add_char_to_array(_Elem _Ch);
    void _Add_elts(_Node_class<_Elem, _RxTraits>*,
        _Regex_traits_base::char_class_type, const _RxTraits&);
    void _Char_to_elts(_FwdIt, _FwdIt, _Difft, _Sequence<_Elem> **);

    // ---- data (declaration order is significant for initialization) ----
    _Root_node *_Root;
    _Node_base *_Current;
    int _Rep_idx;
    regex_constants::syntax_option_type _Flags;
    const _RxTraits& _Traits;
    const int _Bmax;
    const int _Tmax;

    // Assignment is declared private and has no definition in this class
    // body; the reference and const members could not be reassigned anyway.
    _Builder& operator=(const _Builder&);
};
// TEMPLATE CLASS _Bt_state_t
//
// Holds the state needed for backtracking: an iterator position plus the
// group-validity bits.
template<class _BidIt>
class _Bt_state_t
{
public:
    _BidIt _Cur;        // iterator position saved in this state
    _Grps _Grp_valid;   // one bit per group; _Matcher::_Match tests bit
                        // (1 << index) to see whether a group matched
};
// TEMPLATE CLASS _Tgt_state_t
//
// Holds the current state of the match: the backtracking state inherited
// from _Bt_state_t plus a begin/end iterator pair for each group slot.
template<class _BidIt>
class _Tgt_state_t
    : public _Bt_state_t<_BidIt>
{
public:
    // Iterator pair delimiting one group.
    struct _Grp_t {
        _BidIt _Begin;
        _BidIt _End;
    };

    _Grp_t _Grps[_MAX_GRP];

    // Assign from a plain backtracking state.  Only the _Bt_state_t base
    // sub-object is overwritten; the _Grps array is left untouched.
    void operator=(const _Bt_state_t<_BidIt>& _Other)
    {
        static_cast<_Bt_state_t<_BidIt>&>(*this) = _Other;
    }
};
// TEMPLATE CLASS _Rep_state_t
//
// Holds the state needed for repetitions: a full target state plus a
// _Saved flag.
template<class _BidIt>
class _Rep_state_t
    : public _Tgt_state_t<_BidIt>
{
public:
    bool _Saved;

    // Assign from a target state.  Only the _Tgt_state_t base sub-object
    // is overwritten; _Saved keeps its current value.
    void operator=(const _Tgt_state_t<_BidIt>& _Other)
    {
        static_cast<_Tgt_state_t<_BidIt>&>(*this) = _Other;
    }
};
// TEMPLATE CLASS _Matcher
//
// Provides ways to match a regular expression against a text sequence.
// The public _Match overloads reset the per-attempt state, run the private
// node-level _Match starting at the root node, and on success copy the
// recorded groups into a match_results object when one is supplied.
template<class _BidIt,
    class _Elem,
    class _RxTraits,
    class _It>
class _Matcher
{
public:
    // _Pfirst/_Plast delimit the text, _Re is the compiled pattern, _Nx the
    // number of capture groups, _Sf/_Mf the syntax and match flags.
    //
    // NOTE(review): _Nx is narrowed to unsigned char when stored in _Ncap,
    // so a larger group count would be silently truncated.
    _Matcher(_It _Pfirst, _It _Plast,
        _Root_node *_Re, unsigned _Nx,
        regex_constants::syntax_option_type _Sf,
        regex_constants::match_flag_type _Mf)
        : _End(_Plast), _First(_Pfirst), _Rep(_Re),
          _Sflags(_Sf), _Mflags(_Mf), _Matched(false),
          _Ncap((unsigned char)_Nx),
          _Longest((_Re->_Flags & _Fl_longest)
              && !(_Mf & regex_constants::match_any))
    {
        _DEBUG_RANGE(_Pfirst, _Plast);
    }

    // Turn on the given match flags.
    void _Setf(regex_constants::match_flag_type _Mf)
    {
        _Mflags |= _Mf;
    }

    // Turn off the given match flags.
    void _Clearf(regex_constants::match_flag_type _Mf)
    {
        _Mflags &= ~_Mf;
    }

    // Try to match starting at _Pfirst: moves the start position, then
    // defers to the overload below.
    template<class _Alloc>
    bool _Match(_It _Pfirst,
        match_results<_BidIt, _Alloc> *_Matches,
        bool _Full_match)
    {
        _First = _Pfirst;
        return (_Match(_Matches, _Full_match));
    }

    // Try to match starting at the current _First.
    //
    // _Matches may be null, in which case nothing is reported beyond the
    // return value.  Returns true when the pattern matched.
    template<class _Alloc>
    bool _Match(match_results<_BidIt, _Alloc> *_Matches,
        bool _Full_match)
    {
        // reset the per-attempt state
        _Begin = _First;
        _Tgt_state._Cur = _First;
        _Tgt_state._Grp_valid = 0;
        _Cap = _Matches != 0;
        _Full = _Full_match;
        _Matched = false;

        if (!_Match(_Rep))
            return (false);
        if (!_Matches)
            return (true);  // matched, but the caller wants no details

        // copy every group slot into *_Matches
        _Matches->_Resize(_Ncap);
        for (unsigned int _Sub = 0; _Sub < _Ncap; ++_Sub) {
            // NOTE(review): the shift is done in int, so this relies on
            // _Ncap staying within the width of int.
            if (!(_Res._Grp_valid & (1 << _Sub))) {
                // group did not participate: empty range at the end
                _Matches->_At(_Sub).matched = false;
                _Matches->_At(_Sub).first = _End;
                _Matches->_At(_Sub).second = _End;
            } else {
                // group matched: report the recorded range
                _Matches->_At(_Sub).matched = true;
                _Matches->_At(_Sub).first = _Res._Grps[_Sub]._Begin;
                _Matches->_At(_Sub).second = _Res._Grps[_Sub]._End;
            }
        }

        // prefix is [_Begin, start of group 0); suffix is
        // [end of group 0, _End); the null sub-match is empty at _End
        _Matches->_Org = _Begin;
        _Matches->_Pfx().matched = true;
        _Matches->_Pfx().first = _Begin;
        _Matches->_Pfx().second = _Matches->_At(0).first;
        _Matches->_Sfx().matched = true;
        _Matches->_Sfx().first = _Matches->_At(0).second;
        _Matches->_Sfx().second = _End;
        _Matches->_Null().first = _End;
        _Matches->_Null().second = _End;
        return (true);
    }

    _BidIt _Skip(_BidIt, _BidIt, _Node_base * = 0);

private:
    // ---- node-level matching helpers (defined elsewhere) ----
    bool _Do_if(_Node_if *, int);
    bool _Do_rep(_Node_rep *, bool, int);
    bool _Is_wc(_It, int);
    bool _Do_class(_Node_base *);
    bool _Match(_Node_base *, int = 0);
    bool _Better_match();

    // Assignment is declared private and has no definition in this class body.
    _Matcher& operator=(const _Matcher&);

    // ---- data (declaration order is significant for initialization) ----
    _Tgt_state_t<_It> _Tgt_state;   // state of the attempt in progress
    _Tgt_state_t<_It> _Res;         // state whose groups are reported
    _It _Begin;                     // start position of the current attempt
    _It _End;                       // end of the text
    _It _First;                     // position the next attempt starts from
    _Node_base *_Rep;               // root of the compiled pattern
    regex_constants::syntax_option_type _Sflags;
    regex_constants::match_flag_type _Mflags;
    bool _Matched;
    bool _Cap;                      // true when a match_results was supplied
    unsigned char _Ncap;            // number of group slots to report
    bool _Longest;                  // pattern has _Fl_longest and match_any is off
    _RxTraits _Traits;
    bool _Full;                     // value of _Full_match for this attempt
};
// Indicates the type of a parsed character-class element.
// NOTE(review): the per-value meanings below are inferred from the
// enumerator names; confirm against the _Parser members that return them.
enum _Prs_ret
{
    _Prs_none = 0,  // presumably: nothing was parsed
    _Prs_chr = 1,   // presumably: a single character
    _Prs_set = 2    // presumably: a set of characters
};
// TEMPLATE CLASS _Parser
//
// Parses a regular expression.  Only _Mark_count is defined inline; the
// remaining members are declared here and defined elsewhere in the file.
template<class _FwdIt,
    class _Elem,
    class _RxTraits>
class _Parser
{
public:
    typedef typename _RxTraits::char_class_type char_class_type;

    _Parser(const _RxTraits& _Tr, _FwdIt _Pfirst, _FwdIt _Plast,
        regex_constants::syntax_option_type _Fx);

    _Root_node *_Compile();

    // Number of capture groups, computed as _Grp_idx + 1.
    // NOTE(review): the +1 presumably accounts for the whole-match group;
    // confirm against the code that increments _Grp_idx.
    unsigned _Mark_count() const
    {
        return _Grp_idx + 1;
    }

private:
    // ---- lexing ----
    void _Error(regex_constants::error_type);
    bool _Is_esc() const;
    void _Trans();
    void _Next();
    void _Expect(_Meta_type, regex_constants::error_type);

    // ---- parsing ----
    int _Do_digits(int _Base, int _Count);
    bool _DecimalDigits();
    void _HexDigits(int);
    bool _OctalDigits();
    void _Do_ex_class(_Meta_type);
    bool _CharacterClassEscape(bool);
    _Prs_ret _ClassEscape(bool);
    _Prs_ret _ClassAtom();
    void _ClassRanges();
    void _CharacterClass();
    bool _IdentityEscape();
    bool _Do_ffn(_Elem);
    bool _Do_ffnx(_Elem);
    bool _CharacterEscape();
    void _AtomEscape();
    void _Do_capture_group();
    void _Do_noncapture_group();
    void _Do_assert_group(bool);
    bool _Wrapped_disjunction();
    void _Quantifier();
    bool _Alternative();
    void _Disjunction();

    // ---- data (declaration order is significant for initialization) ----
    _FwdIt _Pat;
    _FwdIt _Begin;
    _FwdIt _End;
    unsigned _Grp_idx;
    int _Disj_count;
    unsigned long _Finished_grps;
    _Builder<_FwdIt, _Elem, _RxTraits> _Nfa;
    const _RxTraits& _Traits;
    regex_constants::syntax_option_type _Flags;
    unsigned short _Val;
    _Elem _Char;
    _Meta_type _Mchar;
    unsigned long _L_flags;
};
// Describes the properties of a regular expression language.  Each
// enumerator occupies exactly one bit, written as 1u << bit-number, so the
// values can be OR-ed together into a 32-bit mask.
enum _Lang_flags
{
    _L_ext_rep = 1u << 0,       // + and ? repetitions
    _L_alt_pipe = 1u << 1,      // uses '|' for alternation
    _L_alt_nl = 1u << 2,        // uses '\n' for alternation (grep, egrep)
    _L_nex_grp = 1u << 3,       // has non-escaped capture groups
    _L_nex_rep = 1u << 4,       // has non-escaped repeats
    _L_nc_grp = 1u << 5,        // has non-capture groups (?:xxx)
    _L_asrt_gen = 1u << 6,      // has generalized assertions (?=xxx), (?!xxx)
    _L_asrt_wrd = 1u << 7,      // has word boundary assertions (\b, \B)
    _L_bckr = 1u << 8,          // has backreferences (ERE doesn't)
    _L_lim_bckr = 1u << 9,      // has limited backreferences (BRE \1-\9)
    _L_ngr_rep = 1u << 10,      // has non-greedy repeats
    _L_esc_uni = 1u << 11,      // has Unicode escape sequences
    _L_esc_hex = 1u << 12,      // has hexadecimal escape sequences
    _L_esc_oct = 1u << 13,      // has octal escape sequences
    _L_esc_bsl = 1u << 14,      // has escape backslash in character classes
    _L_esc_ffnx = 1u << 15,     // has full file escapes (\[ab])
    _L_esc_ffn = 1u << 16,      // has limited file escapes (\[fnrtv])
    _L_esc_wsd = 1u << 17,      // has w, s, and d character set escapes
    _L_esc_ctrl = 1u << 18,     // has control escape
    _L_no_nl = 1u << 19,        // no newline in pattern or matching text
    _L_bzr_chr = 1u << 20,      // \0 is a valid character constant
    _L_grp_esc = 1u << 21,      // backslash is a special character in group
    _L_ident_ECMA = 1u << 22,   // ECMA identity escape (not identifierpart)
    _L_ident_ERE = 1u << 23,    // ERE identity escape (.[\*^$, plus {+?}()
    _L_ident_awk = 1u << 24,    // awk identity escape (ERE plus "/)
    _L_anch_rstr = 1u << 25,    // anchor restricted to beginning/end
    _L_star_beg = 1u << 26,     // star okay at beginning of RE/expr (BRE)
    _L_empty_grp = 1u << 27,    // empty group allowed (ERE prohibits "()")
    _L_paren_bal = 1u << 28,    // ')'/'}'/']' special only after '('/'{'/'['
    _L_brk_rstr = 1u << 29,     // ']' not special when first character in set
    _L_mtch_long = 1u << 30,    // find longest match (ERE, BRE)
    _L_no_subs = 1u << 31       // subexpression matches not recorded
};
// CLASS _Regex_base
//
// Base class for basic_regex whose job is to construct and destroy the
// container proxy.  Both special members exist only when
// _ITERATOR_DEBUG_LEVEL is greater than zero; otherwise the class adds
// nothing to _Container_base.
class _Regex_base
    : public _Container_base
{
public:
#if 0 < _ITERATOR_DEBUG_LEVEL
    // Allocate a proxy, attach it to this object, and point it back here.
    _Regex_base()
    {
        _STD _Container_proxy *const _Proxy = new _STD _Container_proxy;
        this->_Myproxy = _Proxy;
        _Proxy->_Mycont = this;
    }

    // Orphan everything still attached, then release the proxy and clear
    // the pointer to it.
    ~_Regex_base()
    {
        this->_Orphan_all();
        delete this->_Myproxy;
        this->_Myproxy = 0;
    }
#endif /* 0 < _ITERATOR_DEBUG_LEVEL */
};
// TEMPLATE CLASS basic_regex
template<class _Elem,
class _RxTraits = regex_traits<_Elem> >
class basic_regex
: public _Regex_base
{ // regular expression
public:
typedef basic_regex<_Elem, _RxTraits> _MyT;
typedef _Elem value_type;
typedef typename _RxTraits::locale_type locale_type;
typedef regex_constants::syntax_option_type flag_type;
enum {_EEN_VIS =
_ENHANCED_REGEX_VISUALIZER}; // helper for expression evaluator
static const flag_type icase = regex_constants::icase;
static const flag_type nosubs = regex_constants::nosubs;
static const flag_type optimize = regex_constants::optimize;
static const flag_type collate = regex_constants::collate;
static const flag_type ECMAScript = regex_constants::ECMAScript;
static const flag_type basic = regex_constants::basic;
static const flag_type extended = regex_constants::extended;
static const flag_type awk = regex_constants::awk;
static const flag_type grep = regex_constants::grep;
static const flag_type egrep = regex_constants::egrep;
basic_regex()
: _Rep(0)
{ // construct empty object
}
explicit basic_regex(_In_z_ const _Elem *_Ptr,
flag_type _Flags = regex_constants::ECMAScript)
: _Rep(0)
{ // construct from null terminated character sequence
_Reset(_Ptr, _Ptr + _RxTraits::length(_Ptr),
_Flags, random_access_iterator_tag());
}
basic_regex(_In_count_(_Count) const _Elem *_Ptr, size_t _Count,
flag_type _Flags = regex_constants::ECMAScript)
: _Rep(0)
{ // construct from character sequence
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -