⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex

📁 C语言库函数的原型,有用的拿去
💻
📖 第 1 页 / 共 5 页
字号:
	// TEMPLATE CLASS _Node_class
template<class _Elem, class _RxTraits>
	class _Node_class
		: public _Node_base
	{	// node that holds a character class (POSIX bracket expression)
		// The destructor releases everything reachable through the pointer
		// members below, so the node is made non-copyable: a memberwise
		// copy would leave two nodes freeing the same storage.
public:
	_Node_class(_Node_type _Ty = _N_class, _Node_flags _Fl = _Fl_none)
		: _Node_base(_Ty, _Fl), _Coll(0), _Small(0), _Large(0), _Ranges(0),
		_Classes(static_cast<typename _RxTraits::char_class_type>(0)),
		_Equiv(0)
		{	// construct with no elements: null pointers, empty class mask
		}

	~_Node_class()
		{	// destroy: free both sequence lists, the bitmap and the buffers
		_Tidy(_Coll);
		delete _Small;
		delete _Large;
		delete _Ranges;
		_Tidy(_Equiv);
		}

	void _Tidy(_Sequence<_Elem> *_Head)
		{	// clean up a list of sequences; a null _Head is a no-op
		while (_Head)
			{	// unlink the head before deleting it so _Next is read first
			_Sequence<_Elem> *_Temp = _Head;
			_Head = _Head->_Next;
			delete _Temp;
			}
		}

	_Sequence<_Elem> *_Coll;	// head of a singly linked list, freed by _Tidy
	_Bitmap *_Small;	// owned; freed with delete
	_Buf<_Elem> *_Large;	// owned; freed with delete
	_Buf<_Elem> *_Ranges;	// owned; freed with delete
	typename _RxTraits::char_class_type _Classes;	// starts out as 0
	_Sequence<_Elem> *_Equiv;	// head of a singly linked list, freed by _Tidy

private:
	_Node_class(const _Node_class&);	// not defined: copying is disabled
	_Node_class& operator=(const _Node_class&);	// not defined
	};

	// CLASS _Node_endif
class _Node_endif
	: public _Node_base
	{	// node that closes an alternative; carries no data of its own
public:
	_Node_endif() : _Node_base(_N_endif, _Fl_none) {}	// tag as _N_endif, no flags
	};

	// CLASS _Node_if
class _Node_if
	: public _Node_base
	{	// node that marks the beginning of an alternative
		// The destructor tears down every node reachable through _Child,
		// so the node is made non-copyable: a memberwise copy would have
		// two nodes destroying the same branch chain.
public:
	_Node_if(_Node_base *_End)
		: _Node_base(_N_if, _Fl_none),
			_Endif(static_cast<_Node_endif *>(_End)), _Child(0)
		{	// construct; the caller supplies the matching end node
		}

	~_Node_if()
		{	// destroy branches of if node
		_Node_if *_Cur = _Child;
		while (_Cur)
			{	// destroy branch
			_Node_if *_Tmp = _Cur;
			_Cur = _Cur->_Child;
			// detach first so destroying _Tmp cannot walk the rest of
			// the chain that this loop is still going to visit
			_Tmp->_Child = 0;
			_Destroy_node(_Tmp, _Endif);
			}
		}

	_Node_endif *_Endif;	// end node shared by all branches
	_Node_if *_Child;	// next branch in the chain, or 0

private:
	_Node_if(const _Node_if&);	// not defined: copying is disabled
	_Node_if& operator=(const _Node_if&);	// not defined
	};

	// CLASS _Node_end_rep
class _Node_end_rep
	: public _Node_base
	{	// node that terminates a repetition
	// private and not defined in this class body; _Idx is const anyway
	_Node_end_rep& operator=(const _Node_end_rep&);

public:
	_Node_end_rep(int _New_idx)
		: _Node_base(_N_end_rep),
			_Idx(_New_idx)
		{	// store the index handed in by the caller
		}

	const int _Idx;	// fixed at construction
	};

	// CLASS _Node_rep
class _Node_rep
	: public _Node_base
	{	// node that marks the beginning of a repetition
public:
	_Node_rep(bool _Greedy, int _Mn, int _Mx, _Node_end_rep *_End)
		: _Node_base(_N_rep, _Greedy ? _Fl_greedy : _Fl_none),
			_End_rep(_End), _Min(_Mn), _Max(_Mx)
		{	// construct; initializers are listed in declaration order,
			// which is the order in which members are really initialized
		}

	_Node_end_rep *_End_rep;	// end node supplied by the caller
	const int _Min;	// lower repetition bound, fixed at construction
	const int _Max;	// upper repetition bound, fixed at construction

private:
	// private and not defined in this class body; the const members
	// could not be reassigned in any case
	_Node_rep& operator=(const _Node_rep&);
	};

	// TEMPLATE CLASS _Builder
template<class _FwdIt,
	class _Elem,
	class _RxTraits>
	class _Builder
	{	// provides operations used by _Parser to build the nfa
		// NOTE(review): only declarations are visible in this class body;
		// the member definitions live elsewhere. The group notes below are
		// read off the names and signatures -- confirm against the
		// definitions before relying on them.
public:
	typedef _REGEX_DIFFT(_FwdIt) _Difft;

	// construction and builder-wide state
	_Builder(const _RxTraits& _Tr, regex_constants::syntax_option_type);
	bool _Beg_expr() const;
	void _Setlong();
	void _Discard_pattern();
	_Node_base *_Getmark() const;

	// operations that append a single node (presumably one per call)
	void _Add_nop();
	void _Add_bol();
	void _Add_eol();
	void _Add_wbound();
	void _Add_dot();
	void _Add_char(_Elem _Ch);

	// operations on character classes (bracket expressions)
	void _Add_class();
	void _Add_char_to_class(_Elem _Ch);
	void _Add_range(_Elem _E0, _Elem _E1);
	void _Add_named_class(_Regex_traits_base::char_class_type);
	void _Add_equiv(_FwdIt, _FwdIt, _Difft);
	void _Add_coll(_FwdIt, _FwdIt, _Difft);

	// groups, assertions, back references, alternation and repetition;
	// each _Begin_* returns a _Node_base * and the matching _End_*/_Else_if
	// takes a _Node_base * back
	_Node_base *_Begin_group();
	void _End_group(_Node_base *_Back);
	_Node_base *_Begin_assert_group(bool);
	void _End_assert_group(_Node_base *);
	_Node_base *_Begin_capture_group(unsigned _Idx);
	void _Add_backreference(unsigned _Idx);
	_Node_base *_Begin_if(_Node_base *_Start);
	void _Else_if(_Node_base *, _Node_base *);
	void _Add_rep(int _Min, int _Max, bool _Greedy);
	void _Negate();
	void _Mark_final();
	_Root_node *_End_pattern();	// hands back the root of the built pattern

private:
	// internal helpers
	_Node_base *_Link_node(_Node_base *);
	_Node_base *_New_node(_Node_type _Type);
	void _Add_str_node();
	bool _Beg_expr(_Node_base *) const;
	void _Add_char_to_bitmap(_Elem _Ch);
	void _Add_char_to_array(_Elem _Ch);
	void _Add_elts(_Node_class<_Elem, _RxTraits>*,
		_Regex_traits_base::char_class_type, const _RxTraits&);
	void _Char_to_elts(_FwdIt, _FwdIt, _Difft, _Sequence<_Elem> **);

	// data members; keep this order, the constructor is defined elsewhere
	// and members are initialized in declaration order
	_Root_node *_Root;
	_Node_base *_Current;
	int _Rep_idx;
	regex_constants::syntax_option_type _Flags;
	const _RxTraits& _Traits;	// reference: the traits object must outlive the builder
	const int _Bmax;
	const int _Tmax;

private:
	// private and not defined in this class body; the reference and const
	// members above could not be reassigned in any case
	_Builder& operator=(const _Builder&);
	};

	// TEMPLATE CLASS _Bt_state_t
template<class _BidIt>
	class _Bt_state_t
	{	// holds the state needed for backtracking
public:
	_BidIt _Cur;	// a position in the text; _Matcher::_Match starts it at _First
	_Grps _Grp_valid;	// bit mask; _Matcher::_Match tests bit _Idx for capture group _Idx
	};

	// TEMPLATE CLASS _Tgt_state_t
template<class _BidIt>
	class _Tgt_state_t
	: public _Bt_state_t<_BidIt>
	{	// backtracking state plus the recorded range of every group
public:
	struct _Grp_t {
		_BidIt _Begin;
		_BidIt _End;
		};

	_Grp_t _Grps[_MAX_GRP];

	void operator=(const _Bt_state_t<_BidIt>& _Other)
		{	// overwrite only the base-class part; _Grps is left untouched
		static_cast<_Bt_state_t<_BidIt>&>(*this) = _Other;
		}
	};

	// TEMPLATE CLASS _Rep_state_t
template<class _BidIt>
	class _Rep_state_t
	: public _Tgt_state_t<_BidIt>
	{	// match state extended with a flag used by repetitions
public:
	bool _Saved;

	void operator=(const _Tgt_state_t<_BidIt>& _Other)
		{	// overwrite only the base-class part; _Saved is left untouched
		static_cast<_Tgt_state_t<_BidIt>&>(*this) = _Other;
		}
	};

	// TEMPLATE CLASS _Matcher
template<class _BidIt,
	class _Elem,
	class _RxTraits,
	class _It>
	class _Matcher
	{	// provides ways to match a regular expression to a text sequence
public:
	_Matcher(_It _Pfirst, _It _Plast,
		_Root_node *_Re, unsigned _Nx,
		regex_constants::syntax_option_type _Sf,
		regex_constants::match_flag_type _Mf)
			: _Begin(_Pfirst), _End(_Plast), _First(_Pfirst), _Rep(_Re),
				_Sflags(_Sf), _Mflags(_Mf), _Matched(false), _Cap(false),
				_Ncap((unsigned char)_Nx),
				_Longest((_Re->_Flags & _Fl_longest)
					&& !(_Mf & regex_constants::match_any)),
				_Full(false)
		{	// construct
			// Initializers follow declaration order (the order in which
			// members are really initialized). _Begin, _Cap and _Full are
			// given defined values here; _Match overwrites them on entry.
			// NOTE(review): _Nx is narrowed to unsigned char, so a count
			// above 255 would be truncated -- confirm callers stay below.
		_DEBUG_RANGE(_Pfirst, _Plast);
		}

	void _Setf(regex_constants::match_flag_type _Mf)
		{	// set specified flags
		_Mflags |= _Mf;
		}

	void _Clearf(regex_constants::match_flag_type _Mf)
		{	// clear specified flags
		_Mflags &= ~_Mf;
		}

	template<class _Alloc>
		bool _Match(_It _Pfirst,
			match_results<_BidIt, _Alloc> *_Matches,
			bool _Full_match)
		{	// try to match, starting at _Pfirst instead of the stored start
		_First = _Pfirst;
		return (_Match(_Matches, _Full_match));
		}

	template<class _Alloc>
		bool _Match(match_results<_BidIt, _Alloc> *_Matches,
			bool _Full_match)
		{	// try to match from _First; returns false if the pattern fails.
			// When _Matches is non-null and the match succeeds, fills in
			// every submatch plus the prefix, suffix and null entries.
		_Begin = _First;
		_Tgt_state._Cur = _First;
		_Tgt_state._Grp_valid = 0;
		_Cap = _Matches != 0;
		_Full = _Full_match;

		_Matched = false;

		if (!_Match(_Rep))
			return (false);
		if (_Matches)
			{	// copy results to _Matches
			_Matches->_Resize(_Ncap);
			for (unsigned int _Idx = 0; _Idx < _Ncap; ++_Idx)
				{	// copy submatch _Idx
				// NOTE(review): 1 << _Idx is an int shift; it is only
				// well defined while _Idx is below the width of int --
				// confirm _Ncap cannot exceed that.
				if (_Res._Grp_valid & (1 << _Idx))
					{	// copy successful match
					_Matches->_At(_Idx).matched = true;
					_Matches->_At(_Idx).first = _Res._Grps[_Idx]._Begin;
					_Matches->_At(_Idx).second = _Res._Grps[_Idx]._End;
					}
				else
					{	// copy failed match: empty range at the end of the text
					_Matches->_At(_Idx).matched = false;
					_Matches->_At(_Idx).first = _End;
					_Matches->_At(_Idx).second = _End;
					}
				}
			// prefix is [_Begin, start of submatch 0); suffix is
			// [end of submatch 0, _End); the null entry is empty at _End
			_Matches->_Org = _Begin;
			_Matches->_Pfx().matched = true;
			_Matches->_Pfx().first = _Begin;
			_Matches->_Pfx().second = _Matches->_At(0).first;
			_Matches->_Sfx().matched = true;
			_Matches->_Sfx().first = _Matches->_At(0).second;
			_Matches->_Sfx().second = _End;
			_Matches->_Null().first = _End;
			_Matches->_Null().second = _End;
			}
		return (true);
		}

	_BidIt _Skip(_BidIt, _BidIt, _Node_base * = 0);

private:
	_Tgt_state_t<_It> _Tgt_state;	// working state; reset at the top of _Match
	_Tgt_state_t<_It> _Res;	// state that _Match copies results out of
	bool _Do_if(_Node_if *, int);
	bool _Do_rep(_Node_rep *, bool, int);
	bool _Is_wc(_It, int);
	bool _Do_class(_Node_base *);
	bool _Match(_Node_base *, int = 0);
	bool _Better_match();

	_It _Begin;	// where the current attempt started (reported as _Org)
	_It _End;	// end of the text
	_It _First;	// where the next attempt starts
	_Node_base *_Rep;	// root of the compiled pattern
	regex_constants::syntax_option_type _Sflags;
	regex_constants::match_flag_type _Mflags;
	bool _Matched;
	bool _Cap;	// true when the caller passed a match_results to fill
	unsigned char _Ncap;	// number of submatch slots copied out by _Match
	bool _Longest;	// pattern has _Fl_longest and match_any was not requested
	_RxTraits _Traits;
	bool _Full;	// the _Full_match argument of the current attempt

private:
	_Matcher& operator=(const _Matcher&);	// private; not defined in this class body
	};

enum _Prs_ret
	{	// kind of element reported while parsing a character class
	_Prs_none = 0,
	_Prs_chr = 1,
	_Prs_set = 2
	};

	// TEMPLATE CLASS _Parser
template<class _FwdIt,
	class _Elem,
	class _RxTraits>
	class _Parser
	{	// parse a regular expression
		// NOTE(review): apart from _Mark_count, only declarations are
		// visible in this class body; the definitions live elsewhere.
public:
	typedef typename _RxTraits::char_class_type char_class_type;

	_Parser(const _RxTraits& _Tr, _FwdIt _Pfirst, _FwdIt _Plast,
		regex_constants::syntax_option_type _Fx);
	_Root_node *_Compile();

	unsigned _Mark_count() const
		{	// return number of capture groups
			// computed as _Grp_idx + 1; presumably the extra one accounts
			// for the whole-match group -- confirm
		return (_Grp_idx + 1);
		}

private:
	// lexing
	void _Error(regex_constants::error_type);

	bool _Is_esc() const;
	void _Trans();
	void _Next();
	void _Expect(_Meta_type, regex_constants::error_type);

	// parsing
	int _Do_digits(int _Base, int _Count);
	bool _DecimalDigits();
	void _HexDigits(int);
	bool _OctalDigits();
	void _Do_ex_class(_Meta_type);
	bool _CharacterClassEscape(bool);
	_Prs_ret _ClassEscape(bool);
	_Prs_ret _ClassAtom();
	void _ClassRanges();
	void _CharacterClass();
	bool _IdentityEscape();
	bool _Do_ffn(_Elem);
	bool _Do_ffnx(_Elem);
	bool _CharacterEscape();
	void _AtomEscape();
	void _Do_capture_group();
	void _Do_noncapture_group();
	void _Do_assert_group(bool);
	bool _Wrapped_disjunction();
	void _Quantifier();
	bool _Alternative();
	void _Disjunction();

	// data members; keep this order, the constructor is defined elsewhere
	// and members are initialized in declaration order
	_FwdIt _Pat;
	_FwdIt _Begin;
	_FwdIt _End;
	unsigned _Grp_idx;	// _Mark_count reports this value plus one
	int _Disj_count;
	unsigned long _Finished_grps;
	_Builder<_FwdIt, _Elem, _RxTraits> _Nfa;	// builder that constructs the nfa
	const _RxTraits& _Traits;	// reference: the traits object must outlive the parser
	regex_constants::syntax_option_type _Flags;
	unsigned short _Val;
	_Elem _Char;
	_Meta_type _Mchar;
	unsigned long _L_flags;	// presumably a combination of _Lang_flags bits -- confirm
	};

enum _Lang_flags
	{	// describe language properties
		// Each enumerator is a distinct single bit; together the 32
		// enumerators cover bits 0 through 31, so values can be OR-ed.
	_L_ext_rep = 0x00000001,	// + and ? repetitions
	_L_alt_pipe = 0x00000002,	// uses '|' for alternation
	_L_alt_nl = 0x00000004,		// uses '\n' for alternation (grep, egrep)
	_L_nex_grp = 0x00000008,	// has non-escaped capture groups
	_L_nex_rep = 0x00000010,	// has non-escaped repeats
	_L_nc_grp = 0x00000020,		// has non-capture groups (?:xxx)
	_L_asrt_gen = 0x00000040,	// has generalized assertions (?=xxx), (?!xxx)
	_L_asrt_wrd = 0x00000080,	// has word boundary assertions (\b, \B)
	_L_bckr = 0x00000100,		// has backreferences (ERE doesn't)
	_L_lim_bckr = 0x00000200,	// has limited backreferences (BRE \1-\9)
	_L_ngr_rep = 0x00000400,	// has non-greedy repeats
	_L_esc_uni = 0x00000800,	// has Unicode escape sequences
	_L_esc_hex = 0x00001000,	// has hexadecimal escape sequences
	_L_esc_oct = 0x00002000,	// has octal escape sequences
	_L_esc_bsl = 0x00004000,	// has escape backslash in character classes
	_L_esc_ffnx = 0x00008000,	// has full file escapes (\[ab])
	_L_esc_ffn = 0x00010000,	// has limited file escapes (\[fnrtv])
	_L_esc_wsd = 0x00020000,	// has w, s, and d character set escapes
	_L_esc_ctrl = 0x00040000,	// has control escape
	_L_no_nl = 0x00080000,		// no newline in pattern or matching text
	_L_bzr_chr = 0x00100000,	// \0 is a valid character constant
	_L_grp_esc = 0x00200000,	// \ is special character in group
	_L_ident_ECMA = 0x00400000,	// ECMA identity escape (not identifierpart)
	_L_ident_ERE = 0x00800000,	// ERE identity escape (.[\*^$, plus {+?}())
	_L_ident_awk = 0x01000000,	// awk identity escape (ERE plus "/)
	_L_anch_rstr = 0x02000000,	// anchor restricted to beginning/end
	_L_star_beg = 0x04000000,	// star okay at beginning of RE/expr (BRE)
	_L_empty_grp = 0x08000000,	// empty group allowed (ERE prohibits "()")
	_L_paren_bal = 0x10000000,	// ')'/'}'/']' special only after '('/'{'/'['
	_L_brk_rstr = 0x20000000,	// ']' not special when first character in set
	_L_mtch_long = 0x40000000,	// find longest match (ERE, BRE)
	_L_no_subs = 0x80000000		// subexpression matches not recorded
	};

	// CLASS _Regex_base
class _Regex_base
	: public _Container_base
	{	// base of basic_regex; creates and tears down the container proxy
public:
 #if 0 < _ITERATOR_DEBUG_LEVEL
	_Regex_base()
		{	// allocate a proxy and point it back at this object
		_STD _Container_proxy *const _Newproxy = new _STD _Container_proxy;
		_Newproxy->_Mycont = this;
		_Myproxy = _Newproxy;
		}

	~_Regex_base()
		{	// call _Orphan_all, then free the proxy and null the pointer
		_Orphan_all();
		delete _Myproxy;
		_Myproxy = 0;
		}
 #endif /* 0 < _ITERATOR_DEBUG_LEVEL */
	};

	// TEMPLATE CLASS basic_regex
template<class _Elem,
	class _RxTraits = regex_traits<_Elem> >
	class basic_regex
		: public _Regex_base
	{	// regular expression
public:
	typedef basic_regex<_Elem, _RxTraits> _MyT;
	typedef _Elem value_type;
	typedef typename _RxTraits::locale_type locale_type;
	typedef regex_constants::syntax_option_type flag_type;

	enum {_EEN_VIS =
		_ENHANCED_REGEX_VISUALIZER};	// helper for expression evaluator

	static const flag_type icase = regex_constants::icase;
	static const flag_type nosubs = regex_constants::nosubs;
	static const flag_type optimize = regex_constants::optimize;
	static const flag_type collate = regex_constants::collate;
	static const flag_type ECMAScript = regex_constants::ECMAScript;
	static const flag_type basic = regex_constants::basic;
	static const flag_type extended = regex_constants::extended;
	static const flag_type awk = regex_constants::awk;
	static const flag_type grep = regex_constants::grep;
	static const flag_type egrep = regex_constants::egrep;

	basic_regex()
		: _Rep(0)
		{	// construct empty object
		}

	explicit basic_regex(_In_z_ const _Elem *_Ptr,
		flag_type _Flags = regex_constants::ECMAScript)
		: _Rep(0)
		{	// construct from null terminated character sequence
		_Reset(_Ptr, _Ptr + _RxTraits::length(_Ptr),
			_Flags, random_access_iterator_tag());
		}

	basic_regex(_In_count_(_Count) const _Elem *_Ptr, size_t _Count,
		flag_type _Flags = regex_constants::ECMAScript)
		: _Rep(0)
		{	// construct from character sequence

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -