parser.cs

来自「没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没」· CS 代码 · 共 1,121 行 · 第 1/2 页

CS
1,121
字号
//
// assembly:	System
// namespace:	System.Text.RegularExpressions
// file:	parser.cs
//
// author:	Dan Lewis (dlewis@gmx.co.uk)
// 		(c) 2002

using System;
using System.Collections;
using System.Globalization;

namespace System.Text.RegularExpressions.Syntax {

	// Recursive-descent parser that turns a regular expression pattern
	// string into an expression tree (RegularExpression / Group / ...).
	//
	// The instance methods index into `pattern' without bounds checks and
	// rely on ParseRegularExpression translating the resulting
	// IndexOutOfRangeException into an "Unexpected end of pattern." error.
	class Parser {

		// Parses a run of one or more digits in base 10 starting at str[ptr].
		// See ParseNumber for the return value and the effect on ptr.
		public static int ParseDecimal (string str, ref int ptr) {
			return ParseNumber (str, ref ptr, 10, 1, Int32.MaxValue);
		}

		// Parses between one and three digits in base 8 starting at str[ptr].
		// See ParseNumber for the return value and the effect on ptr.
		public static int ParseOctal (string str, ref int ptr) {
			return ParseNumber (str, ref ptr, 8, 1, 3);
		}

		// Parses exactly `digits' digits in base 16 starting at str[ptr].
		// See ParseNumber for the return value and the effect on ptr.
		public static int ParseHex (string str, ref int ptr, int digits) {
			return ParseNumber (str, ref ptr, 16, digits, digits);
		}

		// Parses a number in base `b' starting at str[ptr].
		//
		//   min - minimum number of digits that must be present
		//   max - maximum number of digits to consume; a value smaller
		//         than min means "no upper limit"
		//
		// Returns the parsed value and advances ptr past the digits that
		// were consumed. Returns -1 and leaves ptr untouched when fewer
		// than min digits are present or when the value does not fit in
		// an Int32. Digit recognition is delegated to ParseDigit.
		public static int ParseNumber (string str, ref int ptr, int b, int min, int max) {
			int p = ptr, n = 0, digits = 0, d;
			if (max < min)
				max = Int32.MaxValue;

			while (digits < max && p < str.Length) {
				d = ParseDigit (str[p ++], b, digits);
				if (d < 0) {
					// not a digit: un-read it and stop
					-- p;
					break;
				}

				// Accumulate in 64 bits so an over-long digit run is
				// reported as a failure instead of silently wrapping
				// around to an unrelated (possibly negative) value.
				long next = (long) n * b + d;
				if (next > Int32.MaxValue)
					return -1;

				n = (int) next;
				++ digits;
			}

			if (digits < min)
				return -1;

			ptr = p;
			return n;
		}

		// Parses a group name starting at str[ptr].
		//
		// If the first character is a digit the name is a decimal group
		// number; it is returned in canonical string form when it is
		// greater than zero, otherwise null is returned. Any other name
		// is the longest run of characters accepted by IsNameChar; null
		// is returned when that run is empty.
		//
		// No bounds checks are made: reading past the end of str raises
		// IndexOutOfRangeException.
		public static string ParseName (string str, ref int ptr) {
			if (Char.IsDigit (str[ptr])) {
				// max < min, so ParseNumber reads an unbounded digit run
				int gid = ParseNumber (str, ref ptr, 10, 1, 0);
				if (gid > 0)
					return gid.ToString ();

				return null;
			}

			int start = ptr;
			for (;;) {
				if (!IsNameChar (str[ptr]))
					break;
				++ ptr;
			}

			if (ptr - start > 0)
				return str.Substring (start, ptr - start);

			return null;
		}

		// Returns a copy of str in which the characters
		//   \ * + ? | { [ ( ) ^ $ . # and space
		// are prefixed with a backslash, and tab, newline, carriage return
		// and form feed are replaced by \t, \n, \r and \f respectively.
		public static string Escape (string str) {
			// A StringBuilder keeps this linear in the input length;
			// repeated string concatenation would copy the partial
			// result for every character appended.
			StringBuilder result = new StringBuilder (str.Length);

			for (int i = 0; i < str.Length; ++ i) {
				char c = str[i];
				switch (c) {
				case '\\': case '*': case '+': case '?': case '|':
				case '{': case '[': case '(': case ')': case '^':
				case '$': case '.': case '#': case ' ':
					result.Append ('\\').Append (c);
					break;

				case '\t': result.Append ("\\t"); break;
				case '\n': result.Append ("\\n"); break;
				case '\r': result.Append ("\\r"); break;
				case '\f': result.Append ("\\f"); break;

				default: result.Append (c); break;
				}
			}

			return result.ToString ();
		}

		// Reverses escaping by running str through a fresh parser's
		// ParseString method.
		public static string Unescape (string str) {
			return new Parser ().ParseString (str);
		}

		// public instance

		// Creates a parser with an empty capture list and reference table.
		public Parser () {
			this.caps = new ArrayList ();
			this.refs = new Hashtable ();
		}

		// Parses `pattern' under the given options and returns the root of
		// the resulting expression tree. All per-parse state (position,
		// capture list, reference table, group count) is reset first.
		//
		// An IndexOutOfRangeException raised while parsing (i.e. the
		// pattern ended in the middle of a construct) is reported as an
		// "Unexpected end of pattern." parse exception.
		public RegularExpression ParseRegularExpression (string pattern, RegexOptions options) {
			this.pattern = pattern;
			this.ptr = 0;

			caps.Clear ();
			refs.Clear ();
			this.num_groups = 0;

			try {
				RegularExpression re = new RegularExpression ();
				ParseGroup (re, options, null);
				ResolveReferences ();

				re.GroupCount = num_groups;

				return re;
			}
			catch (IndexOutOfRangeException) {
				throw NewParseException ("Unexpected end of pattern.");
			}
		}

		// Builds a dictionary from group name to group number for the most
		// recently parsed pattern. "0" always maps to 0. A group whose
		// name is set and not yet present maps name -> group.Number; any
		// other group is keyed by its 1-based position in the capture
		// list.
		public IDictionary GetMapping () {
			Hashtable mapping = new Hashtable ();
			int end = caps.Count;
			mapping.Add ("0", 0);

			for (int i = 0; i < end;) {
				CapturingGroup group = (CapturingGroup) caps [i];
				i++;	// from here on i is the 1-based position of the group

				if (group.Name != null && !mapping.Contains (group.Name))
					mapping.Add (group.Name, group.Number);
				else {
					// NOTE(review): Hashtable.Add throws ArgumentException
					// on a duplicate key. A group explicitly named like a
					// number could collide with this positional key -
					// confirm against how group numbers are assigned.
					mapping.Add (i.ToString (), i);
				}
			}

			return mapping;
		}

		// private methods

		// Parses the body of a group, starting at the current position and
		// ending at the matching ')' (or at the end of the pattern when
		// `group' is the top-level RegularExpression), and appends the
		// result to `group'.
		//
		//   group     - receives the parsed expression(s)
		//   options   - options in effect; inline option changes such as
		//               (?i) update the local copy for the rest of the group
		//   assertion - when non-null, the '|' separated parts become the
		//               assertion's TrueExpression / FalseExpression
		//               instead of an Alternation
		//
		// Throws a parse exception for stray quantifiers, unbalanced
		// parentheses and more than one '|' in a conditional.
		private void ParseGroup (Group group, RegexOptions options, Assertion assertion) {
			bool is_top_level = group is RegularExpression;

			Alternation alternation = null;
			string literal = null;		// pending run of literal characters

			Group current = new Group ();	// alternative currently being built
			Expression expr = null;
			bool closed = false;

			while (true) {
				ConsumeWhitespace (IsIgnorePatternWhitespace (options));
				if (ptr >= pattern.Length)
					break;

				// (1) Parse for Expressions

				char ch = pattern[ptr ++];

				switch (ch) {
				case '^': {
					Position pos =
						IsMultiline (options) ? Position.StartOfLine : Position.Start;

					expr = new PositionAssertion (pos);
					break;
				}

				case '$': {
					Position pos =
						IsMultiline (options) ? Position.EndOfLine : Position.End;

					expr = new PositionAssertion (pos);
					break;
				}

				case '.': {
					Category cat =
						IsSingleline (options) ? Category.AnySingleline : Category.Any;

					expr = new CharacterClass (cat, false);
					break;
				}

				case '\\': {
					int c = ParseEscape ();
					if (c >= 0)
						ch = (char)c;
					else {
						expr = ParseSpecial (options);

						if (expr == null)
							ch = pattern[ptr ++];		// default escape
					}
					break;
				}

				case '[': {
					expr = ParseCharacterClass (options);
					break;
				}

				case '(': {
					bool ignore = IsIgnoreCase (options);
					expr = ParseGroupingConstruct (ref options);
					if (expr == null) {
						// The construct produced no expression (comment or
						// inline option change). If case sensitivity was
						// toggled, flush the pending literal with the
						// setting it was accumulated under - the one in
						// effect before the construct - not the new one.
						if (literal != null && IsIgnoreCase (options) != ignore) {
							current.AppendExpression (new Literal (literal, ignore));
							literal = null;
						}

						continue;
					}
					break;
				}

				case ')': {
					closed = true;
					goto EndOfGroup;
				}

				case '|': {
					if (literal != null) {
						current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
						literal = null;
					}

					if (assertion != null) {
						if (assertion.TrueExpression == null)
							assertion.TrueExpression = current;
						else if (assertion.FalseExpression == null)
							assertion.FalseExpression = current;
						else
							throw NewParseException ("Too many | in (?()|).");
					}
					else {
						if (alternation == null)
							alternation = new Alternation ();

						alternation.AddAlternative (current);
					}

					current = new Group ();
					continue;
				}

				case '*': case '+': case '?': {
					// a quantifier with nothing in front of it
					throw NewParseException ("Bad quantifier.");
				}

				default:
					break;		// literal character
				}

				ConsumeWhitespace (IsIgnorePatternWhitespace (options));

				// (2) Check for Repetitions

				if (ptr < pattern.Length) {
					char k = pattern[ptr];

					if (k == '?' || k == '*' || k == '+' || k == '{') {
						++ ptr;

						int min = 0, max = 0;
						bool lazy = false;

						switch (k) {
						case '?': min = 0; max = 1; break;
						case '*': min = 0; max = 0xffff; break;
						case '+': min = 1; max = 0xffff; break;
						case '{': ParseRepetitionBounds (out min, out max, options); break;
						}

						// a trailing '?' makes the quantifier lazy
						ConsumeWhitespace (IsIgnorePatternWhitespace (options));
						if (ptr < pattern.Length && pattern[ptr] == '?') {
							++ ptr;
							lazy = true;
						}

						Repetition repetition = new Repetition (min, max, lazy);

						// A quantifier after a plain character applies to
						// that single character only, not to the whole
						// pending literal run.
						if (expr == null)
							repetition.Expression = new Literal (ch.ToString (), IsIgnoreCase (options));
						else
							repetition.Expression = expr;

						expr = repetition;
					}
				}

				// (3) Append Expression and/or Literal

				if (expr == null) {
					if (literal == null)
						literal = "";

					literal += ch;
				}
				else {
					if (literal != null) {
						current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
						literal = null;
					}

					current.AppendExpression (expr);
					expr = null;
				}

				if (is_top_level && ptr >= pattern.Length)
					goto EndOfGroup;
			}

		EndOfGroup:
			if (is_top_level && closed)
				throw NewParseException ("Too many )'s.");
			if (!is_top_level && !closed)
				throw NewParseException ("Not enough )'s.");

			// clean up literals and alternations

			if (literal != null)
				current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));

			if (assertion != null) {
				if (assertion.TrueExpression == null)
					assertion.TrueExpression = current;
				else
					assertion.FalseExpression = current;

				group.AppendExpression (assertion);
			}
			else if (alternation != null) {
				alternation.AddAlternative (current);
				group.AppendExpression (alternation);
			}
			else
				group.AppendExpression (current);
		}

		// Parses the construct that follows a '(' which has already been
		// consumed: plain and named groups, (?:...), (?>...), inline
		// options, look-around assertions, balancing groups, conditionals
		// and (?#...) comments.
		//
		// Returns the parsed expression, or null when the construct yields
		// no expression (a comment, or an option change such as (?i) which
		// is written back through `options').
		private Expression ParseGroupingConstruct (ref RegexOptions options) {
			if (pattern[ptr] != '?') {
				// plain parenthesised group
				Group group;

				if (IsExplicitCapture (options))
					group = new Group ();
				else {
					group = new CapturingGroup ();
					caps.Add (group);
				}

				ParseGroup (group, options, null);
				return group;
			}
			else
				++ ptr;

			switch (pattern[ptr]) {
			case ':': {	// non-capturing group
				++ ptr;
				Group group = new Group ();
				ParseGroup (group, options, null);

				return group;
			}

			case '>': {	// non-backtracking group
				++ ptr;
				Group group = new NonBacktrackingGroup ();
				ParseGroup (group, options, null);

				return group;
			}

			case 'i': case 'm': case 'n':
			case 's': case 'x': case '-': {	// options
				RegexOptions o = options;
				ParseOptions (ref o, false);
				if (pattern[ptr] == '-') {
					++ ptr;
					ParseOptions (ref o, true);
				}

				if (pattern[ptr] == ':') {	// pass options to child group
					++ ptr;
					Group group = new Group ();
					ParseGroup (group, o, null);
					return group;
				}
				else if (pattern[ptr] == ')') {	// change options of enclosing group
					++ ptr;
					options = o;
					return null;
				}
				else
					throw NewParseException ("Bad options");
			}

			case '<': case '=': case '!': {	// lookahead/lookbehind
				ExpressionAssertion asn = new ExpressionAssertion ();
				if (!ParseAssertionType (asn))
					goto case '\'';	// it's a (?<name> ) construct

				Group test = new Group ();
				ParseGroup (test, options, null);

				asn.TestExpression = test;
				return asn;
			}

			case '\'': {	// named/balancing group
				char delim;
				if (pattern[ptr] == '<')
					delim = '>';
				else
					delim = '\'';

				++ ptr;
				string name = ParseName ();

				if (pattern[ptr] == delim) {
					// capturing group

					if (name == null)
						throw NewParseException ("Bad group name.");

					++ ptr;
					CapturingGroup cap = new CapturingGroup ();
					cap.Name = name;
					caps.Add (cap);
					ParseGroup (cap, options, null);

					return cap;
				}
				else if (pattern[ptr] == '-') {
					// balancing group

					++ ptr;
					string balance_name = ParseName ();
					if (balance_name == null || pattern[ptr] != delim)
						throw NewParseException ("Bad balancing group name.");

					++ ptr;
					BalancingGroup bal = new BalancingGroup ();
					bal.Name = name;
					caps.Add (bal);
					refs.Add (bal, balance_name);

					// Parse the group body up to its closing ')'. Without
					// this the body would be parsed as part of the
					// enclosing group and the ')' would close that group
					// instead.
					ParseGroup (bal, options, null);

					return bal;
				}
				else
					throw NewParseException ("Bad group name.");
			}

			case '(': {	// expression/capture test
				Assertion asn;

				++ ptr;
				int p = ptr;
				string name = ParseName ();
				if (name == null || pattern[ptr] != ')') {	// expression test
					// FIXME MS implementation doesn't seem to
					// implement this version of (?(x) ...)

					ptr = p;	// rewind: the test is an expression, not a name
					ExpressionAssertion expr_asn = new ExpressionAssertion ();

					if (pattern[ptr] == '?') {
						++ ptr;
						if (!ParseAssertionType (expr_asn))
							throw NewParseException ("Bad conditional.");
					}
					else {
						expr_asn.Negate = false;
						expr_asn.Reverse = false;
					}

					Group test = new Group ();
					ParseGroup (test, options, null);
					expr_asn.TestExpression = test;
					asn = expr_asn;
				}
				else {	// capture test
					++ ptr;
					asn = new CaptureAssertion ();
					refs.Add (asn, name);
				}

				Group group = new Group ();
				ParseGroup (group, options, asn);
				return group;
			}

			case '#': {	// comment
				++ ptr;
				while (pattern[ptr ++] != ')') {
					if (ptr >= pattern.Length)
						throw NewParseException ("Unterminated (?#...) comment.");
				}
				return null;
			}

			default:	// error
				throw NewParseException ("Bad grouping construct.");
			}
		}

		// Recognises the assertion markers "<=", "<!", "=" and "!" at the
		// current position. On success sets assertion.Negate ('!' forms)
		// and assertion.Reverse ('<' forms), advances past the marker and
		// returns true. Returns false without advancing when the
		// characters do not form one of those markers.
		private bool ParseAssertionType (ExpressionAssertion assertion) {
			if (pattern[ptr] == '<') {
				switch (pattern[ptr + 1]) {
				case '=':
					assertion.Negate = false;
					break;
				case '!':
					assertion.Negate = true;
					break;
				default:
					return false;
				}

				assertion.Reverse = true;
				ptr += 2;
			}
			else {
				switch (pattern[ptr]) {
				case '=':
					assertion.Negate = false;
					break;
				case '!':
					assertion.Negate = true;
					break;
				default:
					return false;
				}

				assertion.Reverse = false;
				ptr += 1;
			}

			return true;
		}

		// Reads inline option letters at the current position and sets
		// (negate == false) or clears (negate == true) the matching flag
		// in `options'.
		private void ParseOptions (ref RegexOptions options, bool negate) {
			for (;;) {
				switch (pattern[ptr]) {
				case 'i':
					if (negate)
						options &= ~RegexOptions.IgnoreCase;
					else
						options |= RegexOptions.IgnoreCase;
					break;

				case 'm':
					if (negate)
						options &= ~RegexOptions.Multiline;
					else
						options |= RegexOptions.Multiline;
					break;

				// NOTE(review): this listing is page 1 of 2 and stops here;
				// the remainder of ParseOptions and of the class is not
				// part of this page.

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?