parser.cs
来自「没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没」· CS 代码 · 共 1,121 行 · 第 1/2 页
CS
1,121 行
//
// assembly:	System
// namespace:	System.Text.RegularExpressions
// file:	parser.cs
//
// author:	Dan Lewis (dlewis@gmx.co.uk)
// 		(c) 2002

using System;
using System.Collections;
using System.Globalization;

namespace System.Text.RegularExpressions.Syntax {

	/// <summary>
	/// Recursive-descent parser that turns a regular expression pattern
	/// string into a tree of syntax expressions.
	/// </summary>
	class Parser {

		/// <summary>Parses one or more decimal digits starting at <paramref name="ptr"/>.</summary>
		/// <returns>The parsed value, or -1 when no digit was found.</returns>
		public static int ParseDecimal (string str, ref int ptr) {
			return ParseNumber (str, ref ptr, 10, 1, Int32.MaxValue);
		}

		/// <summary>Parses one to three octal digits starting at <paramref name="ptr"/>.</summary>
		/// <returns>The parsed value, or -1 when no digit was found.</returns>
		public static int ParseOctal (string str, ref int ptr) {
			return ParseNumber (str, ref ptr, 8, 1, 3);
		}

		/// <summary>Parses exactly <paramref name="digits"/> hexadecimal digits.</summary>
		/// <returns>The parsed value, or -1 when fewer digits were available.</returns>
		public static int ParseHex (string str, ref int ptr, int digits) {
			return ParseNumber (str, ref ptr, 16, digits, digits);
		}

		/// <summary>
		/// Parses a number in base <paramref name="b"/> made of at least
		/// <paramref name="min"/> and at most <paramref name="max"/> digits.
		/// A <paramref name="max"/> smaller than <paramref name="min"/> means
		/// "no upper limit".
		/// </summary>
		/// <returns>
		/// The parsed value, with <paramref name="ptr"/> moved past the digits;
		/// or -1, with <paramref name="ptr"/> left untouched, when fewer than
		/// <paramref name="min"/> digits were read.
		/// </returns>
		public static int ParseNumber (string str, ref int ptr, int b, int min, int max) {
			int p = ptr, n = 0, digits = 0, d;
			if (max < min)
				max = Int32.MaxValue;

			while (digits < max && p < str.Length) {
				d = ParseDigit (str[p ++], b, digits);
				if (d < 0) {
					// not a digit in this base: un-read it and stop
					-- p;
					break;
				}

				n = n * b + d;
				++ digits;
			}

			if (digits < min)
				return -1;

			ptr = p;
			return n;
		}

		/// <summary>
		/// Parses a group name at <paramref name="ptr"/>. A name starting with a
		/// digit is read as a decimal group number and returned in string form
		/// (null unless the number is greater than zero); otherwise the longest
		/// run of name characters is returned (null when that run is empty).
		/// </summary>
		/// <remarks>
		/// Indexing past the end of <paramref name="str"/> throws
		/// IndexOutOfRangeException; there is no explicit bounds check here.
		/// </remarks>
		public static string ParseName (string str, ref int ptr) {
			if (Char.IsDigit (str[ptr])) {
				// max (0) < min (1), so ParseNumber reads an unbounded digit run
				int gid = ParseNumber (str, ref ptr, 10, 1, 0);
				if (gid > 0)
					return gid.ToString ();

				return null;
			}

			int start = ptr;
			for (;;) {
				if (!IsNameChar (str[ptr]))
					break;
				++ ptr;
			}

			if (ptr - start > 0)
				return str.Substring (start, ptr - start);

			return null;
		}

		/// <summary>
		/// Returns <paramref name="str"/> with pattern metacharacters, '#' and
		/// the space character prefixed by a backslash, and with tab, newline,
		/// carriage return and form feed written as \t, \n, \r and \f.
		/// </summary>
		public static string Escape (string str) {
			// StringBuilder (found through the enclosing System.Text namespace)
			// avoids re-copying the whole result on every appended character.
			StringBuilder result = new StringBuilder (str.Length);
			for (int i = 0; i < str.Length; ++ i) {
				char c = str[i];
				switch (c) {
				case '\\': case '*': case '+': case '?': case '|':
				case '{': case '[': case '(': case ')': case '^':
				case '$': case '.': case '#': case ' ':
					result.Append ('\\').Append (c);
					break;

				case '\t': result.Append ("\\t"); break;
				case '\n': result.Append ("\\n"); break;
				case '\r': result.Append ("\\r"); break;
				case '\f': result.Append ("\\f"); break;

				default: result.Append (c); break;
				}
			}

			return result.ToString ();
		}

		/// <summary>Runs <paramref name="str"/> through a fresh parser's ParseString.</summary>
		public static string Unescape (string str) {
			return new Parser ().ParseString (str);
		}

		// public instance

		/// <summary>Creates a parser with empty capture and reference tables.</summary>
		public Parser () {
			this.caps = new ArrayList ();
			this.refs = new Hashtable ();
		}

		/// <summary>
		/// Parses <paramref name="pattern"/> into a RegularExpression tree,
		/// resetting all per-parse state first.
		/// </summary>
		/// <remarks>
		/// The parsing code indexes the pattern without bounds checks, so running
		/// off the end surfaces as IndexOutOfRangeException; it is translated here
		/// into a parse error.
		/// </remarks>
		public RegularExpression ParseRegularExpression (string pattern, RegexOptions options) {
			this.pattern = pattern;
			this.ptr = 0;

			caps.Clear ();
			refs.Clear ();
			this.num_groups = 0;

			try {
				RegularExpression re = new RegularExpression ();
				ParseGroup (re, options, null);
				ResolveReferences ();

				re.GroupCount = num_groups;

				return re;
			}
			catch (IndexOutOfRangeException) {
				throw NewParseException ("Unexpected end of pattern.");
			}
		}

		/// <summary>
		/// Builds the group-name table: "0" maps to 0, and every recorded
		/// capturing group is entered either under its name (first use of that
		/// name) or under its 1-based position in the capture list.
		/// </summary>
		public IDictionary GetMapping () {
			Hashtable mapping = new Hashtable ();
			int end = caps.Count;
			mapping.Add ("0", 0);
			for (int i = 0; i < end;) {
				CapturingGroup group = (CapturingGroup) caps [i];
				i++;	// from here on i is the 1-based position of the group
				if (group.Name != null && !mapping.Contains (group.Name))
					mapping.Add (group.Name, group.Number);
				else
					mapping.Add (i.ToString (), i);
			}

			return mapping;
		}

		// private methods

		/// <summary>
		/// Parses the body of a group up to its closing ')' (or the end of the
		/// pattern for the top-level expression) and appends the result to
		/// <paramref name="group"/>. When <paramref name="assertion"/> is given,
		/// the alternatives separated by '|' become its true/false branches
		/// instead of an Alternation.
		/// </summary>
		private void ParseGroup (Group group, RegexOptions options, Assertion assertion) {
			bool is_top_level = group is RegularExpression;

			Alternation alternation = null;
			string literal = null;		// run of plain characters not yet emitted

			Group current = new Group ();
			Expression expr = null;
			bool closed = false;

			while (true) {
				ConsumeWhitespace (IsIgnorePatternWhitespace (options));
				if (ptr >= pattern.Length)
					break;

				// (1) Parse for Expressions

				char ch = pattern[ptr ++];

				switch (ch) {
				case '^': {
					Position pos =
						IsMultiline (options) ? Position.StartOfLine : Position.Start;

					expr = new PositionAssertion (pos);
					break;
				}

				case '$': {
					Position pos =
						IsMultiline (options) ? Position.EndOfLine : Position.End;

					expr = new PositionAssertion (pos);
					break;
				}

				case '.': {
					Category cat =
						IsSingleline (options) ? Category.AnySingleline : Category.Any;

					expr = new CharacterClass (cat, false);
					break;
				}

				case '\\': {
					int c = ParseEscape ();
					if (c >= 0)
						ch = (char)c;
					else {
						expr = ParseSpecial (options);

						if (expr == null)
							ch = pattern[ptr ++];		// default escape
					}
					break;
				}

				case '[': {
					expr = ParseCharacterClass (options);
					break;
				}

				case '(': {
					bool ignore = IsIgnoreCase (options);
					expr = ParseGroupingConstruct (ref options);
					if (expr == null) {
						// Inline option change or comment. If case sensitivity
						// changed, the pending literal was read under the OLD
						// setting, so it must be emitted with that setting.
						if (literal != null && IsIgnoreCase (options) != ignore) {
							current.AppendExpression (new Literal (literal, ignore));
							literal = null;
						}

						continue;
					}
					break;
				}

				case ')': {
					closed = true;
					goto EndOfGroup;
				}

				case '|': {
					if (literal != null) {
						current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
						literal = null;
					}

					if (assertion != null) {
						if (assertion.TrueExpression == null)
							assertion.TrueExpression = current;
						else if (assertion.FalseExpression == null)
							assertion.FalseExpression = current;
						else
							throw NewParseException ("Too many | in (?()|).");
					}
					else {
						if (alternation == null)
							alternation = new Alternation ();

						alternation.AddAlternative (current);
					}

					current = new Group ();
					continue;
				}

				case '*': case '+': case '?': {
					// a quantifier with nothing in front of it
					throw NewParseException ("Bad quantifier.");
				}

				default:
					break;		// literal character
				}

				ConsumeWhitespace (IsIgnorePatternWhitespace (options));

				// (2) Check for Repetitions

				if (ptr < pattern.Length) {
					char k = pattern[ptr];

					if (k == '?' || k == '*' || k == '+' || k == '{') {
						++ ptr;

						int min = 0, max = 0;
						bool lazy = false;

						switch (k) {
						case '?': min = 0; max = 1; break;
						case '*': min = 0; max = 0xffff; break;
						case '+': min = 1; max = 0xffff; break;
						case '{': ParseRepetitionBounds (out min, out max, options); break;
						}

						// a trailing '?' makes the quantifier lazy
						ConsumeWhitespace (IsIgnorePatternWhitespace (options));
						if (ptr < pattern.Length && pattern[ptr] == '?') {
							++ ptr;
							lazy = true;
						}

						Repetition repetition = new Repetition (min, max, lazy);

						// with no expression, the quantifier binds to the single
						// character just read, not to the whole pending literal
						if (expr == null)
							repetition.Expression = new Literal (ch.ToString (), IsIgnoreCase (options));
						else
							repetition.Expression = expr;

						expr = repetition;
					}
				}

				// (3) Append Expression and/or Literal

				if (expr == null) {
					if (literal == null)
						literal = "";

					literal += ch;
				}
				else {
					if (literal != null) {
						current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
						literal = null;
					}

					current.AppendExpression (expr);
					expr = null;
				}

				if (is_top_level && ptr >= pattern.Length)
					goto EndOfGroup;
			}

		EndOfGroup:
			if (is_top_level && closed)
				throw NewParseException ("Too many )'s.");
			if (!is_top_level && !closed)
				throw NewParseException ("Not enough )'s.");

			// clean up literals and alternations

			if (literal != null)
				current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));

			if (assertion != null) {
				if (assertion.TrueExpression == null)
					assertion.TrueExpression = current;
				else
					assertion.FalseExpression = current;

				group.AppendExpression (assertion);
			}
			else if (alternation != null) {
				alternation.AddAlternative (current);
				group.AppendExpression (alternation);
			}
			else
				group.AppendExpression (current);
		}

		/// <summary>
		/// Parses whatever follows an opening '(': a plain group, or one of the
		/// (?...) constructs. <paramref name="options"/> is updated in place by
		/// an inline option change such as (?i).
		/// </summary>
		/// <returns>
		/// The parsed expression, or null for constructs that produce none
		/// (an inline option change or a (?#...) comment).
		/// </returns>
		private Expression ParseGroupingConstruct (ref RegexOptions options) {
			if (pattern[ptr] != '?') {
				Group group;

				if (IsExplicitCapture (options))
					group = new Group ();
				else {
					group = new CapturingGroup ();
					caps.Add (group);
				}

				ParseGroup (group, options, null);
				return group;
			}
			else
				++ ptr;

			switch (pattern[ptr]) {
			case ':': {						// non-capturing group
				++ ptr;
				Group group = new Group ();
				ParseGroup (group, options, null);

				return group;
			}

			case '>': {						// non-backtracking group
				++ ptr;
				Group group = new NonBacktrackingGroup ();
				ParseGroup (group, options, null);

				return group;
			}

			case 'i': case 'm': case 'n':
			case 's': case 'x': case '-': {				// options
				RegexOptions o = options;
				ParseOptions (ref o, false);
				if (pattern[ptr] == '-') {
					++ ptr;
					ParseOptions (ref o, true);
				}

				if (pattern[ptr] == ':') {			// pass options to child group
					++ ptr;
					Group group = new Group ();
					ParseGroup (group, o, null);

					return group;
				}
				else if (pattern[ptr] == ')') {			// change options of enclosing group
					++ ptr;
					options = o;

					return null;
				}
				else
					throw NewParseException ("Bad options");
			}

			case '<': case '=': case '!': {				// lookahead/lookbehind
				ExpressionAssertion asn = new ExpressionAssertion ();
				if (!ParseAssertionType (asn))
					goto case '\'';				// it's a (?<name> ) construct

				Group test = new Group ();
				ParseGroup (test, options, null);

				asn.TestExpression = test;
				return asn;
			}

			case '\'': {						// named/balancing group
				char delim;
				if (pattern[ptr] == '<')
					delim = '>';
				else
					delim = '\'';

				++ ptr;
				string name = ParseName ();

				if (pattern[ptr] == delim) {
					// capturing group

					if (name == null)
						throw NewParseException ("Bad group name.");

					++ ptr;
					CapturingGroup cap = new CapturingGroup ();
					cap.Name = name;
					caps.Add (cap);
					ParseGroup (cap, options, null);

					return cap;
				}
				else if (pattern[ptr] == '-') {
					// balancing group

					++ ptr;
					string balance_name = ParseName ();
					if (balance_name == null || pattern[ptr] != delim)
						throw NewParseException ("Bad balancing group name.");

					++ ptr;
					BalancingGroup bal = new BalancingGroup ();
					bal.Name = name;
					caps.Add (bal);
					refs.Add (bal, balance_name);

					// The group's own body and closing ')' have to be consumed
					// here, as for every other group kind; otherwise they would
					// be parsed as part of the enclosing group.
					ParseGroup (bal, options, null);

					return bal;
				}
				else
					throw NewParseException ("Bad group name.");
			}

			case '(': {						// expression/capture test
				Assertion asn;

				++ ptr;
				int p = ptr;
				string name = ParseName ();
				if (name == null || pattern[ptr] != ')') {	// expression test
					// FIXME MS implementation doesn't seem to
					// implement this version of (?(x) ...)

					ptr = p;		// rewind: the condition is an expression, not a name
					ExpressionAssertion expr_asn = new ExpressionAssertion ();

					if (pattern[ptr] == '?') {
						++ ptr;
						if (!ParseAssertionType (expr_asn))
							throw NewParseException ("Bad conditional.");
					}
					else {
						expr_asn.Negate = false;
						expr_asn.Reverse = false;
					}

					Group test = new Group ();
					ParseGroup (test, options, null);
					expr_asn.TestExpression = test;
					asn = expr_asn;
				}
				else {						// capture test
					++ ptr;
					asn = new CaptureAssertion ();
					refs.Add (asn, name);
				}

				Group group = new Group ();
				ParseGroup (group, options, asn);
				return group;
			}

			case '#': {						// comment
				++ ptr;
				while (pattern[ptr ++] != ')') {
					if (ptr >= pattern.Length)
						throw NewParseException ("Unterminated (?#...) comment.");
				}
				return null;
			}

			default:						// error
				throw NewParseException ("Bad grouping construct.");
			}
		}

		/// <summary>
		/// Recognises "=", "!", "&lt;=" or "&lt;!" at the current position, sets
		/// Negate and Reverse on <paramref name="assertion"/> accordingly and
		/// skips the marker.
		/// </summary>
		/// <returns>
		/// False, with the position unchanged, when none of the four markers is
		/// present; true otherwise.
		/// </returns>
		private bool ParseAssertionType (ExpressionAssertion assertion) {
			if (pattern[ptr] == '<') {
				switch (pattern[ptr + 1]) {
				case '=': assertion.Negate = false; break;
				case '!': assertion.Negate = true; break;
				default: return false;
				}

				assertion.Reverse = true;
				ptr += 2;
			}
			else {
				switch (pattern[ptr]) {
				case '=': assertion.Negate = false; break;
				case '!': assertion.Negate = true; break;
				default: return false;
				}

				assertion.Reverse = false;
				ptr += 1;
			}

			return true;
		}

		// Applies inline option letters at the current position to `options`:
		// each flag is set, or cleared when `negate` is true. Only the 'i' and
		// 'm' cases are visible in this part of the listing.
		private void ParseOptions (ref RegexOptions options, bool negate) {
			for (;;) {
				switch (pattern[ptr]) {
				case 'i':
					if (negate)
						options &= ~RegexOptions.IgnoreCase;
					else
						options |= RegexOptions.IgnoreCase;
					break;

				case 'm':
					if (negate)
						options &= ~RegexOptions.Multiline;
					else
						options |= RegexOptions.Multiline;
					break;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?