interpreter.cs
来自「没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没」· CS 代码 · 共 959 行 · 第 1/2 页
CS
959 行
//
// assembly:	System
// namespace:	System.Text.RegularExpressions
// file:	interpreter.cs
//
// author:	Dan Lewis (dlewis@gmx.co.uk)
//		(c) 2002

using System;
using System.Collections;
using System.Diagnostics;
using System.Globalization;

namespace System.Text.RegularExpressions {

	class Interpreter : IMachine {
		// Creates an interpreter over a compiled program.
		//
		// The first four words of the program are read as an info block:
		//   program[0]  must carry the OpCode.Info opcode (asserted via Debug.Assert)
		//   program[1]  stored, plus one, as group_count
		//   program[2]  stored as match_min
		//   program[3]  stored as match_max
		// Execution starts right after that block, at word 4.
		//
		// No validation is done on the array itself; a null or too-short program
		// fails on the first indexing operation.
		public Interpreter (ushort[] program) {
			this.program = program;
			this.qs = null;		// quick-search helper is created on demand by Eval

			// process info block

			Debug.Assert ((OpCode)program[0] == OpCode.Info, "Regex", "Cant' find info block");

			this.group_count = program[1] + 1;
			this.match_min = program[2];
			this.match_max = program[3];

			// setup

			this.program_start = 4;
			this.groups = new int [group_count];
		}

		// IMachine implementation

		// Runs the program against text.
		//
		// Records the subject string, uses end as text_end and start as scan_ptr,
		// then evaluates the program from program_start in Mode.Match, passing
		// scan_ptr by reference.
		//
		// Returns GenerateMatch (regex) when evaluation succeeds, Match.Empty otherwise.
		public Match Scan (Regex regex, string text, int start, int end) {
			this.text = text;
			this.text_end = end;
			this.scan_ptr = start;

			if (Eval (Mode.Match, ref scan_ptr, program_start))
				return GenerateMatch (regex);

			return Match.Empty;
		}

		// private methods

		// Clears per-attempt state: calls ResetGroups and drops both
		// repeat-context chains (fast and repeat).
		private void Reset () {
			ResetGroups ();
			fast = repeat = null;
		}

		private bool Eval (Mode mode, ref int ref_ptr, int pc) {
			int ptr = ref_ptr;
		Begin:
			for (;;) {
				ushort word = program[pc];
				OpCode op = (OpCode)(word & 0x00ff);
				OpFlags flags = (OpFlags)(word & 0xff00);

				switch (op) {
				case OpCode.Anchor: {
					int skip = program[pc + 1];

					int anch_offset = program[pc + 2];
					int anch_ptr = ptr + anch_offset;
					int anch_end = text_end - match_min + anch_offset;	// maximum anchor position

					// the general case for an anchoring expression is at the bottom, however we
					// do some checks for the common cases before to save processing time. the current
					// optimizer only outputs three types of anchoring expressions: fixed position,
					// fixed substring, and no anchor.
OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff); if (anch_op == OpCode.Position && skip == 6) { // position anchor // Anchor // Position // True switch ((Position)program[pc + 4]) { case Position.StartOfString: if (anch_ptr == 0) { ptr = 0; if (TryMatch (ref ptr, pc + skip)) goto Pass; } break; case Position.StartOfLine: if (anch_ptr == 0) { ptr = 0; if (TryMatch (ref ptr, pc + skip)) goto Pass; ++ anch_ptr; } while (anch_ptr <= anch_end) { if (text[anch_ptr - 1] == '\n') { ptr = anch_ptr - anch_offset; if (TryMatch (ref ptr, pc + skip)) goto Pass; } ++ anch_ptr; } break; case Position.StartOfScan: if (anch_ptr == scan_ptr) { ptr = scan_ptr - anch_offset; if (TryMatch (ref ptr, pc + skip)) goto Pass; } break; default: // FIXME break; } } else if (qs != null || (anch_op == OpCode.String && skip == 6 + program[pc + 4])) { // substring anchor // Anchor // String // True if (qs == null) { bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0; string substring = GetString (pc + 3); qs = new QuickSearch (substring, ignore); } while (anch_ptr <= anch_end) { anch_ptr = qs.Search (text, anch_ptr, anch_end); if (anch_ptr < 0) break; ptr = anch_ptr - anch_offset; if (TryMatch (ref ptr, pc + skip)) goto Pass; ++ anch_ptr; } } else if (anch_op == OpCode.True) { // no anchor // Anchor // True while (anch_ptr <= anch_end) { ptr = anch_ptr; if (TryMatch (ref ptr, pc + skip)) goto Pass; ++ anch_ptr; } } else { // general case // Anchor // <expr> // True while (anch_ptr <= anch_end) { ptr = anch_ptr; if (Eval (Mode.Match, ref ptr, pc + 3)) { // anchor expression passed: try real expression at the correct offset ptr = anch_ptr - anch_offset; if (TryMatch (ref ptr, pc + skip)) goto Pass; } ++ anch_ptr; } } goto Fail; } case OpCode.False: { goto Fail; } case OpCode.True: { goto Pass; } case OpCode.Position: { if (!IsPosition ((Position)program[pc + 1], ptr)) goto Fail; pc += 2; break; } case OpCode.String: { bool reverse = (flags & OpFlags.RightToLeft) != 0; bool 
ignore = (flags & OpFlags.IgnoreCase) != 0; int len = program[pc + 1]; if (reverse) { ptr -= len; if (ptr < 0) goto Fail; } else if (ptr + len > text_end) goto Fail; pc += 2; for (int i = 0; i < len; ++ i) { char c = text[ptr + i]; if (ignore) c = Char.ToLower (c); if (c != (char)program[pc ++]) goto Fail; } if (!reverse) ptr += len; break; } case OpCode.Reference: { bool reverse = (flags & OpFlags.RightToLeft) != 0; bool ignore = (flags & OpFlags.IgnoreCase) != 0; int m = GetLastDefined (program [pc + 1]); if (m < 0) goto Fail; int str = marks [m].Index; int len = marks [m].Length; if (reverse) { ptr -= len; if (ptr < 0) goto Fail; } else if (ptr + len > text_end) goto Fail; pc += 2; for (int i = 0; i < len; ++ i) { if (ignore) { if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i])) goto Fail; } else { if (text[ptr + i] != text[str + i]) goto Fail; } } if (!reverse) ptr += len; break; } case OpCode.Character: case OpCode.Category: case OpCode.Range: case OpCode.Set: { if (!EvalChar (mode, ref ptr, ref pc, false)) goto Fail; break; } case OpCode.In: { int target = pc + program[pc + 1]; pc += 2; if (!EvalChar (mode, ref ptr, ref pc, true)) goto Fail; pc = target; break; } case OpCode.Open: { Open (program[pc + 1], ptr); pc += 2; break; } case OpCode.Close: { Close (program[pc + 1], ptr); pc += 2; break; } case OpCode.Balance: { Balance (program[pc + 1], program[pc + 2], ptr); break; } case OpCode.IfDefined: { int m = GetLastDefined (program [pc + 2]); if (m < 0) pc += program[pc + 1]; else pc += 3; break; } case OpCode.Sub: { if (!Eval (Mode.Match, ref ptr, pc + 2)) goto Fail; pc += program[pc + 1]; break; } case OpCode.Test: { int cp = Checkpoint (); int test_ptr = ptr; if (Eval (Mode.Match, ref test_ptr, pc + 3)) pc += program[pc + 1]; else { Backtrack (cp); pc += program[pc + 2]; } break; } case OpCode.Branch: { OpCode branch_op; do { int cp = Checkpoint (); if (Eval (Mode.Match, ref ptr, pc + 2)) goto Pass; Backtrack (cp); pc += program[pc + 1]; 
branch_op = (OpCode)(program[pc] & 0xff); } while (branch_op != OpCode.False); goto Fail; } case OpCode.Jump: { pc += program[pc + 1]; break; } case OpCode.Repeat: { this.repeat = new RepeatContext ( this.repeat, // previous context program[pc + 2], // minimum program[pc + 3], // maximum (flags & OpFlags.Lazy) != 0, // lazy pc + 4 // subexpression ); if (Eval (Mode.Match, ref ptr, pc + program[pc + 1])) goto Pass; else { this.repeat = this.repeat.Previous; goto Fail; } } case OpCode.Until: { RepeatContext current = this.repeat; int start = current.Start; if (!current.IsMinimum) { ++ current.Count; current.Start = ptr; if (Eval (Mode.Match, ref ptr, repeat.Expression)) goto Pass; current.Start = start; -- current.Count; goto Fail; } if (ptr == current.Start) { // degenerate match ... match tail or fail this.repeat = current.Previous; if (Eval (Mode.Match, ref ptr, pc + 1)) goto Pass; this.repeat = current; goto Fail; } if (current.IsLazy) { // match tail first ... this.repeat = current.Previous; int cp = Checkpoint (); if (Eval (Mode.Match, ref ptr, pc + 1)) goto Pass; Backtrack (cp); // ... then match more this.repeat = current; if (!current.IsMaximum) { ++ current.Count; current.Start = ptr; if (Eval (Mode.Match, ref ptr, current.Expression)) goto Pass; current.Start = start; -- current.Count; goto Fail; } return false; } else { // match more first ... if (!current.IsMaximum) { int cp = Checkpoint (); ++ current.Count; current.Start = ptr; if (Eval (Mode.Match, ref ptr, current.Expression)) goto Pass; current.Start = start; -- current.Count; Backtrack (cp); } // ... 
then match tail this.repeat = current.Previous; if (Eval (Mode.Match, ref ptr, pc + 1)) goto Pass; this.repeat = current; goto Fail; } } case OpCode.FastRepeat: { this.fast = new RepeatContext ( fast, program[pc + 2], // minimum program[pc + 3], // maximum (flags & OpFlags.Lazy) != 0, // lazy pc + 4 // subexpression ); fast.Start = ptr; int cp = Checkpoint (); pc += program[pc + 1]; // tail expression ushort tail_word = program[pc]; int c1, c2; // first character of tail operator int coff; // 0 or -1 depending on direction OpCode tail_op = (OpCode)(tail_word & 0xff); if (tail_op == OpCode.Character || tail_op == OpCode.String) { OpFlags tail_flags = (OpFlags)(tail_word & 0xff00); if (tail_op == OpCode.String) c1 = program[pc + 2]; // first char of string else c1 = program[pc + 1]; // character if ((tail_flags & OpFlags.IgnoreCase) != 0) c2 = Char.ToUpper ((char)c1); // ignore case else c2 = c1; if ((tail_flags & OpFlags.RightToLeft) != 0) coff = -1; // reverse else coff = 0; } else { c1 = c2 = -1; coff = 0; }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?