interpreter.cs

来自「没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没」· CS 代码 · 共 959 行 · 第 1/2 页

CS
959
字号
//
// assembly:	System
// namespace:	System.Text.RegularExpressions
// file:	interpreter.cs
//
// author:	Dan Lewis (dlewis@gmx.co.uk)
// 		(c) 2002

using System;
using System.Collections;
using System.Diagnostics;
using System.Globalization;

namespace System.Text.RegularExpressions {

	class Interpreter : IMachine {
		// Builds an interpreter over a compiled program.
		//
		// The first four words of the program are read as an info block:
		//   program[0]  opcode word, expected to be OpCode.Info
		//   program[1]  stored (plus one) as group_count
		//   program[2]  stored as match_min
		//   program[3]  stored as match_max
		// Evaluation starts at word 4, immediately after that block.
		//
		// Throws ArgumentNullException when program is null and
		// ArgumentException when it is too short to hold the info block.
		public Interpreter (ushort[] program) {
			// The info block is indexed unconditionally below, so reject
			// unusable input up front with a descriptive exception instead
			// of a NullReferenceException / IndexOutOfRangeException.
			if (program == null)
				throw new ArgumentNullException ("program");
			if (program.Length < 4)
				throw new ArgumentException ("Program is too short to contain an info block.", "program");

			this.program = program;
			this.qs = null;		// quick-search helper is created lazily by the Anchor opcode

			// process info block

			Debug.Assert ((OpCode)program[0] == OpCode.Info, "Regex", "Can't find info block");

			// NOTE(review): the +1 presumably accounts for the implicit
			// whole-match group -- confirm against the compiler.
			this.group_count = program[1] + 1;
			this.match_min = program[2];
			this.match_max = program[3];

			// setup

			this.program_start = 4;
			this.groups = new int [group_count];
		}

		// IMachine implementation

		// Runs the program against text, starting at index start and
		// treating end as the end of the text. The inputs are recorded
		// in the text, text_end and scan_ptr fields before evaluation.
		//
		// Returns the result of GenerateMatch (regex) when evaluation
		// succeeds, otherwise Match.Empty.
		public Match Scan (Regex regex, string text, int start, int end) {
			this.text = text;
			this.text_end = end;
			this.scan_ptr = start;

			// scan_ptr is handed to Eval by reference.
			if (Eval (Mode.Match, ref scan_ptr, program_start))
				return GenerateMatch (regex);

			return Match.Empty;
		}

		// private methods

		// Clears per-attempt state: resets the groups and drops both
		// repeat-context chains (fast and repeat).
		private void Reset () {
			ResetGroups ();
			fast = repeat = null;
		}

		private bool Eval (Mode mode, ref int ref_ptr, int pc) {
			int ptr = ref_ptr;
		Begin:
			for (;;) {
				ushort word = program[pc];
				OpCode op = (OpCode)(word & 0x00ff);
				OpFlags flags = (OpFlags)(word & 0xff00);

				switch (op) {
				case OpCode.Anchor: {
					int skip = program[pc + 1];

					int anch_offset = program[pc + 2];
					int anch_ptr = ptr + anch_offset;
					int anch_end = text_end - match_min + anch_offset;	// maximum anchor position

					// the general case for an anchoring expression is at the bottom, however we
					// do some checks for the common cases before to save processing time. the current
					// optimizer only outputs three types of anchoring expressions: fixed position,
					// fixed substring, and no anchor.

OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff);					if (anch_op == OpCode.Position && skip == 6) {				// position anchor						// Anchor						// 	Position						//	True						switch ((Position)program[pc + 4]) {						case Position.StartOfString:							if (anch_ptr == 0) {								ptr = 0;								if (TryMatch (ref ptr, pc + skip))									goto Pass;							}							break;												case Position.StartOfLine:							if (anch_ptr == 0) {								ptr = 0;								if (TryMatch (ref ptr, pc + skip))									goto Pass;								++ anch_ptr;							}							while (anch_ptr <= anch_end) {								if (text[anch_ptr - 1] == '\n') {									ptr = anch_ptr - anch_offset;									if (TryMatch (ref ptr, pc + skip))										goto Pass;								}								++ anch_ptr;							}							break;												case Position.StartOfScan:							if (anch_ptr == scan_ptr) {								ptr = scan_ptr - anch_offset;								if (TryMatch (ref ptr, pc + skip))									goto Pass;							}							break;						default:							// FIXME							break;						}					}					else if (qs != null ||						(anch_op == OpCode.String && skip == 6 + program[pc + 4])) {	// substring anchor						// Anchor						//	String						//	True						if (qs == null) {							bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;							string substring = GetString (pc + 3);							qs = new QuickSearch (substring, ignore);						}						while (anch_ptr <= anch_end) {							anch_ptr = qs.Search (text, anch_ptr, anch_end);							if (anch_ptr < 0)								break;							ptr = anch_ptr - anch_offset;							if (TryMatch (ref ptr, pc + skip))								goto Pass;							++ anch_ptr;						}					}					else if (anch_op == OpCode.True) {					// no anchor						// Anchor						//	True						while (anch_ptr <= anch_end) {							ptr = anch_ptr;							if (TryMatch (ref ptr, pc + skip))								goto Pass;							++ anch_ptr;						}					}					else {									// general case						// Anchor						//	<expr>						//	True						while (anch_ptr <= anch_end) {							ptr = anch_ptr;					
		if (Eval (Mode.Match, ref ptr, pc + 3)) {								// anchor expression passed: try real expression at the correct offset								ptr = anch_ptr - anch_offset;								if (TryMatch (ref ptr, pc + skip))									goto Pass;							}							++ anch_ptr;						}					}					goto Fail;				}								case OpCode.False: {					goto Fail;				}				case OpCode.True: {					goto Pass;				}				case OpCode.Position: {					if (!IsPosition ((Position)program[pc + 1], ptr))						goto Fail;					pc += 2;					break;				}				case OpCode.String: {					bool reverse = (flags & OpFlags.RightToLeft) != 0;					bool ignore = (flags & OpFlags.IgnoreCase) != 0;					int len = program[pc + 1];					if (reverse) {						ptr -= len;						if (ptr < 0)							goto Fail;					}					else if (ptr + len > text_end)						goto Fail;					pc += 2;					for (int i = 0; i < len; ++ i) {						char c = text[ptr + i];						if (ignore)							c = Char.ToLower (c);						if (c != (char)program[pc ++])							goto Fail;					}					if (!reverse)						ptr += len;					break;				}				case OpCode.Reference: {					bool reverse = (flags & OpFlags.RightToLeft) != 0;					bool ignore = (flags & OpFlags.IgnoreCase) != 0;					int m = GetLastDefined (program [pc + 1]);					if (m < 0)						goto Fail;					int str = marks [m].Index;					int len = marks [m].Length;					if (reverse) {						ptr -= len;						if (ptr < 0)							goto Fail;					}					else if (ptr + len > text_end)						goto Fail;					pc += 2;					for (int i = 0; i < len; ++ i) {						if (ignore) {							if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))								goto Fail;						}						else {							if (text[ptr + i] != text[str + i])								goto Fail;						}					}					if (!reverse)						ptr += len;					break;				}				case OpCode.Character: case OpCode.Category:				case OpCode.Range: case OpCode.Set: {					if (!EvalChar (mode, ref ptr, ref pc, false))						goto Fail;					break;				}				case OpCode.In: {					int target = pc + program[pc + 1];					pc += 2;					if (!EvalChar (mode, 
ref ptr, ref pc, true))						goto Fail;					pc = target;					break;				}				case OpCode.Open: {					Open (program[pc + 1], ptr);					pc += 2;					break;				}				case OpCode.Close: {					Close (program[pc + 1], ptr);					pc += 2;					break;				}				case OpCode.Balance: {					Balance (program[pc + 1], program[pc + 2], ptr);					break;				}				case OpCode.IfDefined: {					int m = GetLastDefined (program [pc + 2]);					if (m < 0)						pc += program[pc + 1];					else						pc += 3;					break;				}				case OpCode.Sub: {					if (!Eval (Mode.Match, ref ptr, pc + 2))						goto Fail;					pc += program[pc + 1];					break;				}				case OpCode.Test: {					int cp = Checkpoint ();					int test_ptr = ptr;					if (Eval (Mode.Match, ref test_ptr, pc + 3))						pc += program[pc + 1];					else {						Backtrack (cp);						pc += program[pc + 2];					}					break;				}				case OpCode.Branch: {					OpCode branch_op;					do {						int cp = Checkpoint ();						if (Eval (Mode.Match, ref ptr, pc + 2))							goto Pass;												Backtrack (cp);												pc += program[pc + 1];						branch_op = (OpCode)(program[pc] & 0xff);					} while (branch_op != OpCode.False);					goto Fail;				}				case OpCode.Jump: {					pc += program[pc + 1];					break;				}				case OpCode.Repeat: {					this.repeat = new RepeatContext (						this.repeat,			// previous context						program[pc + 2],		// minimum						program[pc + 3],		// maximum						(flags & OpFlags.Lazy) != 0,	// lazy						pc + 4				// subexpression					);					if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))						goto Pass;					else {						this.repeat = this.repeat.Previous;						goto Fail;					}				}				case OpCode.Until: {					RepeatContext current = this.repeat;					int start = current.Start;					if (!current.IsMinimum) {						++ current.Count;						current.Start = ptr;						if (Eval (Mode.Match, ref ptr, repeat.Expression))							goto Pass;						current.Start = start;						-- current.Count;						goto Fail;					}					if (ptr == current.Start) {				
		// degenerate match ... match tail or fail						this.repeat = current.Previous;						if (Eval (Mode.Match, ref ptr, pc + 1))							goto Pass;											this.repeat = current;						goto Fail;					}					if (current.IsLazy) {						// match tail first ...						this.repeat = current.Previous;						int cp = Checkpoint ();						if (Eval (Mode.Match, ref ptr, pc + 1))							goto Pass;						Backtrack (cp);						// ... then match more						this.repeat = current;						if (!current.IsMaximum) {							++ current.Count;							current.Start = ptr;							if (Eval (Mode.Match, ref ptr, current.Expression))								goto Pass;							current.Start = start;							-- current.Count;							goto Fail;						}						return false;					}					else {						// match more first ...						if (!current.IsMaximum) {							int cp = Checkpoint ();							++ current.Count;							current.Start = ptr;							if (Eval (Mode.Match, ref ptr, current.Expression))								goto Pass;							current.Start = start;							-- current.Count;							Backtrack (cp);						}						// ... 
then match tail						this.repeat = current.Previous;						if (Eval (Mode.Match, ref ptr, pc + 1))							goto Pass;						this.repeat = current;						goto Fail;					}				}				case OpCode.FastRepeat: {					this.fast = new RepeatContext (						fast,						program[pc + 2],		// minimum						program[pc + 3],		// maximum						(flags & OpFlags.Lazy) != 0,	// lazy						pc + 4				// subexpression					);					fast.Start = ptr;					int cp = Checkpoint ();					pc += program[pc + 1];		// tail expression					ushort tail_word = program[pc];					int c1, c2;			// first character of tail operator					int coff;			// 0 or -1 depending on direction					OpCode tail_op = (OpCode)(tail_word & 0xff);					if (tail_op == OpCode.Character || tail_op == OpCode.String) {						OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);						if (tail_op == OpCode.String)							c1 = program[pc + 2];				// first char of string						else							c1 = program[pc + 1];				// character												if ((tail_flags & OpFlags.IgnoreCase) != 0)							c2 = Char.ToUpper ((char)c1);			// ignore case						else							c2 = c1;						if ((tail_flags & OpFlags.RightToLeft) != 0)							coff = -1;					// reverse						else							coff = 0;					}					else {						c1 = c2 = -1;						coff = 0;					}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?