// scanner.cs
//
// C# source code
// CS
// Font size:
/*----------------------------------------------------------------------
Compiler Generator Coco/R,
Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
extended by M. Loeberbauer & A. Woess, Univ. of Linz
with improvements by Pat Terry, Rhodes University

This program is free software; you can redistribute it and/or modify it 
under the terms of the GNU General Public License as published by the 
Free Software Foundation; either version 2, or (at your option) any 
later version.

This program is distributed in the hope that it will be useful, but 
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
for more details.

You should have received a copy of the GNU General Public License along 
with this program; if not, write to the Free Software Foundation, Inc., 
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

As an exception, it is allowed to write an extension of Coco/R that is
used as a plugin in non-free software.

If not otherwise stated, any source code generated by Coco/R (other than 
Coco/R itself) does not fall under the GNU General Public License.
-----------------------------------------------------------------------*/

using System;
using System.IO;
using System.Collections;
using System.Text;

namespace at.jku.ssw.Coco {

/// <summary>
/// A single token produced by the scanner. Tokens are chained through
/// <see cref="next"/>, so the whole input forms a singly linked list.
/// </summary>
public class Token {
	/// <summary>Numeric token kind assigned by the scanner.</summary>
	public int kind;

	/// <summary>Position of the token in the source text (starting at 0).</summary>
	public int pos;

	/// <summary>
	/// Column of the token. Scanner.NextToken stores
	/// <c>pos - lineStart + 1</c> here, so the first column of a line is 1.
	/// </summary>
	public int col;

	/// <summary>Line of the token (starting at 1).</summary>
	public int line;

	/// <summary>Text of the token.</summary>
	public string val;

	/// <summary>Following token in the linked list (AW 2003-03-07).</summary>
	public Token next;
}

/// <summary>
/// Keeps the complete source text in a static byte array and exposes a
/// read cursor over it. Each byte is handed out as one character code;
/// <see cref="EOF"/> is returned once the cursor is at the end.
/// </summary>
public class Buffer {
	/// <summary>Sentinel returned by Read/Peek at the end of the buffer (one past the largest byte value).</summary>
	public const char EOF = (char)256;
	static byte[] buf;   // source bytes; null until Fill has been called
	static int bufLen;   // number of valid bytes in buf
	static int pos;      // read cursor, always within 0..bufLen
	
	// CHANGES by M.KRUEGER
	/// <summary>
	/// Counts the '\n' bytes at buffer positions 0..offset (inclusive).
	/// </summary>
	/// <param name="offset">
	/// Last position to inspect. Values at or beyond the end of the buffer
	/// (such as the position the scanner records for the end-of-file token)
	/// are clamped to the last byte; negative values yield 0.
	/// </param>
	/// <returns>The number of line feeds found.</returns>
	public static int CountLines(int offset)
	{
		// Clamp instead of indexing past the array; with an empty or
		// unfilled buffer bufLen is 0, so the loop below does not run.
		int last = offset < bufLen ? offset : bufLen - 1;
		int line = 0;
		for (int i = 0; i <= last; ++i) {
			if (buf[i] == '\n') {
				++line;
			}
		}
		return line;
	}	
	//EOC
	
	/// <summary>
	/// Loads the stream content into the buffer and resets the cursor to 0.
	/// </summary>
	/// <param name="s">Stream to read; its Length is used to size the buffer.</param>
	public static void Fill (Stream s) {
		int capacity = (int) s.Length;
		byte[] data = new byte[capacity];
		// Stream.Read may deliver fewer bytes than requested, so keep
		// reading until the array is full or the stream reports its end.
		int total = 0;
		while (total < capacity) {
			int got = s.Read(data, total, capacity - total);
			if (got <= 0) break;
			total += got;
		}
		buf = data;
		bufLen = total;   // only the bytes actually read are valid
		pos = 0;
	}
	
	/// <summary>Returns the byte at the cursor and advances, or EOF at the end.</summary>
	public static int Read () {
		if (pos < bufLen) return buf[pos++];
		else return EOF;                          /* pdt */
	}

	/// <summary>Returns the byte at the cursor without advancing, or EOF at the end.</summary>
	public static int Peek () {
		if (pos < bufLen) return buf[pos];
		else return EOF;                          /* pdt */
	}
	
	/* AW 2003-03-10 moved this from ParserGen.cs */
	/// <summary>
	/// Returns the buffer content in the half-open range [beg, end) as a
	/// string, one character per byte. The read cursor is left untouched.
	/// </summary>
	/// <param name="beg">First position; values below 0 are treated as 0.</param>
	/// <param name="end">Position after the last character; values past the end are clamped.</param>
	/// <returns>The text of the range, or "" when the range is empty.</returns>
	public static string GetString (int beg, int end) {
		// Clamp so that neither EOF sentinels nor bytes outside the
		// requested range end up in the result.
		if (beg < 0) beg = 0;
		if (end > bufLen) end = bufLen;
		if (beg >= end) return "";
		StringBuilder s = new StringBuilder(end - beg);
		for (int i = beg; i < end; i++) s.Append((char)buf[i]);
		return s.ToString();
	}

	/// <summary>
	/// Read cursor. Assigned values are clamped to the range 0..bufLen.
	/// </summary>
	public static int Pos {
		get { return pos; }
		set {
			if (value < 0) pos = 0; 
			else if (value >= bufLen) pos = bufLen; 
			else pos = value;
		}
	}
}

public class Scanner {
	// Character the scanner uses internally for every line end (see NextCh).
	const char EOL = '\n';
	// Token kind stored for the end-of-file token.
	const int eofSym = 0; /* pdt */
	// Number of ordinary character codes; the ignore set gets one extra slot
	// so that it can also be indexed with Buffer.EOF (256).
	const int charSetSize = 256;
	// Peek() skips every token whose kind is greater than maxT.
	const int maxT = 41;
	// Token kind stored when the input does not form a valid token.
	const int noSym = 41;
	// Start state of NextToken's state machine, indexed by the current
	// character code: 0 means "no token starts with this character",
	// and the last entry (index 256 == Buffer.EOF) is -1, the end-of-file state.
	static short[] start = {
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0, 11,  0, 10,  0,  0,  5, 29, 20,  0, 14,  0, 15, 28,  0,
	  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  0, 27, 17, 13, 18,  0,
	  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
	  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 21,  0, 22,  0,  0,
	  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
	  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 23, 19, 24,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  -1};


	static Token t;          // current token
	static char ch;          // current input character
	static int pos;          // position of the current character in the source text (-1 before the first read)
	static int line;         // line number of current character (starting at 1)
	static int lineStart;    // start position of current line
	static int oldEols;      // EOLs that appeared in a comment; NextCh replays them one by one
	static BitArray ignore;  // set of characters to be ignored by the scanner

	static Token tokens;     // the complete input token stream (head is a dummy token)
	static Token pt;         // current peek token
	
	// CHANGES by M.KRUEGER
	// Name of the file passed to Init(string); not set by Init(Stream).
	public static string fileName;
	// EOC
	
	/// <summary>
	/// Remembers <paramref name="fileName"/>, opens that file for shared
	/// reading and scans its whole content via Init(Stream).
	/// If an IOException occurs, a message is written to the console and
	/// the process exits with code 1.
	/// </summary>
	/// <param name="fileName">Path of the source file to scan.</param>
	public static void Init (string fileName) {
		// CHANGES by M.KRUEGER
		Scanner.fileName = fileName;
		// EOC

		try {
			// The using statement closes the stream on every path out of the block.
			using (FileStream input = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read)) {
				Init(input);
			}
		} catch (IOException) {
			Console.WriteLine("--- Cannot open file {0}", fileName);
			System.Environment.Exit(1);
		}
	}
	
	/// <summary>
	/// Loads the stream into Buffer, resets the scanner state and scans the
	/// complete input up front into a linked list of tokens.
	/// </summary>
	/// <param name="s">Stream holding the source text.</param>
	public static void Init (Stream s) {
		Buffer.Fill(s);

		// Reset the character cursor, then load the first character into ch.
		pos = -1;
		line = 1;
		lineStart = 0;
		oldEols = 0;
		NextCh();

		// Tab, line feed, carriage return and blank are skipped between tokens.
		ignore = new BitArray(charSetSize+1);
		ignore['\t'] = true;
		ignore['\n'] = true;
		ignore['\r'] = true;
		ignore[' '] = true;

		//--- AW: fill token list
		tokens = new Token();  // first token is a dummy
		Token last = tokens;
		for (;;) {
			last.next = NextToken();
			last = last.next;
			if (last.kind == eofSym) break;
		}
		// The end-of-file token links to itself, so walking past the end
		// keeps yielding it.
		last.next = last;
		last.val = "EOF";

		pt = tokens;
		t = tokens;
	}
	
	/// <summary>
	/// Moves to the next input character and stores it in ch, keeping
	/// pos, line and lineStart up to date.
	/// </summary>
	static void NextCh() {
		if (oldEols > 0) {
			// Replay one of the line ends that were consumed inside a comment.
			ch = EOL;
			oldEols--;
			return;
		}

		ch = (char)Buffer.Read();
		pos++;

		// An isolated '\r' (one that is not followed by '\n') is turned
		// into '\n' so that eol handling is uniform across Windows, Unix and Mac.
		bool isolatedReturn = ch == '\r' && Buffer.Peek() != '\n';
		if (isolatedReturn) {
			ch = EOL;
		}

		if (ch != EOL) {
			return;
		}
		line++;
		lineStart = pos + 1;
	}


	// Tries to skip a comment of the form /* ... */, which may be nested.
	// NextToken calls this only while ch is '/'.
	// Returns true when a complete comment was skipped. Returns false when
	// the '/' does not start a comment (the scanner is then rewound so that
	// ch is '/' again) or when the input ends inside the comment.
	static bool Comment0() {
		// level: current nesting depth; line0/lineStart0: line state on entry.
		int level = 1, line0 = line, lineStart0 = lineStart;
		NextCh();
		if (ch == '*') {
			NextCh();
			for(;;) {
				if (ch == '*') {
					NextCh();
					if (ch == '/') {
						// "*/" closes one nesting level.
						level--;
						// Outermost comment closed: record how many line ends it
						// contained so that NextCh replays them as EOL characters.
						if (level == 0) { oldEols = line - line0; NextCh(); return true; }
						NextCh();
					}
				} else if (ch == '/') {
					NextCh();
					if (ch == '*') {
						// "/*" opens a nested comment.
						level++; NextCh();
					}
				} else if (ch == Buffer.EOF) return false; // input ended inside the comment
				else NextCh();
			}
		} else {
			// Not a comment. If the character just read was a line end, undo
			// the line bookkeeping NextCh did for it.
			if (ch==EOL) {line--; lineStart = lineStart0;}
			// Step back so that the NextCh below reads the '/' again.
			pos = pos - 2; Buffer.Pos = pos+1; NextCh();
		}
		return false;
	}


	/// <summary>
	/// Replaces the kind of the current token t with the keyword kind when
	/// its text is one of the literals below; otherwise t is left unchanged.
	/// The comparison is case-sensitive.
	/// </summary>
	static void CheckLiteral() {
		int keywordKind;
		switch (t.val) {
			case "COMPILER":    keywordKind = 6;  break;
			case "IGNORECASE":  keywordKind = 7;  break;
			case "CHARACTERS":  keywordKind = 8;  break;
			case "TOKENS":      keywordKind = 9;  break;
			case "PRAGMAS":     keywordKind = 10; break;
			case "COMMENTS":    keywordKind = 11; break;
			case "FROM":        keywordKind = 12; break;
			case "TO":          keywordKind = 13; break;
			case "NESTED":      keywordKind = 14; break;
			case "IGNORE":      keywordKind = 15; break;
			case "PRODUCTIONS": keywordKind = 16; break;
			case "END":         keywordKind = 19; break;
			case "ANY":         keywordKind = 23; break;
			case "WEAK":        keywordKind = 27; break;
			case "SYNC":        keywordKind = 34; break;
			case "IF":          keywordKind = 35; break;
			case "CONTEXT":     keywordKind = 36; break;
			case "using":       keywordKind = 39; break;
			default:
				// Not a keyword: keep the kind assigned by the scanner.
				return;
		}
		t.kind = keywordKind;
	}

	/* AW Scan() renamed to NextToken() */
	// Scans and returns the next token starting at the current character ch.
	// Ignorable characters and comments in front of the token are skipped.
	// The token text is collected in a local StringBuilder and stored in
	// t.val; t.kind is noSym when the input does not form a valid token and
	// eofSym at the end of the input.
	//
	// Changes against the generated original:
	//  * States 5 and 11 no longer accept Buffer.EOF as an ordinary
	//    character. The range test "ch >= ']'" also matched the EOF value
	//    (256), so an unterminated string at the end of the input kept
	//    appending EOF characters forever.
	//  * Skipping several consecutive comments is done with a loop instead
	//    of one recursive call per comment.
	static Token NextToken() {
		// Skip ignorable characters; after every skipped comment start over.
		do {
			while (ignore[ch]) NextCh();
		} while (ch == '/' && Comment0());
		t = new Token();
		t.pos = pos; t.col = pos - lineStart + 1; t.line = line; 
		// The first character selects the start state; it is consumed here,
		// so every state below already looks at the following character.
		int state = start[ch];
		StringBuilder buf = new StringBuilder(16);
		buf.Append(ch); NextCh();
		
		switch (state) {
			case -1: { t.kind = eofSym; goto done; } // NextCh already done /* pdt */
			case 0: { t.kind = noSym; goto done; }   // NextCh already done
			case 1:
				// Letter followed by letters and digits; CheckLiteral may turn it into a keyword.
				if ((ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z')) {buf.Append(ch); NextCh(); goto case 1;}
				else {t.kind = 1; t.val = buf.ToString(); CheckLiteral(); return t;}
			case 2:
				// Sequence of digits.
				if ((ch >= '0' && ch <= '9')) {buf.Append(ch); NextCh(); goto case 2;}
				else {t.kind = 2; goto done;}
			case 3:
				// Reached from state 11 after the closing '"'.
				{t.kind = 3; goto done;}
			case 4:
				// Reached from state 11 when a line end occurs before the closing '"'.
				{t.kind = 4; goto done;}
			case 5:
				// After an opening '\'': any character except line end, '\'' and '\\' ...
				if ((ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '&' || ch >= '(' && ch <= '[' || ch >= ']' && ch < Buffer.EOF)) {buf.Append(ch); NextCh(); goto case 6;}
				// ... or a backslash that starts an escape sequence.
				else if (ch == 92) {buf.Append(ch); NextCh(); goto case 7;}
				else {t.kind = noSym; goto done;}
			case 6:
				// Expect the closing '\''.
				if (ch == 39) {buf.Append(ch); NextCh(); goto case 9;}
				else {t.kind = noSym; goto done;}
			case 7:
				// Character following the backslash must be printable (' '..'~').
				if ((ch >= ' ' && ch <= '~')) {buf.Append(ch); NextCh(); goto case 8;}
				else {t.kind = noSym; goto done;}
			case 8:
				// Optional run of digits / 'a'..'f', then the closing '\''.
				if ((ch >= '0' && ch <= '9' || ch >= 'a' && ch <= 'f')) {buf.Append(ch); NextCh(); goto case 8;}
				else if (ch == 39) {buf.Append(ch); NextCh(); goto case 9;}
				else {t.kind = noSym; goto done;}
			case 9:
				{t.kind = 5; goto done;}
			case 10:
				// '$' followed by letters and digits; kind 42 is above maxT, so Peek() skips it.
				if ((ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z')) {buf.Append(ch); NextCh(); goto case 10;}
				else {t.kind = 42; goto done;}
			case 11:
				// Inside a '"' string: consume ordinary characters. EOF is excluded
				// so that an unterminated string ends the token (noSym) instead of looping.
				if ((ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '!' || ch >= '#' && ch <= '[' || ch >= ']' && ch < Buffer.EOF)) {buf.Append(ch); NextCh(); goto case 11;}
				else if ((ch == 10 || ch == 13)) {buf.Append(ch); NextCh(); goto case 4;}
				else if (ch == '"') {buf.Append(ch); NextCh(); goto case 3;}
				else if (ch == 92) {buf.Append(ch); NextCh(); goto case 12;}
				else {t.kind = noSym; goto done;}
			case 12:
				// Character following a backslash inside a string must be printable.
				if ((ch >= ' ' && ch <= '~')) {buf.Append(ch); NextCh(); goto case 11;}
				else {t.kind = noSym; goto done;}
			// States 13..27 are final states of one- and two-character symbols.
			case 13:
				{t.kind = 17; goto done;}
			case 14:
				{t.kind = 20; goto done;}
			case 15:
				{t.kind = 21; goto done;}
			case 16:
				{t.kind = 22; goto done;}
			case 17:
				{t.kind = 24; goto done;}
			case 18:
				{t.kind = 25; goto done;}
			case 19:
				{t.kind = 26; goto done;}
			case 20:
				{t.kind = 29; goto done;}
			case 21:
				{t.kind = 30; goto done;}
			case 22:
				{t.kind = 31; goto done;}
			case 23:
				{t.kind = 32; goto done;}
			case 24:
				{t.kind = 33; goto done;}
			case 25:
				{t.kind = 37; goto done;}
			case 26:
				{t.kind = 38; goto done;}
			case 27:
				{t.kind = 40; goto done;}
			case 28:
				// '.' alone, or the two-character symbols ".." and ".)".
				if (ch == '.') {buf.Append(ch); NextCh(); goto case 16;}
				else if (ch == ')') {buf.Append(ch); NextCh(); goto case 26;}
				else {t.kind = 18; goto done;}
			case 29:
				// '(' alone, or the two-character symbol "(.".
				if (ch == '.') {buf.Append(ch); NextCh(); goto case 25;}
				else {t.kind = 28; goto done;}

		}
		done: 
		t.val = buf.ToString();
		return t;
	}
	
	/* AW 2003-03-07 advance to the next token and synchronise the peek token with it */
	/// <summary>
	/// Makes the token after the current one the new current token, resets
	/// the peek position to it and returns it.
	/// </summary>
	public static Token Scan () {
		Token following = t.next;
		pt = following;
		t = following;
		return following;
	}

	/* AW 2003-03-07 look ahead by one more token, ignoring pragmas */
	/// <summary>
	/// Advances the peek token by at least one token and continues past
	/// every token whose kind is greater than maxT (pragmas). The current
	/// token t is not changed.
	/// </summary>
	/// <returns>The new peek token.</returns>
	public static Token Peek () {
		pt = pt.next;
		while (pt.kind > maxT) {
			// skip pragmas while peeking
			pt = pt.next;
		}
		return pt;
	}
	
	/* AW 2003-03-11 makes sure peeking starts at the current scan position */
	/// <summary>
	/// Sets the peek token back to the current token, so the next Peek()
	/// starts right after it.
	/// </summary>
	public static void ResetPeek () {
		pt = t;
	}

} // end Scanner

}
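// Keyboard shortcuts
//
// Copy code: Ctrl + C
// Search code: Ctrl + F
// Full-screen mode: F11
// Toggle theme: Ctrl + Shift + D
// Show shortcuts: ?
// Increase font size: Ctrl + =
// Decrease font size: Ctrl + -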