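// scanner.cs
// From "Full-featured C# compiler" · C# code · 689 lines in total
// (header text left over from the code-viewer web page this file was copied from)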
using System;
using System.IO;
using System.Collections;
using System.Text;
namespace at.jku.ssw.Coco {
/// <summary>
/// One lexical token. Tokens are chained through <see cref="next"/> so the
/// whole input can be held as a singly linked list.
/// </summary>
public class Token {
    /// <summary>Numeric token kind.</summary>
    public int kind;

    /// <summary>Position of the token in the source text (first character is 0).</summary>
    public int pos;

    /// <summary>Column of the token within its line.</summary>
    public int col;

    /// <summary>Line on which the token starts (first line is 1).</summary>
    public int line;

    /// <summary>Text of the token.</summary>
    public string val;

    /// <summary>Following token in the list (AW 2003-03-07), or null if there is none.</summary>
    public Token next;

    /// <summary>Creates a token with all fields at their default values.</summary>
    public Token () {
    }

    /// <summary>Creates a token of the given kind; all other fields keep their defaults.</summary>
    /// <param name="kind">Numeric token kind to store.</param>
    public Token (int kind) {
        this.kind = kind;
    }
}
/// <summary>
/// Holds the complete contents of the input file as a byte array and offers
/// sequential, byte-at-a-time access to it. All state is static, so only one
/// file can be loaded at a time.
/// </summary>
public class Buffer {
    public const int eof = '\uffff';

    static byte[] buf;      // bytes of the loaded file
    static int bufLen;      // number of valid bytes in buf
    static int pos;         // index of the next byte to be returned by Read()

    /// <summary>Name of the file most recently passed to <see cref="Fill"/>.</summary>
    public static string fileName;

    /// <summary>
    /// Counts the '\n' bytes in the buffer from index 0 up to and including
    /// <paramref name="offset"/>.
    /// </summary>
    /// <param name="offset">Last buffer index to inspect (inclusive).</param>
    /// <returns>
    /// The number of line feeds found. Offsets beyond the end of the buffer are
    /// treated as the last valid index; a negative offset or an unfilled buffer
    /// yields 0.
    /// </returns>
    public static int CountLines(int offset)
    {
        if (buf == null) return 0;
        // Clamp so that an offset at or past the end cannot index outside buf.
        int last = Math.Min(offset, bufLen - 1);
        int line = 0;
        for (int i = 0; i <= last; ++i) {
            if (buf[i] == '\n') {
                ++line;
            }
        }
        return line;
    }

    /// <summary>
    /// Loads the whole file into the buffer and resets the read position to 0.
    /// On an <see cref="IOException"/> a message is printed and the process exits.
    /// </summary>
    /// <param name="fileName">Path of the file to load.</param>
    public static void Fill (string fileName) {
        Buffer.fileName = fileName;
        FileStream s = null;
        try {
            s = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
            int length = (int) s.Length;
            byte[] data = new byte[length];
            // Stream.Read may deliver fewer bytes than requested, so keep
            // reading until the file is complete or the stream reports its end.
            int total = 0;
            while (total < length) {
                int n = s.Read(data, total, length - total);
                if (n <= 0) break;
                total += n;
            }
            // Publish the new contents only after the read has succeeded.
            buf = data;
            bufLen = total;
            pos = 0;
        } catch (IOException) {
            Console.WriteLine("--- Cannot open file {0}", fileName);
            // NOTE(review): exit code 0 is kept from the original; confirm
            // whether a non-zero code is wanted for this failure.
            System.Environment.Exit(0);
        } finally {
            if (s != null) s.Close();
        }
    }

    /// <summary>Returns the next byte and advances; returns 0 once the end is reached.</summary>
    public static int Read () {
        if (pos < bufLen) return buf[pos++];
        else return 0;
    }

    /// <summary>Returns the next byte without advancing; returns 0 at the end.</summary>
    public static int Peek () {
        if (pos < bufLen) return buf[pos];
        else return 0;
    }

    /* AW 2003-03-10 moved this from ParserGen.cs */
    /// <summary>
    /// Returns the buffer contents from <paramref name="beg"/> (inclusive) to
    /// <paramref name="end"/> (exclusive), each byte converted to one char.
    /// The current read position is restored before returning.
    /// </summary>
    /// <param name="beg">Index of the first byte to copy.</param>
    /// <param name="end">Index just past the last byte to copy.</param>
    /// <returns>
    /// The extracted text; empty if <paramref name="beg"/> is not less than
    /// <paramref name="end"/>. Positions past the end of the buffer contribute
    /// '\0' characters because <see cref="Read"/> returns 0 there.
    /// </returns>
    public static string GetString (int beg, int end) {
        StringBuilder s = new StringBuilder(64);
        int oldPos = Buffer.Pos;
        Buffer.Pos = beg;
        while (beg < end) { s.Append((char)Buffer.Read()); beg++; }
        Buffer.Pos = oldPos;
        return s.ToString();
    }

    /// <summary>
    /// Current read position. Assigned values are clamped to the range
    /// 0 .. buffer length.
    /// </summary>
    public static int Pos {
        get { return pos; }
        set {
            if (value < 0) pos = 0;
            else if (value >= bufLen) pos = bufLen;
            else pos = value;
        }
    }
}
/// <summary>
/// Table-driven scanner. <see cref="Init"/> loads a file through
/// <c>Buffer</c>, tokenizes it completely into a linked list of
/// <c>Token</c> objects, and <see cref="Scan"/> / <see cref="Peek"/> then walk
/// that list. All state is static.
/// </summary>
public class Scanner {
    const char EOF = '\0';   // Buffer.Read() returns 0 at end of input
    const char EOL = '\n';
    const int maxT = 42;     // kinds above this are skipped by Peek() (pragmas)
    const int noSym = 42;    // kind assigned to text that matches no token

    // Start state of the automaton in NextToken(), indexed by the first
    // character of a token. 0 means "no token starts with this character";
    // index 0 (the EOF character) maps to state 32, which yields kind 0.
    static short[] start = {
         32,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0, 27, 11,  0, 10,  0,  0,  5, 21, 22,  0, 15,  0, 16, 14,  0,
          2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  0, 31, 18, 13, 19,  0,
          0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
          1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 23,  0, 24,  0,  0,
          0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
          1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 25, 20, 26,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0};

    static Token t;          // current token
    static char ch;          // current input character
    static int pos;          // position of the current character in the input
    static int line;         // line number of current character
    static int lineStart;    // start position of current line
    static int oldEols;      // EOLs that appeared in a comment;
    static BitArray ignore;  // set of characters to be ignored by the scanner

    /* ML ----- begin */
    static Token tokens;     // the complete input token stream
    static Token pt;         // current peek token
    static int peekCount = 0;

    /// <summary>Value of the peek counter; nothing in this class changes it from 0.</summary>
    public static int PeekCount { get { return peekCount; } }

    /// <summary>
    /// Loads <paramref name="fileName"/>, resets the scanner state and
    /// tokenizes the whole input into a linked list that ends with the first
    /// token of kind 0. The current and peek tokens are set to a dummy head
    /// node placed in front of the first real token.
    /// </summary>
    /// <param name="fileName">Path of the source file to scan.</param>
    public static void Init (String fileName) {
        Buffer.Fill(fileName);
        pos = -1; line = 1; lineStart = 0;
        oldEols = 0;
        NextCh();
        // Tab, line feed, carriage return and space are skipped between tokens.
        ignore = new BitArray(256);
        ignore[9] = true; ignore[10] = true; ignore[13] = true; ignore[32] = true;
        /* AW 2003-03-07 fill token list */
        tokens = new Token();  // first token is a dummy
        Token node = tokens;
        do {
            node.next = NextToken();
            node = node.next;
        } while (node.kind != 0);  /* AW: 0 => EOF */
        t = pt = tokens;
    }

    /// <summary>
    /// Moves <c>ch</c> to the next input character. While <c>oldEols</c> is
    /// positive an EOL is delivered instead of reading from the buffer.
    /// </summary>
    static void NextCh() {
        if (oldEols > 0) { ch = EOL; oldEols--; }
        else {
            ch = (char)Buffer.Read(); pos++;
            // replace isolated '\r' by '\n' in order to make
            // eol handling uniform across Windows, Unix and Mac
            if (ch == '\r' && Buffer.Peek() != '\n') ch = EOL;
            // anything outside 7-bit ASCII is replaced by '?'
            else if (ch > '\u007f') ch = '?';
            if (ch == EOL) { line++; lineStart = pos + 1; }
        }
    }

    /// <summary>
    /// Tries to skip a <c>/* ... */</c> comment (nesting is counted) whose
    /// leading '/' is the current character.
    /// </summary>
    /// <returns>
    /// true if a complete comment was skipped. false if the '/' is not followed
    /// by '*' (the scanner is then rewound so that <c>ch</c> is the '/' again)
    /// or if the end of input is reached inside the comment.
    /// </returns>
    static bool Comment0() {
        int level = 1, line0 = line, lineStart0 = lineStart;
        NextCh();
        if (ch == '*') {
            NextCh();
            for(;;) {
                if (ch == '*') {
                    NextCh();
                    if (ch == '/') {
                        level--;
                        // Outermost comment closed: remember how many line ends
                        // it contained so NextCh() delivers them afterwards.
                        if (level == 0) { oldEols = line - line0; NextCh(); return true; }
                        NextCh();
                    }
                } else if (ch == '/') {
                    NextCh();
                    if (ch == '*') {
                        level++; NextCh();
                    }
                } else if (ch == EOF) return false;
                else NextCh();
            }
        } else {
            // Not a comment: undo the look-ahead, including its line bookkeeping.
            if (ch==EOL) {line--; lineStart = lineStart0;}
            pos = pos - 2; Buffer.Pos = pos+1; NextCh();
        }
        return false;
    }

    /// <summary>
    /// Replaces the kind of the current token by the keyword kind if its text
    /// is one of the literals listed below; otherwise leaves it unchanged.
    /// </summary>
    static void CheckLiteral() {
        switch (t.val) {
            case "COMPILER": t.kind = 6; break;
            case "PRODUCTIONS": t.kind = 7; break;
            case "END": t.kind = 10; break;
            case "CHARACTERS": t.kind = 11; break;
            case "TOKENS": t.kind = 12; break;
            case "PRAGMAS": t.kind = 13; break;
            case "COMMENTS": t.kind = 14; break;
            case "FROM": t.kind = 15; break;
            case "TO": t.kind = 16; break;
            case "NESTED": t.kind = 17; break;
            case "IGNORE": t.kind = 18; break;
            case "TOKENNAMES": t.kind = 19; break;
            case "ANY": t.kind = 23; break;
            case "WEAK": t.kind = 27; break;
            case "SYNC": t.kind = 34; break;
            case "IF": t.kind = 35; break;
            case "CONTEXT": t.kind = 37; break;
            case "using": t.kind = 40; break;
            default: break;
        }
    }

    /* AW Scan() renamed to NextToken() */
    /// <summary>
    /// Reads the next token from the input: skips ignorable characters and
    /// comments, records position, column and line, then runs the automaton
    /// starting in <c>start[ch]</c> while collecting the token text.
    /// </summary>
    /// <returns>The new token; it is also stored in <c>t</c>.</returns>
    static Token NextToken() {
        while (ignore[ch]) NextCh();
        if (ch == '/' && Comment0()) return NextToken();
        t = new Token();
        t.pos = pos; t.col = pos - lineStart + 1; t.line = line;
        int state = start[ch];
        StringBuilder buf = new StringBuilder(16);
        buf.Append(ch); NextCh();
        switch (state) {
            case 0: { t.kind = noSym; goto done; }  // NextCh already done
            case 1:  // letter followed by letters/digits; may turn out to be a keyword
                if ((ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z')) {buf.Append(ch); NextCh(); goto case 1;}
                else {t.kind = 1; t.val = buf.ToString(); CheckLiteral(); return t;}
            case 2:  // run of decimal digits
                if ((ch >= '0' && ch <= '9')) {buf.Append(ch); NextCh(); goto case 2;}
                else {t.kind = 2; goto done;}
            case 3:  // reached from state 11 on the closing '"'
                {t.kind = 3; goto done;}
            case 4:  // reached from state 11 when a line end occurs inside the quotes
                {t.kind = 4; goto done;}
            case 5:  // after an opening apostrophe
                if ((ch >= 1 && ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '&' || ch >= '(' && ch <= '[' || ch >= ']')) {buf.Append(ch); NextCh(); goto case 6;}
                else if (ch == 92) {buf.Append(ch); NextCh(); goto case 7;}
                else {t.kind = noSym; goto done;}
            case 6:
                if (ch == 39) {buf.Append(ch); NextCh(); goto case 9;}
                else {t.kind = noSym; goto done;}
            case 7:  // after a backslash inside apostrophes
                if ((ch >= ' ' && ch <= '~')) {buf.Append(ch); NextCh(); goto case 8;}
                else {t.kind = noSym; goto done;}
            case 8:
                if ((ch >= '0' && ch <= '9' || ch >= 'a' && ch <= 'f')) {buf.Append(ch); NextCh(); goto case 8;}
                else if (ch == 39) {buf.Append(ch); NextCh(); goto case 9;}
                else {t.kind = noSym; goto done;}
            case 9:
                {t.kind = 5; goto done;}
            case 10:  // '$' followed by letters/digits; kind 43 is above maxT
                if ((ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z')) {buf.Append(ch); NextCh(); goto case 10;}
                else {t.kind = 43; goto done;}
            case 11:  // after an opening '"'
                if ((ch >= 1 && ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '!' || ch >= '#' && ch <= '[' || ch >= ']')) {buf.Append(ch); NextCh(); goto case 11;}
                else if ((ch == 10 || ch == 13)) {buf.Append(ch); NextCh(); goto case 4;}
                else if (ch == '"') {buf.Append(ch); NextCh(); goto case 3;}
                else if (ch == 92) {buf.Append(ch); NextCh(); goto case 12;}
                else {t.kind = noSym; goto done;}
            case 12:  // after a backslash inside quotes
                if ((ch >= ' ' && ch <= '~')) {buf.Append(ch); NextCh(); goto case 11;}
                else {t.kind = noSym; goto done;}
            case 13:  // '='
                {t.kind = 8; goto done;}
            case 14:  // '.', possibly followed by '.' or ')'
                if (ch == '.') {buf.Append(ch); NextCh(); goto case 17;}
                else if (ch == ')') {buf.Append(ch); NextCh(); goto case 30;}
                else {t.kind = 9; goto done;}
            case 15:  // '+'
                {t.kind = 20; goto done;}
            case 16:  // '-'
                {t.kind = 21; goto done;}
            case 17:  // ".."
                {t.kind = 22; goto done;}
            case 18:  // '<'
                {t.kind = 24; goto done;}
            case 19:  // '>'
                {t.kind = 25; goto done;}
            case 20:  // '|'
                {t.kind = 26; goto done;}
            case 21:  // '(', possibly followed by '.'
                if (ch == '.') {buf.Append(ch); NextCh(); goto case 29;}
                else {t.kind = 28; goto done;}
            case 22:  // ')'
                {t.kind = 29; goto done;}
            case 23:  // '['
                {t.kind = 30; goto done;}
            case 24:  // ']'
                {t.kind = 31; goto done;}
            case 25:  // '{'
                {t.kind = 32; goto done;}
            case 26:  // '}'
                {t.kind = 33; goto done;}
            case 27:  // '!' is only valid when followed by '='
                if (ch == '=') {buf.Append(ch); NextCh(); goto case 28;}
                else {t.kind = noSym; goto done;}
            case 28:  // "!="
                {t.kind = 36; goto done;}
            case 29:  // "(."
                {t.kind = 38; goto done;}
            case 30:  // ".)"
                {t.kind = 39; goto done;}
            case 31:  // ';'
                {t.kind = 41; goto done;}
            case 32: {t.kind = 0; goto done;}  // end of input
        }
        done:
        t.val = buf.ToString();
        return t;
    }

    /* AW 2003-03-07 get the next token, move on and synch peek token with current */
    /// <summary>
    /// Advances the current token to its successor and resets the peek token
    /// to the same node.
    /// </summary>
    /// <returns>The new current token (null if the previous one had no successor).</returns>
    public static Token Scan () {
        t = pt = t.next;
        return t;
    }

    /* AW 2003-03-07 get the next token, ignore pragmas */
    /// <summary>
    /// Advances the peek token to the next token whose kind is not above
    /// <c>maxT</c>, without moving the current token.
    /// </summary>
    /// <returns>
    /// The token peeked at, or null once the end of the token list has been
    /// passed. Further calls keep returning null until the peek position is
    /// reset by <see cref="Scan"/> or <see cref="StartPeek"/>.
    /// </returns>
    public static Token Peek () {
        // A previous call may already have run off the end of the list;
        // dereferencing pt again would throw a NullReferenceException.
        if (pt == null) return null;
        do {  // skip pragmas while peeking
            pt = pt.next;
        } while (pt != null && pt.kind > maxT);
        return pt;
    }

    /* AW 2003-03-11 to make sure peek start at current scan position */
    /// <summary>Resets the peek token to the current token.</summary>
    public static void StartPeek () { pt = t; }
} // end Scanner
/// <summary>Callback for an error identified by a numeric code.</summary>
/// <param name="line">Line of the error.</param>
/// <param name="col">Column of the error.</param>
/// <param name="n">Error number.</param>
public delegate void ErrorCodeProc (int line, int col, int n);

/// <summary>Callback for an error described by a message text.</summary>
/// <param name="line">Line of the error.</param>
/// <param name="col">Column of the error.</param>
/// <param name="msg">Error message.</param>
public delegate void ErrorMsgProc (int line, int col, string msg);

/// <summary>
/// Central error reporting. The three public delegate fields can be replaced
/// by the application; their defaults print to the console and bump
/// <see cref="count"/>.
/// </summary>
public class Errors {
    /// <summary>Number of errors detected so far.</summary>
    public static int count = 0;

    /// <summary>Handler for syntactic errors.</summary>
    public static ErrorCodeProc SynErr = new ErrorCodeProc(DefaultCodeError);

    /// <summary>Handler for semantic errors.</summary>
    public static ErrorCodeProc SemErr = new ErrorCodeProc(DefaultCodeError);

    /// <summary>Handler for user defined, string based errors.</summary>
    public static ErrorMsgProc Error = new ErrorMsgProc(DefaultMsgError);

    /// <summary>Prints <paramref name="s"/> and terminates the process.</summary>
    /// <param name="s">Text to print before exiting.</param>
    public static void Exception (string s) {
        Console.WriteLine(s);
        // NOTE(review): the process exits with code 0 here — confirm that a
        // success exit code is intended for this path.
        Environment.Exit(0);
    }

    /// <summary>Default numeric-code handler: prints line, column and code, then counts the error.</summary>
    static void DefaultCodeError (int line, int col, int n) {
        Console.WriteLine("-- line {0} col {1}: error {2}", line, col, n);
        count += 1;
    }

    /// <summary>Default message handler: prints line, column and message, then counts the error.</summary>
    static void DefaultMsgError (int line, int col, string s) {
        Console.WriteLine("-- line {0} col {1}: {2}", line, col, s);
        count += 1;
    }
} // Errors
}
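// Keyboard shortcuts (help text left over from the code-viewer web page; not part of the program)
//   Copy code: Ctrl + C
//   Search code: Ctrl + F
//   Full-screen mode: F11
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -
//   Show shortcuts: ?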