// scanner.cs
/*----------------------------------------------------------------------
Compiler Generator Coco/R,
Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
extended by M. Loeberbauer & A. Woess, Univ. of Linz
with improvements by Pat Terry, Rhodes University
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
As an exception, it is allowed to write an extension of Coco/R that is
used as a plugin in non-free software.
If not otherwise stated, any source code generated by Coco/R (other than
Coco/R itself) does not fall under the GNU General Public License.
-----------------------------------------------------------------------*/
using System;
using System.IO;
using System.Collections;
using System.Text;
namespace at.jku.ssw.Coco {
// One lexical token. The scanner chains all tokens of the input into a
// singly linked list through the `next` field.
public class Token {
    // Token kind (the numeric code assigned by the scanner).
    public int kind;

    // Position of the token's first character in the source text (starting at 0).
    public int pos;

    // Column of the token's first character. Scanner.NextToken computes it as
    // pos - lineStart + 1, so the first character of a line has column 1.
    public int col;

    // Line of the token's first character (starting at 1).
    public int line;

    // Text of the token.
    public string val;

    // AW 2003-03-07: tokens are kept in a linked list; this is the successor.
    public Token next;
}
// Holds the complete scanner input in memory as a byte array and exposes a
// read cursor over it. All state is static, so there is exactly one buffer.
public class Buffer {
    // Sentinel returned by Read/Peek once the cursor has reached the end of
    // the input. It is 256, i.e. outside the range of any byte value.
    public const char EOF = (char)256;

    static byte[] buf;   // the input bytes
    static int bufLen;   // number of valid bytes in buf
    static int pos;      // index of the next byte to be read

    // CHANGES by M.KRUEGER
    // Counts the '\n' bytes in the input at indices 0..offset (inclusive).
    // Offsets at or past the end of the input are clamped to the last byte,
    // so passing the position of the EOF token no longer throws an
    // IndexOutOfRangeException. Returns 0 for a negative offset or when no
    // input has been loaded.
    public static int CountLines(int offset)
    {
        int last = offset < bufLen ? offset : bufLen - 1;
        int line = 0;
        for (int i = 0; i <= last; ++i) {
            if (buf[i] == '\n') {
                ++line;
            }
        }
        return line;
    }
    //EOC

    // Loads the content of s, from its current position, into the buffer and
    // resets the read cursor to 0. The stream must support Length.
    public static void Fill (Stream s) {
        int capacity = (int) s.Length;
        byte[] data = new byte[capacity];
        // Stream.Read may deliver fewer bytes than requested, so keep reading
        // until the array is full or the stream reports end of data.
        int total = 0;
        while (total < capacity) {
            int n = s.Read(data, total, capacity - total);
            if (n <= 0) break;
            total += n;
        }
        buf = data;
        // Only the bytes actually read count as input; this keeps unread
        // (zero) array slots from being scanned as characters.
        bufLen = total;
        pos = 0;
    }

    // Returns the next byte and advances the cursor, or EOF at end of input.
    public static int Read () {
        if (pos < bufLen) return buf[pos++];
        else return EOF; /* pdt */
    }

    // Returns the next byte without advancing the cursor, or EOF at end of input.
    public static int Peek () {
        if (pos < bufLen) return buf[pos];
        else return EOF; /* pdt */
    }

    /* AW 2003-03-10 moved this from ParserGen.cs */
    // Returns the input characters in [beg, end) as a string, one char per
    // byte. The read cursor is restored afterwards. Positions past the end of
    // the input contribute the EOF character.
    public static string GetString (int beg, int end) {
        StringBuilder s = new StringBuilder(64);
        int oldPos = Pos;
        Pos = beg;
        while (beg < end) { s.Append((char)Read()); beg++; }
        Pos = oldPos;
        return s.ToString();
    }

    // The read cursor. Assigned values are clamped to the range 0..bufLen.
    public static int Pos {
        get { return pos; }
        set {
            if (value < 0) pos = 0;
            else if (value >= bufLen) pos = bufLen;
            else pos = value;
        }
    }
}
// Table-driven scanner. Init() tokenizes the whole input up front into a
// linked list of Token objects; Scan() and Peek() then walk that list.
// All state is static, so only one input can be scanned at a time.
public class Scanner {
    const char EOL = '\n';
    const int eofSym = 0; /* pdt */
    const int charSetSize = 256;
    const int maxT = 41;   // kinds above maxT are skipped by Peek() (pragmas)
    const int noSym = 41;  // kind assigned to text that matches no token

    // Start state of the automaton in NextToken for each character code
    // 0..255. The last entry (index 256 == Buffer.EOF) is -1, which
    // NextToken turns into the eofSym token.
    static short[] start = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 11, 0, 10, 0, 0, 5, 29, 20, 0, 14, 0, 15, 28, 0,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 27, 17, 13, 18, 0,
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 21, 0, 22, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 23, 19, 24, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        -1};

    static Token t;          // current token
    static char ch;          // current input character
    static int pos;          // position of the current character in the source text
    static int line;         // line number of current character
    static int lineStart;    // start position of current line
    static int oldEols;      // EOLs that appeared in a comment;
    static BitArray ignore;  // set of characters to be ignored by the scanner
    static Token tokens;     // the complete input token stream
    static Token pt;         // current peek token

    // CHANGES by M.KRUEGER
    public static string fileName;
    // EOC

    // Opens the named file and scans it completely (see Init(Stream)).
    // On an IOException a message is printed to the console and the process
    // exits with code 1.
    public static void Init (string fileName) {
        // CHANGES by M.KRUEGER
        Scanner.fileName = fileName;
        // EOC
        try {
            // `using` closes the stream once the input has been buffered.
            using (FileStream s = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read)) {
                Init(s);
            }
        } catch (IOException) {
            Console.WriteLine("--- Cannot open file {0}", fileName);
            System.Environment.Exit(1);
        }
    }

    // Loads the stream into Buffer, resets the scanner state and tokenizes
    // the whole input into a linked list. The list starts with a dummy node
    // and ends with the EOF token, whose `next` points to itself so that
    // scanning past the end keeps yielding EOF.
    public static void Init (Stream s) {
        Buffer.Fill(s);
        pos = -1; line = 1; lineStart = 0;
        oldEols = 0;
        NextCh();
        ignore = new BitArray(charSetSize+1);
        // tab, line feed, carriage return and blank are skipped between tokens
        ignore[9] = true; ignore[10] = true; ignore[13] = true; ignore[32] = true;
        //--- AW: fill token list
        tokens = new Token(); // first token is a dummy
        Token node = tokens;
        do {
            node.next = NextToken();
            node = node.next;
        } while (node.kind != eofSym);
        node.next = node;
        node.val = "EOF";
        t = pt = tokens;
    }

    // Advances `ch` to the next input character and maintains pos, line and
    // lineStart. While oldEols > 0, an EOL is delivered instead of reading
    // from the buffer.
    static void NextCh() {
        if (oldEols > 0) { ch = EOL; oldEols--; }
        else {
            ch = (char)Buffer.Read(); pos++;
            // replace isolated '\r' by '\n' in order to make
            // eol handling uniform across Windows, Unix and Mac
            if (ch == '\r' && Buffer.Peek() != '\n') ch = EOL;
            if (ch == EOL) { line++; lineStart = pos + 1; }
        }
    }

    // Called with ch == '/'. Skips a (possibly nested) /* ... */ comment and
    // returns true if one was consumed. If the '/' does not start a comment,
    // the input is rewound to the '/' and false is returned. Also returns
    // false when the end of input is reached inside a comment.
    static bool Comment0() {
        int level = 1, line0 = line, lineStart0 = lineStart;
        NextCh();
        if (ch == '*') {
            NextCh();
            for(;;) {
                if (ch == '*') {
                    NextCh();
                    if (ch == '/') {
                        level--;
                        // record the EOLs seen inside the comment so NextCh replays them
                        if (level == 0) { oldEols = line - line0; NextCh(); return true; }
                        NextCh();
                    }
                } else if (ch == '/') {
                    NextCh();
                    if (ch == '*') {
                        level++; NextCh();
                    }
                } else if (ch == Buffer.EOF) return false;
                else NextCh();
            }
        } else {
            // not a comment: undo the line bookkeeping and step back to the '/'
            if (ch==EOL) {line--; lineStart = lineStart0;}
            pos = pos - 2; Buffer.Pos = pos+1; NextCh();
        }
        return false;
    }

    // Replaces the kind of the current token `t` when its text is one of the
    // listed keywords; otherwise leaves the kind untouched.
    static void CheckLiteral() {
        switch (t.val) {
            case "COMPILER": t.kind = 6; break;
            case "IGNORECASE": t.kind = 7; break;
            case "CHARACTERS": t.kind = 8; break;
            case "TOKENS": t.kind = 9; break;
            case "PRAGMAS": t.kind = 10; break;
            case "COMMENTS": t.kind = 11; break;
            case "FROM": t.kind = 12; break;
            case "TO": t.kind = 13; break;
            case "NESTED": t.kind = 14; break;
            case "IGNORE": t.kind = 15; break;
            case "PRODUCTIONS": t.kind = 16; break;
            case "END": t.kind = 19; break;
            case "ANY": t.kind = 23; break;
            case "WEAK": t.kind = 27; break;
            case "SYNC": t.kind = 34; break;
            case "IF": t.kind = 35; break;
            case "CONTEXT": t.kind = 36; break;
            case "using": t.kind = 39; break;
            default: break;
        }
    }

    /* AW Scan() renamed to NextToken() */
    // Reads the next token from the character stream: skips ignorable
    // characters and comments, then runs the automaton whose start state is
    // taken from `start[ch]`. The first character is always consumed before
    // the switch. Returns the new token, which is also stored in `t`.
    static Token NextToken() {
        while (ignore[ch]) NextCh();
        if (ch == '/' && Comment0()) return NextToken();
        t = new Token();
        t.pos = pos; t.col = pos - lineStart + 1; t.line = line;
        int state = start[ch];
        StringBuilder buf = new StringBuilder(16);
        buf.Append(ch); NextCh();
        switch (state) {
            case -1: { t.kind = eofSym; goto done; } // NextCh already done /* pdt */
            case 0: { t.kind = noSym; goto done; } // NextCh already done
            case 1: // letters and digits following a leading letter; keywords are re-kinded
                if ((ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z')) {buf.Append(ch); NextCh(); goto case 1;}
                else {t.kind = 1; t.val = buf.ToString(); CheckLiteral(); return t;}
            case 2: // run of digits
                if ((ch >= '0' && ch <= '9')) {buf.Append(ch); NextCh(); goto case 2;}
                else {t.kind = 2; goto done;}
            case 3: // closing '"' seen
                {t.kind = 3; goto done;}
            case 4: // line break inside a '"' sequence
                {t.kind = 4; goto done;}
            case 5: // after an opening '\''
                // Buffer.EOF (256) is excluded explicitly: it lies above ']' and would
                // otherwise be appended to the token text as if it were a character.
                if ((ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '&' || ch >= '(' && ch <= '[' || ch >= ']' && ch != Buffer.EOF)) {buf.Append(ch); NextCh(); goto case 6;}
                else if (ch == 92) {buf.Append(ch); NextCh(); goto case 7;}
                else {t.kind = noSym; goto done;}
            case 6:
                if (ch == 39) {buf.Append(ch); NextCh(); goto case 9;}
                else {t.kind = noSym; goto done;}
            case 7: // after a backslash inside '\'' ... '\''
                if ((ch >= ' ' && ch <= '~')) {buf.Append(ch); NextCh(); goto case 8;}
                else {t.kind = noSym; goto done;}
            case 8:
                if ((ch >= '0' && ch <= '9' || ch >= 'a' && ch <= 'f')) {buf.Append(ch); NextCh(); goto case 8;}
                else if (ch == 39) {buf.Append(ch); NextCh(); goto case 9;}
                else {t.kind = noSym; goto done;}
            case 9:
                {t.kind = 5; goto done;}
            case 10: // after '$'; kind 42 is above maxT, so Peek() skips it
                if ((ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z')) {buf.Append(ch); NextCh(); goto case 10;}
                else {t.kind = 42; goto done;}
            case 11: // after an opening '"'
                // Buffer.EOF (256) is excluded explicitly. It lies above ']', and since
                // Buffer.Read keeps returning EOF at the end of input, accepting it here
                // made an unterminated string at end of file loop forever.
                if ((ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '!' || ch >= '#' && ch <= '[' || ch >= ']' && ch != Buffer.EOF)) {buf.Append(ch); NextCh(); goto case 11;}
                else if ((ch == 10 || ch == 13)) {buf.Append(ch); NextCh(); goto case 4;}
                else if (ch == '"') {buf.Append(ch); NextCh(); goto case 3;}
                else if (ch == 92) {buf.Append(ch); NextCh(); goto case 12;}
                else {t.kind = noSym; goto done;}
            case 12: // after a backslash inside '"' ... '"'
                if ((ch >= ' ' && ch <= '~')) {buf.Append(ch); NextCh(); goto case 11;}
                else {t.kind = noSym; goto done;}
            case 13:
                {t.kind = 17; goto done;}
            case 14:
                {t.kind = 20; goto done;}
            case 15:
                {t.kind = 21; goto done;}
            case 16:
                {t.kind = 22; goto done;}
            case 17:
                {t.kind = 24; goto done;}
            case 18:
                {t.kind = 25; goto done;}
            case 19:
                {t.kind = 26; goto done;}
            case 20:
                {t.kind = 29; goto done;}
            case 21:
                {t.kind = 30; goto done;}
            case 22:
                {t.kind = 31; goto done;}
            case 23:
                {t.kind = 32; goto done;}
            case 24:
                {t.kind = 33; goto done;}
            case 25:
                {t.kind = 37; goto done;}
            case 26:
                {t.kind = 38; goto done;}
            case 27:
                {t.kind = 40; goto done;}
            case 28: // after '.'
                if (ch == '.') {buf.Append(ch); NextCh(); goto case 16;}
                else if (ch == ')') {buf.Append(ch); NextCh(); goto case 26;}
                else {t.kind = 18; goto done;}
            case 29: // after '('
                if (ch == '.') {buf.Append(ch); NextCh(); goto case 25;}
                else {t.kind = 28; goto done;}
        }
        done:
        t.val = buf.ToString();
        return t;
    }

    /* AW 2003-03-07 get the next token, move on and synch peek token with current */
    public static Token Scan () {
        t = pt = t.next;
        return t;
    }

    /* AW 2003-03-07 get the next token, ignore pragmas */
    public static Token Peek () {
        do { // skip pragmas while peeking
            pt = pt.next;
        } while (pt.kind > maxT);
        return pt;
    }

    /* AW 2003-03-11 to make sure peek start at current scan position */
    public static void ResetPeek () { pt = t; }
} // end Scanner
}
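// Keyboard shortcut help (code-viewer page residue, not part of scanner.cs):
//   Copy code:           Ctrl + C
//   Search code:         Ctrl + F
//   Full-screen mode:    F11
//   Toggle theme:        Ctrl + Shift + D
//   Show shortcuts:      ?
//   Increase font size:  Ctrl + =
//   Decrease font size:  Ctrl + -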