⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scanner.java

📁 cocorj09-一个Java语言分析器
💻 JAVA
字号:
package Coco;
import java.io.*;
import java.util.*;

/**
 * Plain data holder describing one token handed out by the scanner.
 * All fields are package-visible and start out at their Java defaults.
 */
class Token {

	/** Token kind. */
	int kind;

	/** Token position in the source text (starting at 0). */
	int pos;

	/** Token column (starting at 0). */
	int col;

	/** Token line (starting at 1). */
	int line;

	/** Exact string value. */
	String str;

	/** Token string value (uppercase if ignoreCase). */
	String val;
}

/**
 * Holds the complete text of the input file in memory and hands it out one
 * character at a time. All state is static, so only one file is buffered at
 * any moment.
 */
class Buffer {

	// Portability: this is the Java 1.1 variant (char[] filled through a Reader).
	// A Java 1.0 build would use a byte[] filled through a BufferedInputStream.
	static char[] buf;

	static int bufLen;              // number of characters stored in buf
	static int pos;                 // index of the character the next read() returns
	static final int eof = 65535;   // value returned by read() past the last character

	/**
	 * Loads the whole file into {@code buf} and positions the buffer at its start.
	 * On any I/O failure a message is printed to standard output and the JVM
	 * exits with status 0.
	 *
	 * @param name path of the file to load
	 */
	static void Fill(String name) {
		Reader s = null;
		try {
			File f = new File(name);
			// The byte length is an upper bound for the number of characters.
			int capacity = (int) f.length();

			// BufferedReader rejects a buffer size of 0, so an empty file needs size 1.
			s = new BufferedReader(new FileReader(f), Math.max(capacity, 1));
			char[] data = new char[capacity];

			// A single read() may deliver fewer characters than requested, so
			// keep reading until the array is full or the stream is exhausted.
			int count = 0;
			while (count < capacity) {
				int n = s.read(data, count, capacity - count);
				if (n < 0) break;
				count += n;
			}

			buf = data;
			// Use the number of characters actually read; with a multi-byte
			// encoding this is smaller than the file's byte length.
			bufLen = count;
			pos = 0;
		} catch (IOException e) {
			System.out.println("--- cannot open file " + name);
			System.exit(0);
		} finally {
			if (s != null) {
				try {
					s.close();
				} catch (IOException ignored) {
					// The content is already in memory; a failed close changes nothing.
				}
			}
		}
	}

	/**
	 * Moves the read position, clamping it to the range 0..bufLen.
	 *
	 * @param position index of the character the next {@link #read()} should return
	 */
	static void Set(int position) {
		if (position < 0) position = 0; else if (position >= bufLen) position = bufLen;
		pos = position;
	}

	/**
	 * Returns the character at the current position and advances by one.
	 *
	 * @return the character as an int, or {@link #eof} once the buffer is exhausted
	 */
	static int read() {
		if (pos < bufLen) return (int) buf[pos++]; else return eof;
	}
}

/**
 * Table-driven scanner. It reads characters from {@link Buffer}, skips the
 * characters in the ignore set and slash-star comments, and returns one
 * {@link Token} per call to {@link #Scan()}. All state is static, so only one
 * input can be scanned at a time.
 */
class Scanner {

	static ErrorStream err;  // error messages

	private static final char EOF = '\0';
	private static final char CR  = '\r';
	private static final char LF  = '\n';
	private static final int noSym = 43;
	// Initial automaton state for each character code 0..128.
	private static final int[] start = {
	 28,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	  0,  0,  6,  0,  5,  0,  0,  7, 13, 14,  0, 10, 19, 11,  9,  0,
	  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0, 15,  8, 20,  0,
	  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
	  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 17,  0, 18, 16,  1,
	  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
	  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 24, 23, 25,  0,  0,
	  0};

	private static Token t;        // current token
	private static char strCh;     // current input character (original)
	private static char ch;        // current input character (for token)
	private static char lastCh;    // last input character
	private static int pos;        // position of current character
	private static int line;       // line number of current character
	private static int lineStart;  // start position of current line
	private static BitSet ignore;  // set of characters to be ignored by the scanner
	private static int offset = 0; // 1 - MsDos, 0 - Unix/Mac

	/**
	 * Prepares the scanner for a new input file and reads its first character.
	 *
	 * @param file path of the file to scan
	 * @param e    error stream that receives messages about invalid characters
	 */
	static void Init (String file, ErrorStream e) {
		ignore = new BitSet(128);
		// Tab, line feed, carriage return and space are skipped between tokens.
		ignore.set(9); ignore.set(10); ignore.set(13); ignore.set(32);

		err = e;
		Buffer.Fill(file);
		pos = -1; line = 1; lineStart = 0;
		// NextCh copies ch into lastCh, so ch must be cleared as well; otherwise
		// a character left over from a previous input would leak into this one.
		ch = 0; lastCh = 0;
		// Line-ending detection starts afresh for every input.
		offset = 0;
		NextCh();
	}

	/**
	 * Prepares the scanner for a new input file, using a freshly created
	 * {@code ErrorStream}.
	 *
	 * @param file path of the file to scan
	 */
	static void Init (String file) {
		Init(file, new ErrorStream());
	}

	/** Returns true when the character read last is the buffer's end-of-input marker. */
	private static boolean AtEndOfInput() {
		return strCh == Buffer.eof;
	}

	/**
	 * Reads the next character into {@code strCh} and {@code ch} and keeps
	 * {@code pos}, {@code line}, {@code lineStart} and {@code offset} up to date.
	 * The end-of-input marker is mapped to {@code EOF}; any other character
	 * above 0x7f is reported through {@code err} and replaced by a blank.
	 */
	private static void NextCh() {
		lastCh = ch;
		strCh = (char) Buffer.read(); pos++;
		ch = strCh;
		if (ch == LF && lastCh == CR) offset = 1; // MS-Dos format
		// A CR always starts a new line; an LF only when it does not follow a CR,
		// so that a CR LF pair counts once.
		if ((ch == CR) || (ch == LF) && (lastCh != CR)) {line++; lineStart = pos + 1;}
		if (ch > '\u007f') {
			if (ch == '\uffff') ch = EOF;
			else {
				Scanner.err.SemErr(-1, line, (pos + 1 - lineStart - offset));
				ch = ' ';
			}
		}
	}

	/**
	 * Tries to skip a slash-star comment starting at the current '/'.
	 * Comments may be nested. When the '/' is not followed by '*', the input
	 * position is restored so that '/' is the current character again.
	 *
	 * @return true if a complete comment was skipped; false if there was no
	 *         comment or the input ended inside one
	 */
	private static boolean Comment() {
		int level = 1;
		int lineStart0 = lineStart;
		if (ch == '/') {
			NextCh();
			if (ch == '*') {
				NextCh();
				for(;;) {
					if (ch == '*') {
						NextCh();
						if (ch == '/') {
							level--;
							if (level == 0) {NextCh(); return true;}
							NextCh();
						}
					} else if (ch == '/') {
						NextCh();
						if (ch == '*') {
							level++; NextCh();
						}
					} else if (ch == EOF) return false;
					else NextCh();
				}
			} else {
				// Not a comment: undo the line bookkeeping done for the lookahead
				// character and step back onto the '/'.
				if (ch == CR || ch == LF) {line--; lineStart = lineStart0;}
				pos = pos - 2; Buffer.Set(pos+1); NextCh();
			}
		}

		return false;
	}

	/**
	 * Stores the text in {@code t.val} and, if it is one of the keywords
	 * listed below, replaces {@code t.kind} with that keyword's kind.
	 * Otherwise {@code t.kind} is left unchanged.
	 *
	 * @param buf text of the token just recognized; must not be empty
	 */
	private static void CheckLiteral(StringBuffer buf) {
		t.val = buf.toString();
		switch (t.val.charAt(0)) {
			case 'A': {
				if (t.val.equals("ANY")) t.kind = 23;
				break;}
			case 'C': {
				if (t.val.equals("CASE")) t.kind = 19;
				else if (t.val.equals("CHARACTERS")) t.kind = 10;
				else if (t.val.equals("CHR")) t.kind = 24;
				else if (t.val.equals("COMMENTS")) t.kind = 14;
				else if (t.val.equals("COMPILER")) t.kind = 5;
				else if (t.val.equals("CONTEXT")) t.kind = 40;
				break;}
			case 'E': {
				if (t.val.equals("END")) t.kind = 9;
				break;}
			case 'F': {
				if (t.val.equals("FROM")) t.kind = 15;
				break;}
			case 'I': {
				if (t.val.equals("IGNORE")) t.kind = 18;
				break;}
			case 'N': {
				if (t.val.equals("NAMES")) t.kind = 12;
				else if (t.val.equals("NESTED")) t.kind = 17;
				break;}
			case 'P': {
				if (t.val.equals("PRAGMAS")) t.kind = 13;
				else if (t.val.equals("PRODUCTIONS")) t.kind = 6;
				break;}
			case 'S': {
				if (t.val.equals("SYNC")) t.kind = 39;
				break;}
			case 'T': {
				if (t.val.equals("TO")) t.kind = 16;
				else if (t.val.equals("TOKENS")) t.kind = 11;
				break;}
			case 'W': {
				if (t.val.equals("WEAK")) t.kind = 36;
				break;}
		}
	}

	/**
	 * Skips ignorable characters and comments, then recognizes the next token.
	 * The automaton starts in {@code start[ch]}; on every step the current
	 * character is appended to the token text and the next one is read before
	 * the state is examined.
	 *
	 * @return a new token; its kind is 0 at end of input and {@code noSym}
	 *         when no token could be recognized
	 */
	static Token Scan() {
		int state;
		StringBuffer buf;
		// Iterate rather than recurse, so that a long run of comments cannot
		// exhaust the call stack.
		for (;;) {
			while (ignore.get((int)ch)) NextCh();
			if (ch != '/' || !Comment()) break;
		}
		t = new Token();
		t.pos = pos; t.col = pos - lineStart + 1 - offset; t.line = line;
		buf = new StringBuffer();
		state = start[ch];
		loop: for (;;) {
			buf.append(strCh);
			NextCh();
			switch (state) {
				case 0:
					{t.kind = noSym; break loop;} // NextCh already done
				case 1:
					if (ch >= '0' && ch <= '9'
					  || ch >= 'A' && ch <= 'Z'
					  || ch == '_'
					  || ch >= 'a' && ch <= 'z') {break;}
					else {t.kind = 1; CheckLiteral(buf); break loop;}
				case 2:
					{t.kind = 2; break loop;}
				case 3:
					{t.kind = 3; break loop;}
				case 4:
					if (ch >= '0' && ch <= '9') {break;}
					else {t.kind = 4; break loop;}
				case 5:
					if (ch >= '0' && ch <= '9'
					  || ch >= 'A' && ch <= 'Z'
					  || ch == '_'
					  || ch >= 'a' && ch <= 'z') {break;}
					else {t.kind = 44; break loop;}
				case 6:
					// End of input is mapped to '\0', which would pass the
					// "ch <= 9" test below and loop forever; stop explicitly.
					if (AtEndOfInput()) {t.kind = noSym; break loop;}
					else if (ch <= 9
					  || ch >= 11 && ch <= 12
					  || ch >= 14 && ch <= '!'
					  || ch >= '#') {break;}
					else if (ch == 10
					  || ch == 13) {state = 3; break;}
					else if (ch == '"') {state = 2; break;}
					else {t.kind = noSym; break loop;}
				case 7:
					// Same end-of-input guard as in state 6.
					if (AtEndOfInput()) {t.kind = noSym; break loop;}
					else if (ch <= 9
					  || ch >= 11 && ch <= 12
					  || ch >= 14 && ch <= '&'
					  || ch >= '(') {break;}
					else if (ch == 10
					  || ch == 13) {state = 3; break;}
					else if (ch == 39) {state = 2; break;}
					else {t.kind = noSym; break loop;}
				case 8:
					{t.kind = 7; break loop;}
				case 9:
					if (ch == '.') {state = 12; break;}
					else if (ch == '>') {state = 22; break;}
					else if (ch == ')') {state = 27; break;}
					else {t.kind = 8; break loop;}
				case 10:
					{t.kind = 20; break loop;}
				case 11:
					{t.kind = 21; break loop;}
				case 12:
					{t.kind = 22; break loop;}
				case 13:
					if (ch == '.') {state = 26; break;}
					else {t.kind = 25; break loop;}
				case 14:
					{t.kind = 26; break loop;}
				case 15:
					if (ch == '.') {state = 21; break;}
					else {t.kind = 27; break loop;}
				case 16:
					{t.kind = 28; break loop;}
				case 17:
					{t.kind = 29; break loop;}
				case 18:
					{t.kind = 30; break loop;}
				case 19:
					{t.kind = 31; break loop;}
				case 20:
					{t.kind = 32; break loop;}
				case 21:
					{t.kind = 33; break loop;}
				case 22:
					{t.kind = 34; break loop;}
				case 23:
					{t.kind = 35; break loop;}
				case 24:
					{t.kind = 37; break loop;}
				case 25:
					{t.kind = 38; break loop;}
				case 26:
					{t.kind = 41; break loop;}
				case 27:
					{t.kind = 42; break loop;}
				case 28:
					{t.kind = 0; break loop;}
			}
		}
		t.str = buf.toString();
		t.val = t.str;
		return t;
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -