📄 scanner.java
字号:
/**
* @(#)Scanner.java 1.36 03/01/23
*
* Copyright 2003 Sun Microsystems, Inc. All rights reserved.
* SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
*/
package com.sun.tools.javac.v8.parser;
import java.io.*;
import com.sun.tools.javac.v8.util.*;
/**
* This class is the lexical analyzer (scanner) implementation of gjc.
*/
public class Scanner implements Tokens, LayoutCharacters {
/**
 * The token, set by nextToken().
 */
int token;
/**
 * The token's position. pos = line << Position.LINESHIFT + col.
 * Line and column numbers start at 1.
 */
int pos;
/**
 * The last character position of the token.
 */
int endPos;
/**
 * The last character position of the previous token.
 */
int prevEndPos;
/**
 * The position where a lexical error occurred; Position.NOPOS until
 * lexError() records a position.
 */
int errPos = Position.NOPOS;
/**
 * The name of an identifier or token.
 */
Name name;
/**
 * The radix of a numeric literal token.
 */
int radix;
/**
 * Has a @deprecated been encountered in last doc comment?
 * This needs to be reset by the client.
 */
boolean deprecatedFlag = false;
/**
 * A character buffer for literals; sp is the index at which putChar()
 * stores the next character. putChar() doubles the buffer when it is full.
 */
private char[] sbuf = new char[128];
private int sp;
/**
 * The input buffer (buf), the index of the current character in it (bp;
 * scanChar() increments bp and then reads buf[bp]), and the index of one
 * past the last character in the buffer (buflen).
 */
private char[] buf;
private int bp;
private int buflen;
/**
 * The current character.
 */
private char ch;// the character currently being processed; the line and col fields below record its position, which helps when reporting errors
/**
 * The line number position of the current character.
 */
private int line;
/**
 * The column number position of the current character.
 */
private int col;
/**
 * The buffer index of the last converted unicode character.
 */
private int unicodeConversionBp = 0;
/**
 * The log to be used for error reporting.
 */
private final Log log;
/**
 * The name table.
 */
private final Name.Table names;// table holding identifier names
/**
 * The keyword table.
 */
private final Keywords keywords;
/**
 * Documentation string of the current token.
 */
String docComment = null;
/**
 * Buffer for doc comment; expandCommentBuffer() doubles it when needed.
 */
private char[] buffer;
/**
 * Number of characters in doc comment buffer.
 */
private int count;
/**
 * Creates a scanner over the given input stream.
 *
 * <p>The whole stream is decoded into an in-memory character buffer, the
 * sentinel character {@code EOI} is stored right after the last character
 * read, the position is initialised to line 1 / column 0, and then the
 * first character and the first token are read.
 *
 * <p>The buffer is initially sized to {@code in.available() + 1} and is
 * doubled every time it becomes full. Reading continues until the reader
 * reports end of stream; a read that returns fewer characters than were
 * requested is NOT treated as end of input, because neither
 * {@code available()} nor a short read says anything reliable about how
 * much data is still to come.
 *
 * <p>If the encoding is not supported or an I/O error occurs, a lexical
 * error is reported and scanning proceeds as if the input were empty.
 *
 * @param context  the context from which the log, the name table and the
 *                 keyword table are obtained
 * @param in       the stream to read the source text from; it is not
 *                 closed by this constructor
 * @param encoding the name of the character encoding to decode with, or
 *                 {@code null} to use the platform default
 */
public Scanner(Context context, InputStream in, String encoding) {
    super();
    this.log = Log.instance(context);
    this.names = Name.Table.instance(context);
    this.keywords = Keywords.instance(context);
    try {
        // available() is only a sizing hint; the +1 leaves room for EOI.
        int bufsize = in.available() + 1;
        if (buf == null || buf.length < bufsize)
            buf = new char[bufsize];
        buflen = 0;
        InputStreamReader reader =
            (encoding == null) ? new InputStreamReader(in) :
            new InputStreamReader(in, encoding);
        while (true) {
            // Grow before reading so there is always at least one free
            // slot to read into, and afterwards one slot left for EOI.
            if (buflen == buf.length) {
                char[] newbuf = new char[buflen * 2];
                System.arraycopy(buf, 0, newbuf, 0, buflen);
                buf = newbuf;
            }
            int nread = reader.read(buf, buflen, buf.length - buflen);
            // Only a negative count signals end of stream.
            if (nread < 0)
                break;
            buflen = buflen + nread;
        }
    } catch (UnsupportedEncodingException e) {
        lexError("unsupported.encoding", encoding);
        buf = new char[1];
        buflen = 0;
    }
    catch (IOException e) {
        lexError("io.exception", e.toString());
        buf = new char[1];
        buflen = 0;
    }
    // The loop exits with buflen < buf.length, so this index is in range.
    buf[buflen] = EOI;
    line = 1;
    col = 0;
    bp = -1;
    scanChar();   // load the first character
    nextToken();  // load the first token
}
/**
 * Reports a lexical error through the log and marks the scanner as being
 * in an error state.
 *
 * @param pos the position to report the error at; also stored in errPos
 * @param msg the message key passed to the log
 * @param arg the argument passed to the log together with the key
 */
private void lexError(int pos, String msg, String arg) {
    this.log.error(pos, msg, arg);
    this.token = ERROR;
    this.errPos = pos;
}
/**
 * Reports a lexical error without an argument at the given position.
 *
 * @param pos the position to report the error at
 * @param key the message key
 */
private void lexError(int pos, String key) {
    final String noArgument = null;
    lexError(pos, key, noArgument);
}
/**
 * Reports a lexical error without an argument at the position of the
 * current token.
 *
 * @param key the message key
 */
private void lexError(String key) {
    // The two-argument overload supplies the null argument.
    lexError(this.pos, key);
}
/**
 * Reports a lexical error with an argument at the position of the current
 * token.
 *
 * @param key the message key
 * @param arg the argument passed along with the key
 */
private void lexError(String key, String arg) {
    final int tokenPos = this.pos;
    lexError(tokenPos, key, arg);
}
/**
 * Forwards a warning to the log.
 *
 * @param pos the position to report the warning at
 * @param key the message key
 */
private void lexWarning(int pos, String key) {
    this.log.warning(pos, key);
}
/**
 * Converts the current character to its numeric digit value in the given
 * base.
 *
 * <p>When the current character is a valid digit but lies outside the
 * ASCII range (code above 127), a warning is issued and the current
 * character is replaced by the equivalent ASCII digit.
 *
 * @param base the radix to interpret the current character in
 * @return the digit value, or a negative number if the current character
 *         is not a digit in that base
 */
private int digit(int base) {
    final char current = ch;
    final int value = Character.digit(current, base);
    // Nothing more to do for non-digits and for plain ASCII digits.
    if (value < 0 || current <= 127) {
        return value;
    }
    lexWarning(pos + 1, "illegal.nonascii.digit");
    ch = "0123456789abcdef".charAt(value);
    return value;
}
/**
 * Converts a unicode escape starting at the current character, if any.
 *
 * <p>Does nothing unless the current character is a backslash. If the
 * backslash is not followed by 'u', the scanner state is restored so that
 * the backslash is the current character again. If it is followed by one
 * or more 'u' characters and four hexadecimal digits, the current
 * character becomes the decoded character and the buffer index is
 * recorded in unicodeConversionBp. Otherwise an "illegal.unicode.esc"
 * error is reported at the current line and the column on entry.
 */
private void convertUnicode() {
    final int startcol = col;
    if (ch != '\\') {
        return;
    }

    // Look at the character following the backslash.
    bp++;
    ch = buf[bp];
    col++;
    if (ch != 'u') {
        // Not a unicode escape: step back onto the backslash.
        bp--;
        ch = '\\';
        col--;
        return;
    }

    // Skip the whole run of 'u' characters.
    do {
        bp++;
        ch = buf[bp];
        col++;
    } while (ch == 'u');

    // Four hex digits are expected: buf[bp] .. buf[bp + 3].
    final int limit = bp + 3;
    if (limit < buflen) {
        int d = digit(16);
        int code = d;
        while (bp < limit && d >= 0) {
            bp++;
            ch = buf[bp];
            col++;
            d = digit(16);
            code = (code << 4) + d;
        }
        if (d >= 0) {
            ch = (char) code;
            unicodeConversionBp = bp;
            return;
        }
    }
    lexError(Position.make(line, startcol), "illegal.unicode.esc");
}
/**
 * Advances to the next character of the input buffer and keeps the line
 * and column counters in step.
 *
 * <p>A carriage return starts a new line; a line feed starts a new line
 * only when it does not directly follow a carriage return; a tab moves the
 * column to the next multiple of TabInc; a backslash advances the column
 * and is then handed to convertUnicode(); any other character advances the
 * column by one.
 */
private void scanChar() {
    ch = buf[++bp];
    if (ch == '\r') {
        col = 0;
        line++;
    } else if (ch == '\n') {
        // The "\r" of a "\r\n" pair has already counted this line break.
        boolean followsCarriageReturn = bp != 0 && buf[bp - 1] == '\r';
        if (!followsCarriageReturn) {
            col = 0;
            line++;
        }
    } else if (ch == '\t') {
        col = (col / TabInc * TabInc) + TabInc;
    } else if (ch == '\\') {
        col++;
        convertUnicode();
    } else {
        col++;
    }
}
/**
 * Reads the next character inside a comment.
 *
 * <p>If, after scanChar(), the current character is a backslash that is
 * directly followed by another backslash and was not produced by a unicode
 * conversion at this buffer index, the second backslash is skipped.
 * Otherwise the backslash is passed to convertUnicode().
 */
private void scanCommentChar() {
    scanChar();
    if (ch != '\\') {
        return;
    }
    if (buf[bp + 1] != '\\' || unicodeConversionBp == bp) {
        convertUnicode();
        return;
    }
    // Double backslash: step over the second one.
    bp++;
    col++;
}
/**
 * Doubles the capacity of the doc comment buffer, keeping its contents.
 */
private void expandCommentBuffer() {
    final int oldCapacity = buffer.length;
    final char[] grown = new char[oldCapacity * 2];
    System.arraycopy(buffer, 0, grown, 0, oldCapacity);
    buffer = grown;
}
/**
 * Reads the next character inside a doc comment.
 *
 * <p>If, after scanChar(), the current character is a backslash that is
 * directly followed by another backslash and was not produced by a unicode
 * conversion at this buffer index, the current backslash is appended to
 * the doc comment buffer (growing it when full) and the second backslash
 * is skipped. Otherwise the backslash is passed to convertUnicode().
 */
private void scanDocCommentChar() {
    scanChar();
    if (ch != '\\') {
        return;
    }
    if (buf[bp + 1] != '\\' || unicodeConversionBp == bp) {
        convertUnicode();
        return;
    }
    // Double backslash: record the first one, step over the second.
    if (count == buffer.length) {
        expandCommentBuffer();
    }
    buffer[count++] = ch;
    bp++;
    col++;
}
/**
 * Appends a character to the literal buffer, doubling the buffer first
 * when it is full.
 *
 * @param ch the character to append
 */
private void putChar(char ch) {
    final int capacity = sbuf.length;
    if (sp == capacity) {
        final char[] grown = new char[capacity * 2];
        System.arraycopy(sbuf, 0, grown, 0, capacity);
        sbuf = grown;
    }
    sbuf[sp++] = ch;
}
/**
 * Prints the current character to the standard error stream, for
 * debugging purposes.
 *
 * <p>The stream that was written to (System.err) is flushed so the
 * character shows up immediately. System.out is still flushed as well,
 * as the previous implementation did, so existing output ordering is not
 * affected.
 */
private void dch() {
    System.err.print(ch);
    System.err.flush();
    System.out.flush();
}
/**
* 读取字符和字符串中的下一个各种命令标志
*/
private void scanLitChar() {
if (ch == '\\') {
if (buf[bp + 1] == '\\' && unicodeConversionBp != bp) {
bp++;
col++;
putChar('\\');
scanChar();
} else {
scanChar();
switch (ch) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
char leadch = ch;
int oct = digit(8);
scanChar();
if ('0' <= ch && ch <= '7') {
oct = oct * 8 + digit(8);
scanChar();
if (leadch <= '3' && '0' <= ch && ch <= '7') {
oct = oct * 8 + digit(8);
scanChar();
}
}
putChar((char) oct);
break;
case 'b':
putChar('\b');//各种字符串中的表示符号的处理方法
scanChar();
break;
case 't':
putChar('\t');
scanChar();
break;
case 'n':
putChar('\n');
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -