📄 scanner.java

📁 java编译器gjc源码 java编译环境
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/**
 * @(#)Scanner.java	1.36 03/01/23
 *
 * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
 * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 */
package com.sun.tools.javac.v8.parser;
import java.io.*;

import com.sun.tools.javac.v8.util.*;


/**
 * The lexical analyzer (scanner) of the gjc compiler.
 */
public class Scanner implements Tokens, LayoutCharacters {

    /**
     * The token, set by nextToken().
     */
    int token;

    /**
     * The token's position. pos = line << Position.LINESHIFT + col.
     *  Line and column numbers start at 1.
     */
    int pos;

    /**
     * The last character position of the token.
     */
    int endPos;

    /**
     * The last character position of the previous token.
     */
    int prevEndPos;

    /**
     * The position where a lexical error occurred;
     *  Position.NOPOS until lexError() records one.
     */
    int errPos = Position.NOPOS;

    /**
     * The name of an identifier or token:
     */
    Name name;

    /**
     * The radix of a numeric literal token.
     */
    int radix;

    /**
     * Has a @deprecated been encountered in last doc comment?
     *  this needs to be reset by client.
     */
    boolean deprecatedFlag = false;

    /**
     * A character buffer for literals, and the number of characters
     *  currently stored in it (putChar() appends at index sp and doubles
     *  the buffer when it is full).
     */
    private char[] sbuf = new char[128];
    private int sp;

    /**
     * The input buffer, the index of the current character in it
     *  (scanChar() increments bp before reading buf[bp]), and the number
     *  of input characters in the buffer. The constructor stores the EOI
     *  sentinel at buf[buflen].
     */
    private char[] buf;
    private int bp;
    private int buflen;

    /**
     * The current character, i.e. the one being processed. Its position
     *  is tracked by the line and col fields below, which is convenient
     *  when reporting errors.
     */
    private char ch;

    /**
     * The line number position of the current character.
     */
    private int line;

    /**
     * The column number position of the current character.
     */
    private int col;

    /**
     * The buffer index of the last converted unicode character
     */
    private int unicodeConversionBp = 0;

    /**
     * The log to be used for error reporting.
     */
    private final Log log;

    /**
     * The name table (holds the names of identifiers).
     */
    private final Name.Table names;

    /**
     * The keyword table.
     */
    private final Keywords keywords;

    /**
     * Documentation string of the current token.
     */
    String docComment = null;

    /**
     * Buffer for doc comment.
     */
    private char[] buffer;

    /**
     * Number of characters in doc comment buffer.
     */
    private int count;

    /**
     * Creates a scanner over the given input and positions it on the first
     * token.
     *
     * <p>The whole stream is decoded into an in-memory character buffer.
     * The buffer starts at {@code in.available() + 1} characters and is
     * doubled whenever it fills up. Reading continues until the reader
     * reports end of stream; a read that returns fewer characters than
     * requested is NOT treated as end of input, because
     * {@code Reader.read} is allowed to return short counts. After the
     * input has been read, the EOI sentinel is stored behind the last
     * character, the position is reset to line 1 / column 0, and the
     * first character and the first token are scanned.
     *
     * <p>If the encoding is unsupported or an I/O error occurs, the error
     * is reported through lexError() and the scanner continues with an
     * empty input buffer. The stream is not closed here.
     *
     * @param context  compilation context from which the log, the name
     *                 table and the keyword table are obtained
     * @param in       stream providing the source text
     * @param encoding name of the character encoding of {@code in}, or
     *                 {@code null} to use the default of
     *                 {@code InputStreamReader}
     */
    public Scanner(Context context, InputStream in, String encoding) {
        super();
        this.log = Log.instance(context);
        this.names = Name.Table.instance(context);
        this.keywords = Keywords.instance(context);
        try {
            // available() is only an estimate of the input size; the extra
            // slot leaves room for the EOI sentinel.
            buf = new char[in.available() + 1];
            buflen = 0;
            InputStreamReader reader =
                    (encoding == null) ? new InputStreamReader(in) :
                    new InputStreamReader(in, encoding);
            while (true) {
                if (buflen == buf.length) {
                    // Buffer is full: double it before asking for more, so
                    // that read() is never called with a zero length.
                    char[] newbuf = new char[buf.length * 2];
                    System.arraycopy(buf, 0, newbuf, 0, buflen);
                    buf = newbuf;
                }
                int nread = reader.read(buf, buflen, buf.length - buflen);
                if (nread < 0)
                    break; // only -1 signals end of stream
                buflen = buflen + nread;
            }
            // On exit buflen < buf.length: the buffer is grown before every
            // read, and the final read added nothing.
        } catch (UnsupportedEncodingException e) {
            lexError("unsupported.encoding", encoding);
            buf = new char[1];
            buflen = 0;
        }
        catch (IOException e) {
            lexError("io.exception", e.toString());
            buf = new char[1];
            buflen = 0;
        }
        buf[buflen] = EOI;
        line = 1;
        col = 0;
        bp = -1;
        scanChar();   // load the first character into ch
        nextToken();  // scan the first token
    }

    /**
     * Logs a lexical error and switches the scanner into its error state:
     * the current token becomes ERROR and the error position is recorded.
     *
     * @param pos position at which the error is reported
     * @param msg message key handed to the log
     * @param arg argument for the message; the other overloads pass null
     *            when there is none
     */
    private void lexError(int pos, String msg, String arg) {
        this.log.error(pos, msg, arg);
        this.errPos = pos;
        this.token = ERROR;
    }

    /**
     * Reports an argument-less lexical error at an explicit position.
     *
     * @param pos position at which the error is reported
     * @param key message key of the error
     */
    private void lexError(int pos, String key) {
        final String noArgument = null;
        lexError(pos, key, noArgument);
    }

    /**
     * Reports an argument-less lexical error at the position of the
     * current token.
     *
     * @param key message key of the error
     */
    private void lexError(String key) {
        final String noArgument = null;
        lexError(this.pos, key, noArgument);
    }

    /**
     * Reports a lexical error with one message argument at the position
     * of the current token.
     *
     * @param key message key of the error
     * @param arg argument for the message
     */
    private void lexError(String key, String arg) {
        lexError(this.pos, key, arg);
    }

    /**
     * Forwards a warning to the log. Unlike lexError(), this leaves the
     * current token and the recorded error position untouched.
     *
     * @param pos position at which the warning is reported
     * @param key message key of the warning
     */
    private void lexWarning(int pos, String key) {
        this.log.warning(pos, key);
    }

    /**
     * Returns the numeric value of the current character in the given
     * base, as computed by {@code Character.digit}.
     *
     * <p>If the character is a valid digit but lies outside the ASCII
     * range (code above 127), an "illegal.nonascii.digit" warning is
     * issued at {@code pos + 1} and the current character is replaced by
     * the equivalent ASCII digit.
     *
     * @param base radix in which the current character is interpreted
     * @return the digit value, or a negative number if the current
     *         character is not a digit in that base
     */
    private int digit(int base) {
        final char current = ch;
        final int value = Character.digit(current, base);
        if (value < 0 || current <= 127) {
            // Not a digit at all, or a plain ASCII digit: nothing to fix up.
            return value;
        }
        lexWarning(pos + 1, "illegal.nonascii.digit");
        ch = "0123456789abcdef".charAt(value);
        return value;
    }

    /**
     * Converts a unicode escape (a backslash, one or more 'u' characters
     * and four hexadecimal digits) that starts at the current character
     * into the single character it denotes.
     *
     * <p>Does nothing unless the current character is a backslash. If the
     * backslash is not followed by 'u', the scanner state (bp, ch, col) is
     * restored so that the backslash is again the current character. On a
     * successful conversion ch holds the decoded character, bp points at
     * the last hex digit, and that index is remembered in
     * unicodeConversionBp. A malformed or truncated escape is reported as
     * "illegal.unicode.esc" at the column of the backslash.
     */
    private void convertUnicode() {
        // Column of the backslash, kept for error reporting.
        int startcol = col;
        if (ch == '\\') {
            bp++;
            ch = buf[bp];
            col++;
            if (ch == 'u') {
                // Skip every further 'u' that follows the first one.
                do {
                    bp++;
                    ch = buf[bp];
                    col++;
                } while (ch == 'u')
                    ;
                // The four hex digits occupy buf[bp] .. buf[bp + 3]; they
                // must all lie before the end of the input.
                int limit = bp + 3;
                if (limit < buflen) {
                    int d = digit(16);
                    int code = d;
                    // Accumulate the remaining three digits; stop early as
                    // soon as a non-hex character (d < 0) is seen.
                    while (bp < limit && d >= 0) {
                        bp++;
                        ch = buf[bp];
                        col++;
                        d = digit(16);
                        // code is only used below when d >= 0, so folding
                        // in a negative d here is harmless.
                        code = (code << 4) + d;
                    }
                    if (d >= 0) {
                        ch = (char) code;
                        unicodeConversionBp = bp;
                        return;
                    }
                }
                lexError(Position.make(line, startcol), "illegal.unicode.esc");
            } else {
                // Not a unicode escape: step back onto the backslash.
                bp--;
                ch = '\\';
                col--;
            }
        }
    }

    /**
     * Advances to the next character of the input buffer, stores it in ch
     * and updates the line/column bookkeeping.
     *
     * <ul>
     * <li>A carriage return starts a new line (column reset to 0).</li>
     * <li>A line feed starts a new line unless it directly follows a
     *     carriage return, in which case the pair counts once.</li>
     * <li>A tab moves the column to the next multiple of TabInc.</li>
     * <li>Any other character advances the column by one; a backslash is
     *     additionally handed to convertUnicode().</li>
     * </ul>
     */
    private void scanChar() {
        ch = buf[++bp];
        if (ch == '\r') {
            col = 0;
            line++;
        } else if (ch == '\n') {
            final boolean followsCarriageReturn = bp != 0 && buf[bp - 1] == '\r';
            if (!followsCarriageReturn) {
                col = 0;
                line++;
            }
        } else if (ch == '\t') {
            col = (col / TabInc * TabInc) + TabInc;
        } else {
            col++;
            if (ch == '\\') {
                convertUnicode();
            }
        }
    }

    /**
     * Reads the next character inside a comment.
     *
     * <p>If the character read is a backslash that is immediately followed
     * by another backslash (and the current backslash is not itself the
     * result of a unicode conversion), the second backslash is skipped.
     * Any other backslash is passed to convertUnicode().
     */
    private void scanCommentChar() {
        scanChar();
        if (ch != '\\') {
            return;
        }
        final boolean doubledBackslash =
                buf[bp + 1] == '\\' && unicodeConversionBp != bp;
        if (!doubledBackslash) {
            convertUnicode();
            return;
        }
        bp++;
        col++;
    }

    /**
     * Doubles the capacity of the doc comment buffer, keeping its current
     * contents.
     */
    private void expandCommentBuffer() {
        final char[] previous = buffer;
        final char[] enlarged = new char[previous.length * 2];
        System.arraycopy(previous, 0, enlarged, 0, previous.length);
        buffer = enlarged;
    }

    /**
     * Reads the next character inside a doc comment.
     *
     * <p>If the character read is a backslash that is immediately followed
     * by another backslash (and the current backslash is not itself the
     * result of a unicode conversion), the current backslash is appended to
     * the doc comment buffer, growing it if necessary, and the second
     * backslash is skipped. Any other backslash is passed to
     * convertUnicode().
     */
    private void scanDocCommentChar() {
        scanChar();
        if (ch != '\\') {
            return;
        }
        final boolean doubledBackslash =
                buf[bp + 1] == '\\' && unicodeConversionBp != bp;
        if (!doubledBackslash) {
            convertUnicode();
            return;
        }
        if (count == buffer.length) {
            expandCommentBuffer();
        }
        buffer[count++] = ch;
        bp++;
        col++;
    }

    /**
     * Appends a character to the literal buffer sbuf, doubling the buffer
     * first when it is full.
     *
     * @param ch the character to append (shadows the field of the same
     *           name)
     */
    private void putChar(char ch) {
        final int capacity = sbuf.length;
        if (sp == capacity) {
            final char[] enlarged = new char[capacity * 2];
            System.arraycopy(sbuf, 0, enlarged, 0, capacity);
            sbuf = enlarged;
        }
        sbuf[sp++] = ch;
    }

    /**
     * Debugging aid: prints the current character to standard error.
     *
     * <p>The stream that was written to is flushed so that the character
     * shows up immediately. Standard output is flushed as well, as the
     * previous implementation did.
     */
    private void dch() {
        System.err.print(ch);
        System.err.flush();
        System.out.flush();
    }

    /**
      * Scans the next character of a character or string literal,
      * decoding backslash escape sequences.
      */
    private void scanLitChar() {
        if (ch == '\\') {
            if (buf[bp + 1] == '\\' && unicodeConversionBp != bp) {
                bp++;
                col++;
                putChar('\\');
                scanChar();
            } else {
                scanChar();
                switch (ch) {
                case '0':

                case '1':

                case '2':

                case '3':

                case '4':

                case '5':

                case '6':

                case '7':
                    char leadch = ch;
                    int oct = digit(8);
                    scanChar();
                    if ('0' <= ch && ch <= '7') {
                        oct = oct * 8 + digit(8);
                        scanChar();
                        if (leadch <= '3' && '0' <= ch && ch <= '7') {
                            oct = oct * 8 + digit(8);
                            scanChar();
                        }
                    }
                    putChar((char) oct);
                    break;

                case 'b':
                    putChar('\b');//各种字符串中的表示符号的处理方法
                    scanChar();
                    break;

                case 't':
                    putChar('\t');
                    scanChar();
                    break;

                case 'n':
                    putChar('\n');
12 3 下一页
💿 文件大小 240 K
👤 上传用户 buptbaishikele
📂 所属分类编译器/解释器
🏷️ 相关标签

#java #gjc #编译器 #源码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -