📄 scanner.java
字号:
/* * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Sun designates this * particular file as subject to the "Classpath" exception as provided * by Sun in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, * CA 95054 USA or visit www.sun.com if you need additional information or * have any questions. */package com.sun.tools.javac.parser;import java.io.*;import java.nio.*;import java.nio.ByteBuffer;import java.nio.charset.*;import java.nio.channels.*;import java.util.regex.*;import com.sun.tools.javac.util.*;import com.sun.tools.javac.code.Source;import static com.sun.tools.javac.parser.Token.*;import static com.sun.tools.javac.util.LayoutCharacters.*;/** The lexical analyzer maps an input stream consisting of * ASCII characters and Unicode escapes into a token sequence. * * <p><b>This is NOT part of any API supported by Sun Microsystems. If * you write code that depends on this, you do so at your own risk. 
* This code and its internal interfaces are subject to change or
 * deletion without notice.</b>
 */
public class Scanner implements Lexer {

    // NOTE(review): not referenced in the visible part of this file;
    // presumably a debug-tracing switch -- confirm before relying on it.
    private static boolean scannerDebug = false;

    /** A factory for creating scanners. */
    public static class Factory {
        /** The context key for the scanner factory. */
        public static final Context.Key<Scanner.Factory> scannerFactoryKey =
            new Context.Key<Scanner.Factory>();

        /**
         * Get the Factory instance for this context; a new factory is
         * created when the context does not hold one yet.
         */
        public static Factory instance(Context context) {
            Factory existing = context.get(scannerFactoryKey);
            return (existing != null) ? existing : new Factory(context);
        }

        final Log log;
        final Name.Table names;
        final Source source;
        final Keywords keywords;

        /**
         * Create a new scanner factory and store it in the context under
         * {@link #scannerFactoryKey}.
         */
        protected Factory(Context context) {
            context.put(scannerFactoryKey, this);
            this.log = Log.instance(context);
            this.names = Name.Table.instance(context);
            this.source = Source.instance(context);
            this.keywords = Keywords.instance(context);
        }

        /**
         * Create a scanner for the given character sequence. A CharBuffer
         * is passed to the scanner as is; any other sequence is first
         * copied into a char array.
         */
        public Scanner newScanner(CharSequence input) {
            if (input instanceof CharBuffer) {
                return new Scanner(this, (CharBuffer)input);
            }
            char[] chars = input.toString().toCharArray();
            return newScanner(chars, chars.length);
        }

        /** Create a scanner for the given array, with inputLength as the size of the input. */
        public Scanner newScanner(char[] input, int inputLength) {
            return new Scanner(this, input, inputLength);
        }
    }

    /* Output variables; set by nextToken(): */

    /** The token, set by nextToken(). */
    private Token token;

    /** Allow hex floating-point literals. */
    private boolean allowHexFloats;

    /** The token's position, 0-based offset from beginning of text. */
    private int pos;

    /** Character position just after the last character of the token. */
    private int endPos;

    /** The last character position of the previous token. */
    private int prevEndPos;

    /** The position where a lexical error occurred; */
    private int errPos = Position.NOPOS;

    /** The name of an identifier or token: */
    private Name name;

    /** The radix of a numeric literal token.
*/ private int radix; /** Has a @deprecated been encountered in last doc comment? * this needs to be reset by client. */ protected boolean deprecatedFlag = false; /** A character buffer for literals. */ private char[] sbuf = new char[128]; private int sp; /** The input buffer, index of next chacter to be read, * index of one past last character in buffer. */ private char[] buf; private int bp; private int buflen; private int eofPos; /** The current character. */ private char ch; /** The buffer index of the last converted unicode character */ private int unicodeConversionBp = -1; /** The log to be used for error reporting. */ private final Log log; /** The name table. */ private final Name.Table names; /** The keyword table. */ private final Keywords keywords; /** Common code for constructors. */ private Scanner(Factory fac) { this.log = fac.log; this.names = fac.names; this.keywords = fac.keywords; this.allowHexFloats = fac.source.allowHexFloats(); } private static final boolean hexFloatsWork = hexFloatsWork(); private static boolean hexFloatsWork() { try { Float.valueOf("0x1.0p1"); return true; } catch (NumberFormatException ex) { return false; } } /** Create a scanner from the input buffer. buffer must implement * array() and compact(), and remaining() must be less than limit(). */ protected Scanner(Factory fac, CharBuffer buffer) { this(fac, JavacFileManager.toArray(buffer), buffer.limit()); } /** * Create a scanner from the input array. This method might * modify the array. To avoid copying the input array, ensure * that {@code inputLength < input.length} or * {@code input[input.length -1]} is a white space character. * * @param fac the factory which created this Scanner * @param input the input, might be modified * @param inputLength the size of the input. * Must be positive and less than or equal to input.length. 
*/ protected Scanner(Factory fac, char[] input, int inputLength) { this(fac); eofPos = inputLength; if (inputLength == input.length) { if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) { inputLength--; } else { char[] newInput = new char[inputLength + 1]; System.arraycopy(input, 0, newInput, 0, input.length); input = newInput; } } buf = input; buflen = inputLength; buf[buflen] = EOI; bp = -1; scanChar(); } /** Report an error at the given position using the provided arguments. */ private void lexError(int pos, String key, Object... args) { log.error(pos, key, args); token = ERROR; errPos = pos; } /** Report an error at the current token position using the provided * arguments. */ private void lexError(String key, Object... args) { lexError(pos, key, args); } /** Convert an ASCII digit from its base (8, 10, or 16) * to its value. */ private int digit(int base) { char c = ch; int result = Character.digit(c, base); if (result >= 0 && c > 0x7f) { lexError(pos+1, "illegal.nonascii.digit"); ch = "0123456789abcdef".charAt(result); } return result; } /** Convert unicode escape; bp points to initial '\' character * (Spec 3.3). */ private void convertUnicode() { if (ch == '\\' && unicodeConversionBp != bp) { bp++; ch = buf[bp]; if (ch == 'u') { do { bp++; ch = buf[bp]; } while (ch == 'u'); int limit = bp + 3; if (limit < buflen) { int d = digit(16); int code = d; while (bp < limit && d >= 0) { bp++; ch = buf[bp]; d = digit(16); code = (code << 4) + d; } if (d >= 0) { ch = (char)code; unicodeConversionBp = bp; return; } } lexError(bp, "illegal.unicode.esc"); } else { bp--; ch = '\\'; } } } /** Read next character. */ private void scanChar() { ch = buf[++bp]; if (ch == '\\') { convertUnicode(); } } /** Read next character in comment, skipping over double '\' characters. 
*/ private void scanCommentChar() { scanChar(); if (ch == '\\') { if (buf[bp+1] == '\\' && unicodeConversionBp != bp) { bp++; } else { convertUnicode(); } } } /** Append a character to sbuf. */ private void putChar(char ch) { if (sp == sbuf.length) { char[] newsbuf = new char[sbuf.length * 2]; System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length); sbuf = newsbuf; } sbuf[sp++] = ch; } /** For debugging purposes: print character. */ private void dch() { System.err.print(ch); System.out.flush(); } /** Read next character in character or string literal and copy into sbuf. */ private void scanLitChar() { if (ch == '\\') { if (buf[bp+1] == '\\' && unicodeConversionBp != bp) { bp++; putChar('\\'); scanChar(); } else { scanChar(); switch (ch) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': char leadch = ch; int oct = digit(8); scanChar(); if ('0' <= ch && ch <= '7') { oct = oct * 8 + digit(8); scanChar(); if (leadch <= '3' && '0' <= ch && ch <= '7') { oct = oct * 8 + digit(8); scanChar(); } } putChar((char)oct); break; case 'b': putChar('\b'); scanChar(); break; case 't': putChar('\t'); scanChar(); break; case 'n': putChar('\n'); scanChar(); break; case 'f': putChar('\f'); scanChar(); break; case 'r': putChar('\r'); scanChar(); break; case '\'': putChar('\''); scanChar(); break; case '\"': putChar('\"'); scanChar(); break; case '\\': putChar('\\'); scanChar(); break; default: lexError(bp, "illegal.esc.char"); } } } else if (bp != buflen) { putChar(ch); scanChar(); } } /** Read fractional part of hexadecimal floating point number. 
*/ private void scanHexExponentAndSuffix() { if (ch == 'p' || ch == 'P') { putChar(ch); scanChar(); if (ch == '+' || ch == '-') { putChar(ch); scanChar(); } if ('0' <= ch && ch <= '9') { do { putChar(ch); scanChar(); } while ('0' <= ch && ch <= '9'); if (!allowHexFloats) { lexError("unsupported.fp.lit"); allowHexFloats = true; } else if (!hexFloatsWork) lexError("unsupported.cross.fp.lit"); } else lexError("malformed.fp.lit"); } else { lexError("malformed.fp.lit"); } if (ch == 'f' || ch == 'F') { putChar(ch); scanChar(); token = FLOATLITERAL; } else { if (ch == 'd' || ch == 'D') { putChar(ch); scanChar(); } token = DOUBLELITERAL; } } /** Read fractional part of floating point number. */ private void scanFraction() { while (digit(10) >= 0) { putChar(ch); scanChar(); } int sp1 = sp; if (ch == 'e' || ch == 'E') { putChar(ch); scanChar(); if (ch == '+' || ch == '-') { putChar(ch); scanChar(); } if ('0' <= ch && ch <= '9') { do { putChar(ch); scanChar(); } while ('0' <= ch && ch <= '9'); return; } lexError("malformed.fp.lit"); sp = sp1; } } /** Read fractional part and 'd' or 'f' suffix of floating point number. */ private void scanFractionAndSuffix() { this.radix = 10; scanFraction(); if (ch == 'f' || ch == 'F') { putChar(ch); scanChar(); token = FLOATLITERAL; } else { if (ch == 'd' || ch == 'D') { putChar(ch); scanChar(); } token = DOUBLELITERAL; } } /** Read fractional part and 'd' or 'f' suffix of floating point number. */ private void scanHexFractionAndSuffix(boolean seendigit) { this.radix = 16; assert ch == '.'; putChar(ch); scanChar(); while (digit(16) >= 0) { seendigit = true; putChar(ch); scanChar(); } if (!seendigit) lexError("invalid.hex.number"); else scanHexExponentAndSuffix(); } /** Read a number. * @param radix The radix of the number; one of 8, 10, 16. */ private void scanNumber(int radix) { this.radix = radix; // for octal, allow base-10 digit in case it's a float literal int digitRadix = (radix <= 10) ? 
10 : 16; boolean seendigit = false; while (digit(digitRadix) >= 0) { seendigit = true; putChar(ch); scanChar(); } if (radix == 16 && ch == '.') { scanHexFractionAndSuffix(seendigit); } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) { scanHexExponentAndSuffix(); } else if (radix <= 10 && ch == '.') { putChar(ch); scanChar(); scanFractionAndSuffix(); } else if (radix <= 10 && (ch == 'e' || ch == 'E' || ch == 'f' || ch == 'F' || ch == 'd' || ch == 'D')) { scanFractionAndSuffix(); } else { if (ch == 'l' || ch == 'L') { scanChar(); token = LONGLITERAL; } else { token = INTLITERAL; } } } /** Read an identifier. */ private void scanIdent() { boolean isJavaIdentifierPart; char high; do { if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch; // optimization, was: putChar(ch); scanChar(); switch (ch) { case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case '$': case '_': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '\u0000': case '\u0001': case '\u0002': case '\u0003': case '\u0004': case '\u0005': case '\u0006': case '\u0007': case '\u0008': case '\u000E': case '\u000F': case '\u0010': case '\u0011': case '\u0012': case '\u0013': case '\u0014': case '\u0015': case '\u0016': case '\u0017': case '\u0018': case '\u0019': case '\u001B': case '\u007F': break; case '\u001A': // EOI is also a legal identifier part if (bp >= buflen) { name = names.fromChars(sbuf, 0, sp); token = keywords.key(name); return; } break; default: if (ch < '\u0080') 
{ // all ASCII range chars already handled, above isJavaIdentifierPart = false; } else { high = scanSurrogates(); if (high != 0) { if (sp == sbuf.length) { putChar(high); } else { sbuf[sp++] = high;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -