📄 lexer.java
字号:
package java_cup;

import java_cup.runtime.Symbol;
import java.util.Hashtable;

/** This class implements a small scanner (aka lexical analyzer or lexer) for
 *  the JavaCup specification.  This scanner reads characters from standard
 *  input (System.in) and returns integers corresponding to the terminal
 *  number of the next Symbol.  Once end of input is reached the EOF Symbol is
 *  returned on every subsequent call.<p>
 *  Symbols currently returned include: <pre>
 *    Symbol        Constant Returned     Symbol        Constant Returned
 *    ------        -----------------     ------        -----------------
 *    "package"     PACKAGE               "import"      IMPORT
 *    "code"        CODE                  "action"      ACTION
 *    "parser"      PARSER                "terminal"    TERMINAL
 *    "non"         NON                   "init"        INIT
 *    "scan"        SCAN                  "with"        WITH
 *    "start"       START                 "precedence"  PRECEDENCE
 *    "left"        LEFT                  "right"       RIGHT
 *    "nonassoc"    NONASSOC              "%prec"       PRECENT_PREC
 *      [           LBRACK                  ]           RBRACK
 *      ;           SEMI
 *      ,           COMMA                   *           STAR
 *      .           DOT                     :           COLON
 *      ::=         COLON_COLON_EQUALS      |           BAR
 *    identifier    ID                    {:...:}       CODE_STRING
 *  </pre>
 *  All symbol constants are defined in sym.java which is generated by
 *  JavaCup from parser.cup.<p>
 *
 *  In addition to the scanner proper (called first via init() then with
 *  next_token() to get each Symbol) this class provides simple error and
 *  warning routines and keeps a count of errors and warnings that is
 *  publicly accessible.<p>
 *
 *  This class is "static" (i.e., it has only static members and methods).
 *
 * @version last updated: 7/3/96
 * @author  Frank Flannery
 */
public class lexer {

  /*-----------------------------------------------------------*/
  /*--- Constructor(s) ----------------------------------------*/
  /*-----------------------------------------------------------*/

  /** The only constructor is private, so no instances can be created. */
  private lexer() { }

  /*-----------------------------------------------------------*/
  /*--- Static (Class) Variables ------------------------------*/
  /*-----------------------------------------------------------*/

  /** First character of lookahead. */
  protected static int next_char;

  /** Second character of lookahead. */
  protected static int next_char2;

  /** Third character of lookahead. */
  protected static int next_char3;

  /** Fourth character of lookahead. */
  protected static int next_char4;

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** EOF constant.  This is the value stored in a lookahead slot once the
   *  end of the input has been reached.
   */
  protected static final int EOF_CHAR = -1;

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Table of keywords.  Keywords are initially treated as identifiers.
   *  Just before they are returned we look them up in this table to see if
   *  they match one of the keywords.  The string of the name is the key here,
   *  which indexes Integer objects holding the symbol number.
   */
  protected static Hashtable keywords = new Hashtable(23);

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Table of single character symbols.  For ease of implementation, we
   *  store all unambiguous single character Symbols in this table of Integer
   *  objects keyed by Integer objects with the numerical value of the
   *  appropriate char (currently Character objects have a bug which precludes
   *  their use in tables).
   */
  protected static Hashtable char_symbols = new Hashtable(11);

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Current line number for use in error messages.  Starts at 1 and is
   *  incremented by advance() each time a newline character is consumed.
   */
  protected static int current_line = 1;

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Character position in current line.  Starts at 1 and is reset to 1 by
   *  advance() each time a newline character is consumed.
   */
  protected static int current_position = 1;

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Absolute character position in the input.  Starts at 1 and is
   *  incremented by every call to advance(); unlike current_position it is
   *  not reset at line breaks.
   */
  protected static int absolute_position = 1;

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Count of total errors detected so far. */
  public static int error_count = 0;

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Count of warnings issued so far */
  public static int warning_count = 0;

  /*-----------------------------------------------------------*/
  /*--- Static Methods ----------------------------------------*/
  /*-----------------------------------------------------------*/

  /** Initialize the scanner.  This sets up the keywords and char_symbols
   *  tables and reads the first four characters of lookahead from
   *  System.in.  As soon as one read reports end of input, the remaining
   *  lookahead slots are set to EOF_CHAR without reading any further.
   *
   * @throws java.io.IOException if reading from System.in fails.
   */
  public static void init() throws java.io.IOException {
    /* set up the keyword table */
    keywords.put("package", new Integer(sym.PACKAGE));
    keywords.put("import", new Integer(sym.IMPORT));
    keywords.put("code", new Integer(sym.CODE));
    keywords.put("action", new Integer(sym.ACTION));
    keywords.put("parser", new Integer(sym.PARSER));
    keywords.put("terminal", new Integer(sym.TERMINAL));
    keywords.put("non", new Integer(sym.NON));
    keywords.put("init", new Integer(sym.INIT));
    keywords.put("scan", new Integer(sym.SCAN));
    keywords.put("with", new Integer(sym.WITH));
    keywords.put("start", new Integer(sym.START));
    keywords.put("precedence", new Integer(sym.PRECEDENCE));
    keywords.put("left", new Integer(sym.LEFT));
    keywords.put("right", new Integer(sym.RIGHT));
    keywords.put("nonassoc", new Integer(sym.NONASSOC));

    /* set up the table of single character symbols */
    char_symbols.put(new Integer(';'), new Integer(sym.SEMI));
    char_symbols.put(new Integer(','), new Integer(sym.COMMA));
    char_symbols.put(new Integer('*'), new Integer(sym.STAR));
    char_symbols.put(new Integer('.'), new Integer(sym.DOT));
    char_symbols.put(new Integer('|'), new Integer(sym.BAR));
    char_symbols.put(new Integer('['), new Integer(sym.LBRACK));
    char_symbols.put(new Integer(']'), new Integer(sym.RBRACK));

    /* read four characters of lookahead; each slot is only read from the
       input when the slot before it did not hit end of input */
    next_char  = System.in.read();
    next_char2 = (next_char  == EOF_CHAR) ? EOF_CHAR : System.in.read();
    next_char3 = (next_char2 == EOF_CHAR) ? EOF_CHAR : System.in.read();
    next_char4 = (next_char3 == EOF_CHAR) ? EOF_CHAR : System.in.read();
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Advance the scanner one character in the input stream.  This shifts
   *  the four character lookahead window down by one slot (next_char2 moves
   *  to next_char, and so on) and then reads a new next_char4, unless end of
   *  input has already been reached, in which case the trailing slots are
   *  filled with EOF_CHAR.  The line and position counters are updated to
   *  account for the character that was consumed.
   *
   * @throws java.io.IOException if reading from System.in fails.
   */
  protected static void advance() throws java.io.IOException {
    /* remember the character being consumed so a newline can be detected */
    int old_char = next_char;

    /* shift the window; once a slot holds EOF every later slot is forced
       to EOF as well, and no further read is attempted */
    next_char  = next_char2;
    next_char2 = (next_char  == EOF_CHAR) ? EOF_CHAR : next_char3;
    next_char3 = (next_char2 == EOF_CHAR) ? EOF_CHAR : next_char4;
    next_char4 = (next_char3 == EOF_CHAR) ? EOF_CHAR : System.in.read();

    /* count this */
    absolute_position++;
    current_position++;
    if (old_char == '\n') {
      /* the consumed character ended a line: start counting a new one */
      current_line++;
      current_position = 1;
    }
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Emit an error message.  The message will be marked with both the
   *  current line number and the position in the line.  Error messages
   *  are printed on standard error (System.err).  Each call also
   *  increments error_count.
   *
   * @param message the message to print.
   */
  public static void emit_error(String message) {
    System.err.println("Error at " + current_line + "(" + current_position +
                       "): " + message);
    error_count++;
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Emit a warning message.  The message will be marked with both the
   *  current line number and the position in the line.  Messages are
   *  printed on standard error (System.err).  Each call also increments
   *  warning_count.
   *
   * @param message the message to print.
   */
  public static void emit_warn(String message) {
    System.err.println("Warning at " + current_line + "(" + current_position +
                       "): " + message);
    warning_count++;
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Determine if a character is ok to start an id.  Accepted characters
   *  are the ASCII letters 'a'-'z' and 'A'-'Z' and the underscore.
   *
   * @param ch the character in question.
   * @return true if ch may begin an identifier.
   */
  protected static boolean id_start_char(int ch) {
    /* NOTE: an earlier comment here said '%' was allowed in identifiers as
       a hack for %prec, but this test does not accept '%'.  Presumably
       %prec is recognized elsewhere in the scanner (not visible in this
       part of the file) -- confirm before relying on it.  frankf's remark
       still stands: this should eventually get a proper lex spec. */
    return (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           (ch == '_');

    /* later need to deal with non-8-bit chars here */
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Determine if a character is ok for the middle of an id.  This is any
   *  character accepted by id_start_char() plus the digits '0'-'9'.
   *
   * @param ch the character in question.
   * @return true if ch may appear inside an identifier.
   */
  protected static boolean id_char(int ch) {
    return id_start_char(ch) || (ch >= '0' && ch <= '9');
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Try to look up a single character symbol, returns -1 for not found.
   * @param ch the character in question.
   */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -