📄 lookaheadlexer.java
字号:
package edu.berkeley.cs164.lexer;

import java.io.PushbackReader;
import java.io.IOException;
import java.util.Iterator;
import java.util.Set;

/**
 * LookaheadLexer applies the maximal munch rule to find the next token.
 * Tokens returned are a pair of token ID and lexeme String. At the end
 * of the input, {@link Token#EOF} is returned.
 *
 * <p>If the input cannot be tokenized (i.e. matches no token), an
 * {@code UnmatchedException} is raised.
 */
public class LookaheadLexer {

    /**
     * A Token is a pair containing the token ID and the lexeme text.
     * This is what a LookaheadLexer returns to the user.
     */
    public static class Token {
        private int id;          // integer ID of the token type
        private String lexeme;   // string content of this token
        private int lineNumber;  // source line number, kept for better error reporting

        /** Constant Token representing the end of the input. */
        public final static Token EOF = new Token(TokenMap.EOF, "EOF", 0);

        protected Token(int id, String lexeme, int lineNum) {
            this.id = id;
            this.lexeme = lexeme;
            this.lineNumber = lineNum;
        }

        /** @return the integer token-type ID */
        public int getId() {
            return id;
        }

        /** @return the matched lexeme text */
        public String getLexeme() {
            return lexeme;
        }

        /** @return the (1-based) line number the token was seen on */
        public int getLineNumber() {
            return lineNumber;
        }

        @Override
        public String toString() {
            return "line " + lineNumber + ": ID = " + id + "\t\"" + lexeme + "\"";
        }
    }

    private PushbackReader in;  // input stream being lexed; its pushback buffer
                                // must be large enough for the longest over-read
    private NFAState start;     // start state of the lexer NFA

    /**
     * Create a LookaheadLexer.
     *
     * @param in    input to be lexed (pushback capacity must cover lookahead)
     * @param state start NFA state for the lexer
     */
    public LookaheadLexer(PushbackReader in, NFAState state) {
        this.in = in;
        this.start = state;
    }

    /**
     * Returns the next token in the input stream, chosen by the maximal
     * munch rule. Tokens whose ID is {@code TokenMap.IGNORE} are skipped.
     * At end of input, {@link Token#EOF} is returned.
     *
     * @return the next non-ignored token
     * @throws LexerException if the input matches no token
     */
    public Token nextToken() throws LexerException {
        // Loop (rather than recurse, as an earlier revision did) so that a
        // long run of IGNORE tokens cannot overflow the call stack.
        while (true) {
            Token t = scanToken();
            if (t.id != TokenMap.IGNORE) {
                return t;
            }
        }
    }

    /**
     * Scans a single token (possibly an IGNORE token) using maximal munch:
     * keep consuming characters while the NFA is alive, remembering the
     * longest prefix that reached an accepting state, then push the
     * over-read characters back onto the input.
     *
     * @return the longest matching token, or {@link Token#EOF} when the
     *         input is already exhausted
     * @throws LexerException if the NFA dies without ever accepting
     */
    private Token scanToken() throws LexerException {
        int curPosition = 0;                    // length of the longest accepted lexeme so far
        StringBuilder curToken = new StringBuilder(); // all characters consumed so far
        NFAState curState = null;               // accepting state of the longest match
        // Raw Set: the NFAState API predates generics.
        Set currentStates = start.epsilonClose();

        while (true) {
            int c;
            try {
                c = in.read();
            } catch (IOException e) {
                // Best effort: report the I/O failure and treat it as end of
                // input rather than continuing with a stale character.
                e.printStackTrace();
                c = -1;
            }

            // Compare the raw int against -1. The previous char-based test
            // ((c & 0xff) == 0xff) misclassified any real character with a
            // low byte of 0xFF (e.g. 'ÿ') as end-of-file.
            if (c == -1) {
                if (curToken.length() == 0) {
                    // Clean end of stream: nothing consumed at all.
                    return Token.EOF;
                }
                // End of input mid-token: extract the longest match so far.
                break;
            }

            curToken.append((char) c);

            // Advance the NFA: states reachable on c, epsilon-closed.
            currentStates = NFAState.epsilonClosedNextStates(currentStates, (char) c);

            // NFA is dead: no state can extend the current prefix. Fall back
            // to the longest accepted token recorded so far (maximal munch);
            // the over-read tail of curToken is pushed back below.
            if (currentStates.isEmpty()) {
                break;
            }

            // If the new state set contains accepting states, this prefix is
            // a valid token; remember the highest-priority accepting state.
            // A longer match always overrides an earlier shorter one, since
            // bestPriority restarts at 0 for each character.
            int bestPriority = 0;
            for (Object o : currentStates) {
                NFAState s = (NFAState) o;
                if (s.isFinal() && s.getPriority() > bestPriority) {
                    curState = s;
                    bestPriority = s.getPriority();
                    curPosition = curToken.length();
                }
            }
        }

        // Push the characters read past the accepted token back onto the
        // input so the next scan starts right after the token.
        try {
            char[] consumed = curToken.toString().toCharArray();
            in.unread(consumed, curPosition, consumed.length - curPosition);
        } catch (IOException e) {
            e.printStackTrace();
        }

        if (curState == null) {
            // The NFA died without ever reaching an accepting state.
            throw new UnmatchedException();
        }

        // Run the semantic action attached to the accepting state (e.g. line
        // counting). NOTE(review): assumes getAction() is non-null for every
        // final state — confirm against the NFA construction.
        curState.getAction().run();

        return new Token(curState.getTokenId(),
                curToken.substring(0, curPosition),
                LexerCode.lineNum + 1);
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -