📄 lookaheadlexer.java
字号:
package edu.berkeley.cs164.lexer;

import java.io.PushbackReader;
import java.io.IOException;
import java.util.Iterator;
import java.util.Set;

/**
 * LookaheadLexer applies the maximal munch rule to find the next token.
 * Tokens returned are a pair of token ID and lexeme String. At the end
 * of the input, {@link Token#EOF} is returned.
 *
 * <p>If the input cannot be tokenized (i.e. matches no token), an
 * {@code UnmatchedException} is raised.
 */
public class LookaheadLexer {

    /**
     * A Token is a pair containing the token ID and the lexeme text.
     * This is what a LookaheadLexer returns to the user.
     */
    public static class Token {
        private int id;          // integer ID of the token type
        private String lexeme;   // string content of this token
        private int lineNumber;  // source line number, kept for better error reporting

        /** Constant Token representing the end of the input. */
        public final static Token EOF = new Token(TokenMap.EOF, "EOF", 0);

        protected Token(int id, String lexeme, int lineNum) {
            this.id = id;
            this.lexeme = lexeme;
            this.lineNumber = lineNum;
        }

        /** @return the integer token-type ID */
        public int getId() {
            return id;
        }

        /** @return the matched lexeme text */
        public String getLexeme() {
            return lexeme;
        }

        /** @return the (1-based) line number the token was seen on */
        public int getLineNumber() {
            return lineNumber;
        }

        @Override
        public String toString() {
            return "line " + lineNumber + ": ID = " + id + "\t\"" + lexeme + "\"";
        }
    }

    private PushbackReader in;  // input stream being lexed; its pushback buffer
                                // must be large enough for the longest over-read
    private NFAState start;     // start state of the lexer NFA

    /**
     * Create a LookaheadLexer.
     *
     * @param in    input to be lexed (pushback capacity must cover lookahead)
     * @param state start NFA state for the lexer
     */
    public LookaheadLexer(PushbackReader in, NFAState state) {
        this.in = in;
        this.start = state;
    }

    /**
     * Returns the next token in the input stream, chosen by the maximal
     * munch rule. Tokens whose ID is {@code TokenMap.IGNORE} are skipped.
     * At end of input, {@link Token#EOF} is returned.
     *
     * @return the next non-ignored token
     * @throws LexerException if the input matches no token
     */
    public Token nextToken() throws LexerException {
        // Loop (rather than recurse, as an earlier revision did) so that a
        // long run of IGNORE tokens cannot overflow the call stack.
        while (true) {
            Token t = scanToken();
            if (t.id != TokenMap.IGNORE) {
                return t;
            }
        }
    }

    /**
     * Scans a single token (possibly an IGNORE token) using maximal munch:
     * keep consuming characters while the NFA is alive, remembering the
     * longest prefix that reached an accepting state, then push the
     * over-read characters back onto the input.
     *
     * @return the longest matching token, or {@link Token#EOF} when the
     *         input is already exhausted
     * @throws LexerException if the NFA dies without ever accepting
     */
    private Token scanToken() throws LexerException {
        int curPosition = 0;                    // length of the longest accepted lexeme so far
        StringBuilder curToken = new StringBuilder(); // all characters consumed so far
        NFAState curState = null;               // accepting state of the longest match
        // Raw Set: the NFAState API predates generics.
        Set currentStates = start.epsilonClose();

        while (true) {
            int c;
            try {
                c = in.read();
            } catch (IOException e) {
                // Best effort: report the I/O failure and treat it as end of
                // input rather than continuing with a stale character.
                e.printStackTrace();
                c = -1;
            }

            // Compare the raw int against -1. The previous char-based test
            // ((c & 0xff) == 0xff) misclassified any real character with a
            // low byte of 0xFF (e.g. 'ÿ') as end-of-file.
            if (c == -1) {
                if (curToken.length() == 0) {
                    // Clean end of stream: nothing consumed at all.
                    return Token.EOF;
                }
                // End of input mid-token: extract the longest match so far.
                break;
            }

            curToken.append((char) c);

            // Advance the NFA: states reachable on c, epsilon-closed.
            currentStates = NFAState.epsilonClosedNextStates(currentStates, (char) c);

            // NFA is dead: no state can extend the current prefix. Fall back
            // to the longest accepted token recorded so far (maximal munch);
            // the over-read tail of curToken is pushed back below.
            if (currentStates.isEmpty()) {
                break;
            }

            // If the new state set contains accepting states, this prefix is
            // a valid token; remember the highest-priority accepting state.
            // A longer match always overrides an earlier shorter one, since
            // bestPriority restarts at 0 for each character.
            int bestPriority = 0;
            for (Object o : currentStates) {
                NFAState s = (NFAState) o;
                if (s.isFinal() && s.getPriority() > bestPriority) {
                    curState = s;
                    bestPriority = s.getPriority();
                    curPosition = curToken.length();
                }
            }
        }

        // Push the characters read past the accepted token back onto the
        // input so the next scan starts right after the token.
        try {
            char[] consumed = curToken.toString().toCharArray();
            in.unread(consumed, curPosition, consumed.length - curPosition);
        } catch (IOException e) {
            e.printStackTrace();
        }

        if (curState == null) {
            // The NFA died without ever reaching an accepting state.
            throw new UnmatchedException();
        }

        // Run the semantic action attached to the accepting state (e.g. line
        // counting). NOTE(review): assumes getAction() is non-null for every
        // final state — confirm against the NFA construction.
        curState.getAction().run();

        return new Token(curState.getTokenId(),
                curToken.substring(0, curPosition),
                LexerCode.lineNum + 1);
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -