⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lexicalanalyse.java

📁 用Java编写的词法分析器
💻 JAVA
字号:
package pk_lexical;

import java.io.*;
import java.util.*;
import static java.lang.Character.*;

/**
 * Command-line entry point for the lexical analyser.
 */
public class LexicalAnalyse {

	/** Source file analysed when no path is given on the command line. */
	private static final String DEFAULT_SOURCE = "test.c";

	/**
	 * Runs the lexical analyser on a single source file.
	 *
	 * @param args optional; {@code args[0]} is the path of the file to analyse.
	 *             When it is absent, {@code test.c} (resolved against the
	 *             working directory) is analysed.
	 */
	public static void main(String[] args) {
		String path = (args != null && args.length > 0) ? args[0] : DEFAULT_SOURCE;
		// Constructing Lexical performs the whole analysis.
		new Lexical(new File(path));
	}
}

/**
 * Lexical analyser for a small C-like language.
 *
 * <p>Constructing an instance scans the given source file byte by byte and
 * produces:
 * <ul>
 *   <li>{@code tokenStream.txt} - one line per token: code, sort, attribute;</li>
 *   <li>{@code symbolTable.txt} - one line per identifier: entry, name and,
 *       when known, its declared type;</li>
 *   <li>{@code constTable.txt} - one line per numeric constant: entry, text
 *       and, when known, its type.</li>
 * </ul>
 * The token stream and the numbers of the lines containing lexical errors are
 * also printed to standard output.
 *
 * <p>Each byte of the source is treated as one character.
 */
class Lexical {

	/** Value returned by {@link RandomAccessFile#read()} at end of file. */
	private static final int EOF = -1;

	// States of the numeric-constant automaton used by constSort().
	/** Only digits seen so far: an integer constant. */
	private static final int STATE_INTEGER = 0;
	/** A '.' was just read; at least one digit must follow. */
	private static final int STATE_AFTER_DOT = 1;
	/** Digits after the '.': a double constant. */
	private static final int STATE_FRACTION = 2;
	/** Malformed constant (letter inside it, second '.', ...). */
	private static final int STATE_INVALID = 3;

	/**
	 * Builds the keyword/token tables and immediately analyses {@code source}.
	 *
	 * @param source the source file to analyse
	 */
	public Lexical(File source) {

		this.init();
		scanner(source);
	}

	/** True between a type keyword ({@code int}/{@code double}) and the next ';'. */
	private boolean inDeclaration = false;
	/** Type keyword of the declaration being scanned, or null outside one. */
	private String declaredType = null;
	/** Type of the most recently accepted constant; reset to null at ';'. */
	private String constType = null;

	private RandomAccessFile sourceFile = null;
	private BufferedWriter objectWriter = null;
	private BufferedWriter symbolTableWriter = null;
	private BufferedWriter constTableWriter = null;

	/**
	 * Scans {@code source}, fills the public tables and writes the three
	 * output files into the working directory.
	 *
	 * <p>If a {@link FileNotFoundException} is raised while opening the files,
	 * a message is printed and the JVM is terminated. Any other
	 * {@link IOException} is reported with a stack trace.
	 *
	 * @param source the source file to analyse
	 */
	public void scanner(File source) {

		try {
			// Read-only access: mode "rw" would create a missing source file
			// as an empty one instead of reporting it, and would fail on
			// sources that are not writable.
			sourceFile = new RandomAccessFile(source, "r");
			objectWriter = new BufferedWriter(new FileWriter("tokenStream.txt"));
			symbolTableWriter = new BufferedWriter(new FileWriter("symbolTable.txt"));
			constTableWriter = new BufferedWriter(new FileWriter("constTable.txt"));

			while ((code = sourceFile.read()) != EOF) {
				current++;
				char ch = (char) code;

				if (Character.isLetter(ch)) {
					scanWord();
				} else if (Character.isDigit(ch)) {
					scanNumber();
				} else if (ch == '/') {
					scanSlash();
				} else if (Character.isSpaceChar(ch) || ch == '\n'
						|| ch == '\t' || ch == '\r') {
					if (ch == '\n') {
						lineno++;
					}
				} else {
					scanPunctuation();
				}
			}
			outputTokenStream();
		} catch (FileNotFoundException e) {
			System.out.println("SourceFile not Found!");
			System.exit(0);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Close each resource on its own: some may never have been
			// opened, and one failing close must not skip the others.
			closeResource(sourceFile);
			closeResource(objectWriter);
			closeResource(symbolTableWriter);
			closeResource(constTableWriter);
		}
	}

	/** Handles a token starting with a letter: a keyword or an identifier. */
	private void scanWord() throws IOException {

		String word = idSort();

		if (keyWordsTable.contains(word)) {
			tokenStream.add(new Token(word, tokenMap));
			if (word.equals("int") || word.equals("double")) {
				inDeclaration = true;
				declaredType = word;
			}
			return;
		}

		// Identifiers get their entry number in order of first appearance.
		if (!symbolMap.containsKey(word)) {
			symbolMap.put(word, symbolMap.size());
		}
		int idEntry = symbolMap.get(word);
		// The first declaration seen for an identifier fixes its type.
		if (inDeclaration && !symbolTypeMap.containsKey(idEntry)) {
			symbolTypeMap.put(idEntry, declaredType);
		}
		tokenStream.add(new Token("ID", idEntry, true, tokenMap));
	}

	/** Handles a token starting with a digit: a numeric constant. */
	private void scanNumber() throws IOException {

		String text = constSort();
		if (text == null) {
			errorLine.add(lineno);
			return;
		}

		// Constants get their entry number in order of first appearance.
		if (!constMap.containsKey(text)) {
			constMap.put(text, constMap.size());
		}
		int constEntry = constMap.get(text);
		tokenStream.add(new Token("CONST", constEntry, false, tokenMap));

		if (constType != null && !constTypeMap.containsKey(constEntry)) {
			constTypeMap.put(constEntry, constType);
		}
	}

	/**
	 * Handles a '/': either the start of a {@code //} comment, which is
	 * skipped, or a lexical error, in which case the rest of the line is
	 * discarded and the line is recorded as erroneous.
	 */
	private void scanSlash() throws IOException {

		if (!commentSort()) {
			skipToEndOfLine();
			errorLine.add(lineno);
		}
	}

	/** Handles any other character: an operator, a delimiter or an error. */
	private void scanPunctuation() throws IOException {

		String sort = boardSort();
		if (sort == null) {
			errorLine.add(lineno);
		} else {
			tokenStream.add(new Token(sort, -1, false, tokenMap));
		}
	}

	/**
	 * Prints the token stream to standard output, writes the token stream,
	 * symbol table and constant table to their files, and finally reports
	 * either success or the number of every line that contained an error.
	 *
	 * @throws IOException if writing to one of the output files fails
	 */
	private void outputTokenStream() throws IOException {

		System.out.printf("%s\t%s\t%s\n", "TokenCode", "TokenSort", "Attribute");
		for (Token unit : tokenStream) {
			System.out.printf("%d\t\t", unit.getTokenCode());
			System.out.printf("%s\t\t", unit.getTokenSort() + " ");
			System.out.printf("%d\n", unit.getAttribute());

			objectWriter.write(unit.getTokenCode() + " "
					+ unit.getTokenSort() + " " + unit.getAttribute());
			objectWriter.newLine();
		}

		writeTable(symbolTableWriter, symbolMap, symbolTypeMap);
		writeTable(constTableWriter, constMap, constTypeMap);

		if (errorLine.isEmpty()) {
			System.out.println("Successfully Compiled!");
			return;
		}
		for (Integer line : errorLine) {
			System.out.println("Error in line: " + line);
		}
	}

	/**
	 * Writes one "entry text [type]" line per table entry, in insertion order.
	 *
	 * @param out     destination writer
	 * @param entries lexeme text mapped to its entry number
	 * @param types   entry number mapped to its type, where one is known
	 * @throws IOException if writing fails
	 */
	private static void writeTable(BufferedWriter out, Map<String, Integer> entries,
			Map<Integer, String> types) throws IOException {

		for (Map.Entry<String, Integer> row : entries.entrySet()) {
			out.write(row.getValue() + " ");
			out.write(row.getKey());

			String type = types.get(row.getValue());
			if (type != null) {
				out.write(" " + type);
			}
			out.newLine();
		}
	}

	/** Closes {@code resource} if it was opened, reporting any failure. */
	private static void closeResource(Closeable resource) {

		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/** Reads the next byte into {@code code} and advances the position counter. */
	private void advance() throws IOException {

		code = sourceFile.read();
		current++;
	}

	/**
	 * Backs up one character so that the look-ahead is read again. After a
	 * read that hit end of file the counter is one past the file length, so
	 * this leaves the file positioned at its end.
	 */
	private void retract() throws IOException {

		current--;
		sourceFile.seek(current);
	}

	/**
	 * Consumes the next character if it equals {@code expected}; otherwise
	 * leaves it unread.
	 *
	 * @return true if the character matched and was consumed
	 */
	private boolean followedBy(char expected) throws IOException {

		advance();
		if (code != EOF && (char) code == expected) {
			return true;
		}
		retract();
		return false;
	}

	/**
	 * Discards characters up to, but not including, the next line feed so
	 * that the main loop still sees it and counts the line. Stops at end of
	 * file as well, so an unterminated last line cannot loop forever.
	 */
	private void skipToEndOfLine() throws IOException {

		while (code != EOF && (char) code != '\n') {
			advance();
		}
		retract();
	}

	/**
	 * Reads a word (letters and digits) starting at the current character.
	 *
	 * @return the text of the word
	 */
	private String idSort() throws IOException {

		StringBuilder word = new StringBuilder();

		do {
			word.append((char) code);
			advance();
		} while (code != EOF && Character.isLetterOrDigit((char) code));
		retract();

		return word.toString();
	}

	/**
	 * Reads a numeric constant starting at the current digit and records its
	 * type in {@code constType} ("int" or "double").
	 *
	 * <p>The whole run of digits, letters and dots is consumed. It is accepted
	 * only if it is a digit sequence, optionally followed by a '.' and at
	 * least one more digit.
	 *
	 * @return the text of the constant, or null if it is malformed
	 */
	private String constSort() throws IOException {

		StringBuilder text = new StringBuilder();
		int state = STATE_INTEGER;

		do {
			char ch = (char) code;
			text.append(ch);
			switch (state) {
			case STATE_INTEGER:
				if (ch == '.') {
					state = STATE_AFTER_DOT;
				} else if (Character.isLetter(ch)) {
					state = STATE_INVALID;
				}
				break;
			case STATE_AFTER_DOT:
			case STATE_FRACTION:
				state = Character.isDigit(ch) ? STATE_FRACTION : STATE_INVALID;
				break;
			default:
				// Already invalid: keep consuming the rest of the lexeme.
				break;
			}
			advance();
		} while (code != EOF && (Character.isDigit((char) code)
				|| (char) code == '.' || Character.isLetter((char) code)));
		retract();

		if (state == STATE_INTEGER) {
			constType = "int";
		} else if (state == STATE_FRACTION) {
			constType = "double";
		} else {
			// Invalid, or ended right after the '.' (e.g. "1."): no type
			// could be assigned, so the constant is rejected.
			return null;
		}
		return text.toString();
	}

	/**
	 * Called on a '/': checks whether it starts a {@code //} comment and, if
	 * so, skips the comment. When it does not, the character after the '/'
	 * has been consumed and is left in {@code code}.
	 *
	 * @return true if a comment was recognised and skipped
	 */
	private boolean commentSort() throws IOException {

		advance();
		if (code == EOF || (char) code != '/') {
			return false;
		}
		skipToEndOfLine();
		return true;
	}

	/**
	 * Classifies the operator or delimiter starting at the current character,
	 * consuming a second character for two-character operators. A ';' also
	 * ends the current declaration and clears the remembered types.
	 *
	 * @return the token sort name, or null if the character is not recognised
	 */
	private String boardSort() throws IOException {

		switch ((char) code) {
		case '(':
			return "SLP";
		case ')':
			return "SRP";
		case '{':
			return "LP";
		case '}':
			return "RP";
		case '+':
			return followedBy('+') ? "INC" : "ADD";
		case '-':
			return followedBy('-') ? "DEC" : "SUB";
		case '*':
			return "FETCH";
		case '<':
			if (followedBy('>')) {
				return "NE";
			}
			return followedBy('=') ? "LE" : "LT";
		case '>':
			return followedBy('=') ? "GE" : "GT";
		case '=':
			return followedBy('=') ? "EQ" : "EVAL";
		case '&':
			return "AND";
		case '|':
			return "OR";
		case '!':
			return "NOT";
		case ',':
			return "COMMA";
		case ';':
			inDeclaration = false;
			declaredType = null;
			constType = null;
			return "SEMI";
		default:
			return null;
		}
	}

	/**
	 * Fills the keyword set and assigns every token sort its numeric code.
	 * Codes are consecutive starting at 1: keywords first, then the other
	 * token sorts, each in the order listed here.
	 */
	private void init() {

		String keyWords = "int double while prog do mod "
				+ "if then else true false and";
		String boardSymbol = "ID CONST SLP SRP LP RP "
				+ "ADD SUB FETCH INC DEC NE EQ LE GE "
				+ "GT LT EVAL AND OR NOT SEMI COMMA";

		int value = 1;
		for (String keyword : keyWords.split(" ")) {
			keyWordsTable.add(keyword);
			tokenMap.put(keyword, value++);
		}
		for (String board : boardSymbol.split(" ")) {
			tokenMap.put(board, value++);
		}
	}

	/** Number of the source line being scanned, starting at 1. */
	private int lineno = 1;
	/** Number of reads performed so far; used as the seek target when backing up. */
	private int current = 0;
	/** Most recently read byte, or -1 at end of file. */
	private int code;

	/** Tokens in the order they were recognised. */
	public ArrayList<Token> tokenStream = new ArrayList<Token>();
	/** Line numbers on which a lexical error was detected. */
	private ArrayList<Integer> errorLine = new ArrayList<Integer>();
	/** Identifier name mapped to its symbol-table entry number. */
	public HashMap<String, Integer> symbolMap = new LinkedHashMap<String, Integer>();
	/** Symbol-table entry number mapped to the identifier's declared type. */
	public HashMap<Integer, String> symbolTypeMap = new LinkedHashMap<Integer, String>();
	/** Constant text mapped to its constant-table entry number. */
	public HashMap<String, Integer> constMap = new LinkedHashMap<String, Integer>();
	/** Constant-table entry number mapped to the constant's type. */
	public HashMap<Integer, String> constTypeMap = new LinkedHashMap<Integer, String>();
	/** The reserved words of the language. */
	public HashSet<String> keyWordsTable = new HashSet<String>();
	/** Token sort (keyword text or symbolic name) mapped to its numeric code. */
	public HashMap<String, Integer> tokenMap = new LinkedHashMap<String, Integer>();
}

/**
 * One lexical token: its numeric code, its sort (keyword text or symbolic
 * name such as {@code ID} or {@code CONST}) and an integer attribute.
 */
class Token {
	/** Numeric code looked up for the sort in the token map. */
	private final int code;
	/** Keyword text or symbolic name of the token. */
	private final String sort;
	/** Table entry supplied for the token, or -1 for keywords. */
	private final int attribute;

	/**
	 * Creates a token carrying an explicit attribute.
	 *
	 * @param tokenSort sort of the token; must be a key of {@code tokenMap}
	 * @param Attribute value reported by {@link #getAttribute()}
	 * @param isId      true for an identifier entry, false otherwise; the
	 *                  attribute is stored the same way in both cases
	 * @param tokenMap  token sort mapped to its numeric code
	 * @throws NullPointerException if {@code tokenSort} has no code in
	 *                              {@code tokenMap}
	 */
	public Token(String tokenSort, int Attribute, boolean isId, HashMap<String, Integer> tokenMap) {
		this.sort = tokenSort;
		// Unboxing fails with a NullPointerException for an unknown sort.
		this.code = tokenMap.get(tokenSort);
		this.attribute = Attribute;
	}

	/**
	 * Creates a keyword token; its attribute is always -1.
	 *
	 * @param keyWord  the keyword text; must be a key of {@code tokenMap}
	 * @param tokenMap token sort mapped to its numeric code
	 * @throws NullPointerException if {@code keyWord} has no code in
	 *                              {@code tokenMap}
	 */
	public Token(String keyWord, HashMap<String, Integer> tokenMap) {
		this(keyWord, -1, true, tokenMap);
	}

	/** @return the numeric code of this token's sort */
	public int getTokenCode() {
		return code;
	}

	/** @return the sort of this token */
	public String getTokenSort() {
		return sort;
	}

	/** @return the attribute given at construction, or -1 for a keyword */
	public int getAttribute() {
		return attribute;
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -