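// lexicalanalyze.java
// (A "font size:" control label followed here; it is code-viewer page residue, not part of the source.)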
/*
* 吕渊 200532580144
* 使用工具:eclipse
* Java SE 6
*/
import java.util.*;
/**
 * Lexical analyzer: splits the source text into {@link Tokens}, strips
 * block and line comments, removes whitespace tokens and records whether
 * a lexical error (unterminated block comment or malformed token) occurred.
 *
 * <p>All work happens in the constructor. Afterwards callers read
 * {@link #list}, {@link #lexicalError} and, for a printable report,
 * {@link #toArrayList()} together with {@link #tokenStart}/{@link #tokenEnd}.
 */
public class LexicalAnalyze {
    /** Symbols that are tokens of their own (every tokenizer delimiter except the space). */
    private static final String SYMBOLS = "+-*/=<>();{}[],";
    /** Delimiter set handed to StringTokenizer; delimiters are returned as tokens. */
    private static final String DELIMITERS = " " + SYMBOLS;
    /** Reserved words of the analysed language. */
    private static final Set<String> KEYWORDS = new HashSet<String>(
            Arrays.asList("if", "else", "while", "read", "write", "int", "real"));

    private String inputString = "";
    // Token stream; each entry stores text, type, line number and start/end offsets.
    public LinkedList<Tokens> list = new LinkedList<Tokens>();
    // Start/end offsets parallel to the list most recently returned by toArrayList();
    // -2 marks a heading row that has no source position.
    public ArrayList<Integer> tokenStart = new ArrayList<Integer>();
    public ArrayList<Integer> tokenEnd = new ArrayList<Integer>();
    // Token type codes.
    public final int CMMK = 1;  // keyword
    public final int CMMN = 2;  // number
    public final int CMMS = 3;  // symbol
    public final int CMMI = 4;  // identifier
    public final int CMME = 5;  // error
    public final int CMMSP = 6; // space
    // True when either kind of lexical error below was detected.
    public boolean lexicalError = false;
    // Whether a block comment was opened but never closed.
    private boolean commentNotEnd = false;
    // Index in list of the "/" token that opens the unterminated comment.
    private int commentLastAt = 0;
    // Whether a malformed token (type CMME) was found.
    private boolean wrongNamed = false;
    // Running character offset into the input: advanced by the length of every
    // token and by one for every newline.
    int start = 0;

    /**
     * Analyses {@code inputString} immediately and stores the result in this object.
     *
     * @param inputString the text to analyse; {@code null} is treated as empty text
     */
    public LexicalAnalyze(String inputString) {
        this.inputString = (inputString == null) ? "" : inputString;
        lexicalAnalyse();
        lexicalError = commentNotEnd || wrongNamed;
    }

    /**
     * Runs the whole analysis: tokenises every line, strips comments and,
     * if no comment was left open, removes whitespace tokens and looks for
     * error tokens.
     */
    private void lexicalAnalyse() {
        tokenizeLines();
        int openCommentAt = stripComments();
        if (openCommentAt >= 0) {
            commentNotEnd = true;
            commentLastAt = openCommentAt;
            return;
        }
        dropSpacesUntilFirstError();
    }

    /**
     * Splits the input at '\n' and tokenises each piece with its 1-based line number.
     * Empty lines in the middle of the input are not tokenised but still count
     * towards the line number; every newline advances {@link #start} by one.
     * The text after the last newline (possibly empty) is always tokenised.
     */
    private void tokenizeLines() {
        final int length = inputString.length();
        int line = 1;
        int pos = 0;
        int newline = inputString.indexOf('\n', pos);
        while (newline >= 0) {
            if (newline != pos) {
                lineLexicalAnalyse(inputString.substring(pos, newline), line);
            }
            start++; // the newline character itself
            // A newline that is the very last character does not open a new numbered line.
            if (newline != length - 1) {
                line++;
            }
            pos = newline + 1;
            newline = inputString.indexOf('\n', pos);
        }
        lineLexicalAnalyse(inputString.substring(pos), line);
    }

    /**
     * Removes comments from {@link #list}. A "/" directly followed by "*" opens a
     * block comment that ends at the next "*" "/" pair lying more than two tokens
     * after the opener; a "/" directly followed by "/" removes all following
     * tokens that carry the same line number. After each removed block comment
     * the scan restarts from the first token.
     *
     * @return the index of the "/" opening a block comment that was never closed,
     *         or -1 if every block comment was terminated
     */
    private int stripComments() {
        boolean inComment = false;
        int commentBegin = -1;
        boolean removedBlock;
        do {
            removedBlock = false;
            for (int i = 0; i < list.size(); i++) {
                // Comment openers are only recognised outside a block comment.
                if (!inComment && i < list.size() - 1 && "/".equals(list.get(i).token)) {
                    String following = list.get(i + 1).token;
                    if ("*".equals(following)) {
                        inComment = true;
                        commentBegin = i;
                    } else if ("/".equals(following)) {
                        removeRestOfLine(i);
                    }
                }
                // "i - commentBegin > 2" keeps the opener's own "*" from being
                // taken for the closing one (so "/*/" does not close itself).
                if (inComment && i - commentBegin > 2
                        && "/".equals(list.get(i).token)
                        && "*".equals(list.get(i - 1).token)) {
                    int count = i - commentBegin + 1;
                    for (int j = 0; j < count; j++) {
                        list.remove(commentBegin);
                    }
                    inComment = false;
                    removedBlock = true;
                    break; // indices shifted: rescan from the beginning
                }
            }
        } while (removedBlock);
        return inComment ? commentBegin : -1;
    }

    /**
     * Removes the token at {@code from} and every directly following token that
     * has the same line number.
     */
    private void removeRestOfLine(int from) {
        int commentLine = list.get(from).lineNo;
        ListIterator<Tokens> it = list.listIterator(from);
        while (it.hasNext()) {
            if (it.next().lineNo != commentLine) {
                break;
            }
            it.remove();
        }
    }

    /**
     * Removes whitespace tokens from the front of {@link #list} onwards. Stops at
     * the first error token and sets {@link #wrongNamed}; tokens after that error
     * are left untouched (the original behaviour, kept deliberately).
     */
    private void dropSpacesUntilFirstError() {
        Iterator<Tokens> it = list.iterator();
        while (it.hasNext()) {
            int type = it.next().type;
            if (type == CMMSP) {
                it.remove();
            } else if (type == CMME) {
                wrongNamed = true;
                break;
            }
        }
    }

    /**
     * Tokenises one line and appends the tokens to {@link #list}, followed by one
     * default-constructed {@code Tokens} entry that marks the end of the line.
     *
     * @param str  the line's text without its newline
     * @param line the 1-based line number stored in every token of this line
     */
    private void lineLexicalAnalyse(String str, int line) {
        // Tabs are treated as single spaces.
        str = str.replace('\t', ' ');
        StringTokenizer strT = new StringTokenizer(str, DELIMITERS, true);
        while (strT.hasMoreTokens()) {
            Tokens newToken = new Tokens();
            String text = strT.nextToken();
            newToken.token = text;
            newToken.lineNo = line;

            if (text.equals(" ")) {
                appendToken(newToken, CMMSP);
            } else if (text.contains(".")) {
                // Anything containing a dot is rejected as an error token.
                appendToken(newToken, CMME);
            } else if (isInteger(text)) {
                appendToken(newToken, CMMN);
            } else if (isSymbol(text)) {
                if (!list.isEmpty() && "=".equals(text) && "=".equals(list.getLast().token)) {
                    mergeWithPrevious(newToken, "==");
                } else if (!list.isEmpty() && ">".equals(text) && "<".equals(list.getLast().token)) {
                    mergeWithPrevious(newToken, "<>");
                } else {
                    appendToken(newToken, CMMS);
                }
            } else if (KEYWORDS.contains(text)) {
                appendToken(newToken, CMMK);
            } else if (isIdentifier(text)) {
                appendToken(newToken, CMMI);
            } else {
                appendToken(newToken, CMME);
            }
        }
        // End-of-line marker.
        // NOTE(review): later passes rely on the defaults of a fresh Tokens
        // (non-null token text, a type that is filtered or printable) - confirm
        // against the Tokens class.
        list.add(new Tokens());
    }

    /**
     * Gives {@code token} the supplied type and the offsets
     * [start, start + text length), advances {@link #start} and appends the token.
     */
    private void appendToken(Tokens token, int type) {
        token.type = type;
        token.start = start;
        start += token.token.length();
        token.end = start;
        list.add(token);
    }

    /**
     * Replaces the last token in {@link #list} with a two-character symbol that
     * begins one character before the current offset.
     */
    private void mergeWithPrevious(Tokens token, String mergedText) {
        token.token = mergedText;
        token.type = CMMS;
        token.start = start - 1;
        start++;
        token.end = start;
        list.set(list.size() - 1, token);
    }

    /** Returns whether {@code text} is accepted by {@link Integer#parseInt(String)}. */
    private static boolean isInteger(String text) {
        try {
            Integer.parseInt(text);
            return true;
        } catch (NumberFormatException notAnInt) {
            // Includes digit strings outside the int range; they are then
            // classified by the later checks, as before.
            return false;
        }
    }

    /** Returns whether {@code text} is one of the single-character symbols. */
    private static boolean isSymbol(String text) {
        return text.length() == 1 && SYMBOLS.indexOf(text.charAt(0)) >= 0;
    }

    /**
     * Returns whether {@code text} is a legal identifier: only ASCII letters,
     * digits and '_', not starting with a digit and not ending with '_'.
     */
    private boolean isIdentifier(String text) {
        if (Character.isDigit(text.charAt(0)) || text.charAt(text.length() - 1) == '_') {
            return false;
        }
        for (int i = 0; i < text.length(); i++) {
            char ch = text.charAt(i);
            if (!(isLetter(ch) || Character.isDigit(ch) || ch == '_')) {
                return false;
            }
        }
        return true;
    }

    /** Returns whether {@code ch} is an ASCII letter (A-Z or a-z). */
    private boolean isLetter(char ch) {
        return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
    }

    /**
     * Builds the printable result of the analysis and refills {@link #tokenStart}
     * and {@link #tokenEnd} so that entry {@code i} of both lists holds the source
     * offsets for row {@code i} of the returned list (-2 for heading rows).
     *
     * <p>If a block comment is unterminated, only that error is reported; else if
     * error tokens exist, only those are listed; otherwise every token is listed
     * under a heading for its line.
     *
     * @return the report rows, one string per row
     */
    public ArrayList<String> toArrayList() {
        // Start from empty offset lists so repeated calls do not accumulate rows.
        tokenStart.clear();
        tokenEnd.clear();
        ArrayList<String> rows = new ArrayList<String>();

        if (commentNotEnd) {
            Tokens opener = list.get(commentLastAt);
            // Report the line of the opening "/*", matching the "#<line>" suffix of
            // the other messages (a raw token index was printed here before).
            rows.add(" 注释未结束 #" + opener.lineNo);
            tokenStart.add(opener.start);
            tokenEnd.add(list.get(commentLastAt + 1).end); // end of the "*" after "/"
            return rows;
        }

        if (wrongNamed) {
            rows.add("出现错误内容:");
            tokenStart.add(-2);
            tokenEnd.add(-2);
            for (Tokens value : list) {
                if (value.type == CMME) {
                    rows.add(" " + value.token + " #" + value.lineNo);
                    tokenStart.add(value.start);
                    tokenEnd.add(value.end);
                }
            }
            return rows;
        }

        // Display names for type codes 1..4 (keyword, number, symbol, identifier).
        String[] typeNames = {"保留字", "数字", "符号", "标识符"};
        // Emit a heading whenever the line number changes.
        int lineNo = 0;
        for (Tokens value : list) {
            if (value.type == CMMSP) {
                continue;
            }
            if (value.lineNo != lineNo) {
                lineNo = value.lineNo;
                rows.add("第" + lineNo + "行:");
                tokenStart.add(-2);
                tokenEnd.add(-2);
            }
            // NOTE(review): assumes every remaining token has a type in 1..4.
            rows.add(" " + value.token + " ---------- "
                    + typeNames[value.type - 1] + " #" + value.lineNo);
            tokenStart.add(value.start);
            tokenEnd.add(value.end);
        }
        return rows;
    }
}
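// The lines below were keyboard-shortcut help text from the web code viewer this
// file was copied from; they are not part of the program:
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Switch theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -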