📄 lexer.java
字号:
package cmm.cmmcc;
import cmm.collections.*;
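/**
 * Lexical analyzer: extends the character scanner class ({@code CharScanner}) and
 * implements the token-type interface ({@code ITokenTypes}).
 * @author Huang Xuanxing
 */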
public class Lexer extends CharScanner implements ITokenTypes {
private static int length;// total length of the input text; static, so each new Lexer overwrites it in its constructor
private char curCh;// current character, i.e. the one most recently read by scan()
//public int endPlace;
/**
 * Creates a lexer over the given input text.
 * <p>
 * The text is handed to the {@code CharScanner} constructor, and its character
 * count is stored in the class-wide {@code length} field.
 *
 * @param inStr the complete input text to be tokenized
 */
public Lexer(String inStr) {
    super(inStr);
    // length is a static field, so it is assigned through the class name
    Lexer.length = inStr.length();
}
/**
 * Reports the total number of characters in the input text.
 *
 * @return the value stored in the static {@code length} field, which the
 *         constructor sets to the length of its input string
 */
public static int getLength(){
    return Lexer.length;
}
/**
 * Returns the next token that is not a comment.
 * <p>
 * Tokens of type 99 are treated as comment content (per the original author's
 * note on this check) and are discarded. The filter is a loop rather than a
 * single test, so any number of consecutive comment tokens is skipped; a single
 * test would hand the second of two adjacent comments back to the caller.
 *
 * @return the first token produced by {@link #scan()} whose type is not 99
 */
public Token nextToken() {
    Token tk = scan();
    // keep scanning until something other than a comment token (type 99) shows up
    while (tk.getType() == 99) {
        tk = scan();
    }
    return tk;
}
/**
 * Scans the input character by character (legacy implementation, kept commented out below).
 * Known problems with this version: using {@code rest} to decide when to stop is unreliable,
 * and the end-of-file marker is never read.
 */
/*
public Token scan(){
Token tk = null;
int rest = 0;//还未被扫描的字符数
rest = length - CharQueue.head - 1;
if (rest > 0){//输入的字符大于一个
while(rest > 0){//有剩余字符
curCh = nextChar();//获取当前字符
tokenStart();//记录当前Token的行号列号
if (('a' <= curCh && 'z' >= curCh) || ('A' <= curCh && 'Z' >= curCh)){//当前字符为字母,判断是否为标识符
tk = isIdentifier(CharQueue.head - 1, curCh);
rest = length - CharQueue.head;
break;
}//if
//当前字符为数字,判断是否为数字
else if ('0' == curCh || '1' == curCh || '2' == curCh || '3' == curCh || '4' == curCh
|| '5' == curCh || '6' == curCh || '7' == curCh || '8' == curCh || '9' == curCh)
{
tk = isNumber(CharQueue.head - 1, curCh);
rest = length - CharQueue.head;
break;
}//else if
//当前字符为单元字段
else if ('*' == curCh || '(' == curCh || ')' == curCh || ';' == curCh || '{' == curCh
|| '}' == curCh || ']' == curCh || '[' == curCh || ',' == curCh || '&' == curCh
|| '|' == curCh || '#' == curCh)
{
tk = isSingleSymbol(CharQueue.head - 1, curCh);
rest = length - CharQueue.head;
break;
}//else if
//当前字符为'+'或'-',进入判断是操作符号还是正负数的判断
else if('+' == curCh || '-' == curCh){
//////////
tk = isAddOrMinus(CharQueue.head - 1, curCh);
rest = length - CharQueue.head;
break;
}
//当前字符可能为多元字段
else if ('=' == curCh || '<' == curCh || '>' == curCh || '/' == curCh || '!' == curCh || '"' == curCh)
{
tk = isMulSymbol(CharQueue.head - 1, curCh);
rest = length - CharQueue.head;
break;
}//else if
else if (curCh == 'ん')
{
tk = new Token(EOF,Character.toString(curCh),tokenColumn,tokenLine);
break;
}
//其他字符
else
{
tk = isSpecialSymbol(CharQueue.head - 1, curCh);
rest = length - CharQueue.head;
break;
}//else
}//while
}//if输入的字符大于一个
else//只输入一个字符
{
curCh = nextChar();//获取当前字符
switch (curCh)
{
case '*':
case '(':
case ')':
case ';':
case '{':
case '}':
case ',':
case '|':
case '&':
case '[':
case ']':
case '#':
tk = isSingleSymbol(CharQueue.head - 1, curCh);
break;
case '+':
case '-':
tk = isAddOrMinus(CharQueue.head - 1, curCh);
break;
case '=':
case '<':
case '>':
case '/':
case '"':
case '!':
//多元字符
tk = isMulSymbol(CharQueue.head - 1, curCh);
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
tk = isNumber(CharQueue.head - 1, curCh);
break;
}//switch
if (('a' <= curCh && 'z' >= curCh) || ('A' <= curCh && 'Z' >= curCh))
{
tk = isIdentifier(CharQueue.head - 1, curCh);
}//if
else if ('0' != curCh && '1' != curCh && '2' != curCh && '3' != curCh && '4' != curCh && '5' != curCh
&& '6' != curCh && '7' != curCh && '8' != curCh && '9' != curCh && '+' != curCh && '-' != curCh
&& '*' != curCh && '/' != curCh && '(' != curCh && ')' != curCh && '{' != curCh && '}' != curCh
&& '=' != curCh && '<' != curCh && '>' != curCh && '[' != curCh && ']' != curCh && ';' != curCh
&& '!' != curCh && '&' != curCh && '|' != curCh)
{
tk = isSpecialSymbol(CharQueue.head - 1, curCh);
}//else if
}//else 只输入一个字符
return tk;
}//scan
*/
/**
 * Reads one character and dispatches to the recognizer that matches it.
 * <p>
 * When the recorded input length is zero (or less) no character is read and an
 * {@code EOF} token positioned at (1, 1) is returned. Otherwise the character is
 * classified as a letter, a digit, a single-character symbol, a plus/minus sign,
 * the start of a possibly multi-character symbol, the end-of-file marker
 * {@code 'ん'}, or "anything else", and the corresponding helper builds the token.
 *
 * @return the token built for the character that was read, or an {@code EOF}
 *         token when the input is empty or the end-of-file marker is reached
 */
public Token scan(){
    // nothing was supplied at all
    if (length <= 0) {
        return new Token (EOF,"ん",1,1);
    }

    // fetch the current character
    curCh = nextChar();
    // record the line and column of the token that starts here (per the original comment)
    tokenStart();
    // value passed to every helper as its startPlace argument
    final int startPlace = CharQueue.head - 1;

    // letters start an identifier candidate
    final boolean lowerCase = curCh >= 'a' && curCh <= 'z';
    final boolean upperCase = curCh >= 'A' && curCh <= 'Z';
    if (lowerCase || upperCase) {
        return isIdentifier(startPlace, curCh);
    }

    // ASCII digits start a number candidate
    if (curCh >= '0' && curCh <= '9') {
        return isNumber(startPlace, curCh);
    }

    switch (curCh) {
        // single-character symbols
        case '*':
        case '(':
        case ')':
        case ';':
        case '{':
        case '}':
        case ']':
        case '[':
        case ',':
        case '&':
        case '|':
        case '#':
            return isSingleSymbol(startPlace, curCh);

        // '+' / '-': operator versus signed number is decided by the helper
        case '+':
        case '-':
            return isAddOrMinus(startPlace, curCh);

        // characters that may begin a multi-character symbol
        case '=':
        case '<':
        case '>':
        case '/':
        case '!':
        case '"':
            return isMulSymbol(startPlace, curCh);

        // end-of-file marker
        case 'ん':
            return new Token(EOF,Character.toString(curCh),tokenColumn,tokenLine);

        // every other character
        default:
            return isSpecialSymbol(startPlace, curCh);
    }
}//scan
/**
 * Builds the token for a single-character symbol.
 * <p>
 * The token text is the character itself, and the token position is taken from
 * the current {@code tokenColumn} / {@code tokenLine} values.
 *
 * @param startPlace accepted for symmetry with the other recognizers; not used here
 * @param curCh the character to classify
 * @return a token of the matching type for one of
 *         {@code , ; ( ) [ ] { } * & | #}, or {@code null} for any other character
 */
public Token isSingleSymbol(int startPlace, char curCh)
{
    final String text = Character.toString(curCh);
    switch (curCh) {
        case ',': return new Token(COMMA,text,tokenColumn,tokenLine);
        case ';': return new Token(SEMI,text,tokenColumn,tokenLine);
        case '(': return new Token(LPAR,text,tokenColumn,tokenLine);
        case ')': return new Token(RPAR,text,tokenColumn,tokenLine);
        case '[': return new Token(LSQU,text,tokenColumn,tokenLine);
        case ']': return new Token(RSQU,text,tokenColumn,tokenLine);
        case '{': return new Token(LBRA,text,tokenColumn,tokenLine);
        case '}': return new Token(RBRA,text,tokenColumn,tokenLine);
        case '*': return new Token(MULT,text,tokenColumn,tokenLine);
        case '&': return new Token(AND,text,tokenColumn,tokenLine);
        case '|': return new Token(OR,text,tokenColumn,tokenLine);
        case '#': return new Token(DEC,text,tokenColumn,tokenLine);
        // not one of the single-character symbols
        default:  return null;
    }
}//isSingleSymbol()
/**
* 判断是否是多元运算符号
* @param startPlace
* @param curCh
*/
public Token isMulSymbol(int startPlace, char curCh)
{
Token tk = null;
String charBuffer = Character.toString(curCh);
switch (curCh)
{
case '\'':
if(LA(1)!= 'ん' && LA(1)!= '\r'&& LA(1) != '\t')
{
if(LA(2) == '\'')
{
if(LA(1) == ' ')
{
charBuffer += nextChar();
tk = new Token(VCHAR,"' '",tokenColumn,tokenLine);
}
else
{
charBuffer += nextChar();
charBuffer += nextChar();
tk = new Token(VCHAR,charBuffer,tokenColumn,tokenLine);
}
}
}
else
{
tk = new Token(INVALIDCHAR,charBuffer,tokenColumn,tokenLine);
}
break;
case '"':
//下一个字符是"
if(LA(1) == '"')
{
charBuffer += nextChar();
tk = new Token(VSTRING,charBuffer,tokenColumn,tokenLine);
}
//下一个字符是换行符或文件终结符
else if(LA(1) == '\r' || LA(1) == 'ん')
{
tk = new Token(INVALIDSTRING,charBuffer,tokenColumn,tokenLine);
}
//下一字符是其他字符
else
{
boolean key = true;
while(key)
{
//遇到空格制表符号,回溯补回相应数目的空格,并加入下一个其他字符
if(LA(1) == ' ' || LA(1) == '\t')
{
char chFollowBlanks = nextChar();
boolean backKey = true;
while(backKey)
{
int i = -1;
if(LA(i) == ' ')
{
charBuffer += " ";
i--;
}
else if(LA(i) == '\t')
{
charBuffer += " ";
i--;
}
else
{
charBuffer += chFollowBlanks;
backKey = false;
}
}
}
else if(LA(1) == '"')
{
charBuffer += nextChar();
tk = new Token(VSTRING,charBuffer,tokenColumn,tokenLine);
key = false;
}
else if(LA(1) == '\r' || LA(1) == 'ん')
{
tk = new Token(INVALIDSTRING,charBuffer,tokenColumn,tokenLine);
key = false;
}
else
{
charBuffer += nextChar();
}
}
}
break;
case '/':
if (startPlace + 1 < length)//当前字符不是最后一个字符
{
if (LA(1)=='*')//为多行注释标识符
{
charBuffer += nextChar();
if ((startPlace + 2) < length)// "/*"不是最后一个字符
{
for (int i = (startPlace + 2); i < length; i++)
{
if(LA(1)== 'ん')
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -