📄 lexer.java

📁 编译器中的词法分析
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
package cmm.cmmcc;

import cmm.collections.*;

/**
 * Lexical analyzer (lexer) class. It extends the character scanner class
 * and implements the token-type interface.
 * @author Huang Xuanxing
 *
 */
public class Lexer extends CharScanner implements ITokenTypes {
	
	private static int length;// total length of the input string; static, so it is shared by every Lexer and overwritten by each constructor call
	private char curCh;// character currently being examined; assigned from nextChar() in scan()
	//public int endPlace;
	
	/**
	 * Creates a lexer over the given source text.
	 * <p>
	 * The text is passed to the superclass constructor, and its length is
	 * stored in the static {@code length} field. Because that field is static,
	 * {@link #getLength()} always reports the length of the input given to the
	 * most recently constructed lexer.
	 *
	 * @param inStr the complete input to tokenize; must not be {@code null},
	 *              since {@code inStr.length()} is called unconditionally
	 */
	public Lexer(String inStr) {
		super(inStr);
		Lexer.length = inStr.length();
	}
	
	/**
	 * Returns the total number of characters in the input.
	 *
	 * @return the length recorded by the most recent {@code Lexer} constructor
	 *         call, or 0 if no lexer has been constructed yet
	 */
	public static int getLength(){
		return Lexer.length;
	}
	
	/**
	 * Returns the next significant token from the input.
	 * <p>
	 * Tokens are produced by {@link #scan()}. Every token whose type is 99,
	 * which the original code treats as comment content, is discarded, and
	 * scanning continues until a token of any other type is found.
	 *
	 * @return the first token produced by {@code scan()} whose type is not 99
	 */
	public Token nextToken() {
		// Type code used to filter out comment content.
		// NOTE(review): presumably ITokenTypes declares a named constant for
		// this value -- confirm and use it instead of the literal.
		final int commentType = 99;

		Token tk = scan();
		// Loop instead of testing once: with two or more comments in a row, a
		// single check would hand the second comment token to the caller.
		while (tk.getType() == commentType) {
			tk = scan();
		}
		return tk;
	}
	
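	/**
	 * Scans the input one character at a time.
	 * @return the scanned token
	 * *************************************
	 * *************************************
	 * Known problem (applies to the commented-out implementation that follows):
	 * using "rest" to decide when to stop is faulty, and the end-of-file
	 * marker is never read.
	 * *************************************
	 * *************************************
	 */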
	/*
	public Token scan(){
		Token tk = null;
		int rest = 0;//还未被扫描的字符数
		rest = length -  CharQueue.head - 1;
		
		if (rest > 0){//输入的字符大于一个
			while(rest > 0){//有剩余字符
				curCh = nextChar();//获取当前字符
				tokenStart();//记录当前Token的行号列号
				if (('a' <= curCh && 'z' >= curCh) || ('A' <= curCh && 'Z' >= curCh)){//当前字符为字母，判断是否为标识符
					tk = isIdentifier(CharQueue.head - 1, curCh);
					rest = length - CharQueue.head;
                    break;
                }//if
				//当前字符为数字，判断是否为数字
                else if ('0' == curCh || '1' == curCh || '2' == curCh || '3' == curCh || '4' == curCh
                    || '5' == curCh || '6' == curCh || '7' == curCh || '8' == curCh || '9' == curCh)
                {
                    tk = isNumber(CharQueue.head - 1, curCh);
                    rest = length - CharQueue.head;
                    break;
                }//else if
				//当前字符为单元字段
                else if ('*' == curCh || '(' == curCh || ')' == curCh || ';' == curCh || '{' == curCh 
                	|| '}' == curCh || ']' == curCh || '[' == curCh || ',' == curCh || '&' == curCh 
                	|| '|' == curCh || '#' == curCh)
                {
                    tk = isSingleSymbol(CharQueue.head - 1, curCh);
                    rest = length - CharQueue.head;
                    break;
                }//else if
				//当前字符为'+'或'-',进入判断是操作符号还是正负数的判断
                else if('+' == curCh || '-' == curCh){
                	//////////
                	tk = isAddOrMinus(CharQueue.head - 1, curCh);
                	rest = length - CharQueue.head;
                	break;
                }
				//当前字符可能为多元字段
                else if ('=' == curCh || '<' == curCh || '>' == curCh || '/' == curCh || '!' == curCh || '"' == curCh)
                {
                    tk = isMulSymbol(CharQueue.head - 1, curCh);
                    rest = length - CharQueue.head;
                    break;
                }//else if
                else if (curCh == 'ん')
                {
                	tk = new Token(EOF,Character.toString(curCh),tokenColumn,tokenLine);
                	break;
                }
				//其他字符
                else
                {
                    tk = isSpecialSymbol(CharQueue.head - 1, curCh);
                    rest = length - CharQueue.head;
                    break;
                }//else                 
            }//while
        }//if输入的字符大于一个

        else//只输入一个字符
        {
        	curCh = nextChar();//获取当前字符

            switch (curCh)
            {
                case '*':
                case '(':
                case ')':
                case ';':
                case '{':
                case '}':
                case ',':
                case '|':
                case '&':
                case '[':
                case ']':
                case '#':
                    tk = isSingleSymbol(CharQueue.head - 1, curCh);
                    break;
                case '+':
                case '-':
                	tk = isAddOrMinus(CharQueue.head - 1, curCh);
                	break;
                case '=':
                case '<':
                case '>':
                case '/':
                case '"':
                case '!':
                	//多元字符
                    tk = isMulSymbol(CharQueue.head - 1, curCh);
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    tk = isNumber(CharQueue.head - 1, curCh);
                    break;
            }//switch

            if (('a' <= curCh && 'z' >= curCh) || ('A' <= curCh && 'Z' >= curCh))
            {
                tk = isIdentifier(CharQueue.head - 1, curCh);
            }//if
            else if ('0' != curCh && '1' != curCh && '2' != curCh && '3' != curCh && '4' != curCh && '5' != curCh 
            	&& '6' != curCh && '7' != curCh && '8' != curCh && '9' != curCh && '+' != curCh && '-' != curCh 
            	&& '*' != curCh && '/' != curCh && '(' != curCh && ')' != curCh && '{' != curCh && '}' != curCh 
            	&& '=' != curCh && '<' != curCh && '>' != curCh && '[' != curCh && ']' != curCh && ';' != curCh
            	&& '!' != curCh && '&' != curCh && '|' != curCh)
            {
                tk = isSpecialSymbol(CharQueue.head - 1, curCh);
            }//else if
        }//else 只输入一个字符
		return tk;
    }//scan
	*/
	
	/**
	 * Reads one character and dispatches to the recognizer for its category.
	 * <p>
	 * If the recorded input length is not positive, an {@code EOF} token with
	 * text {@code "ん"} at position (1, 1) is returned without reading anything.
	 * Otherwise the next character is fetched into {@code curCh},
	 * {@code tokenStart()} is called (per the original comment, it records the
	 * line and column of the current token), and the character is routed as
	 * follows:
	 * <ul>
	 *   <li>ASCII letter: {@code isIdentifier}</li>
	 *   <li>ASCII digit: {@code isNumber}</li>
	 *   <li>{@code * ( ) ; { } [ ] , & | #}: {@code isSingleSymbol}</li>
	 *   <li>{@code + -}: {@code isAddOrMinus} (operator versus signed number)</li>
	 *   <li>{@code = < > / ! "}: {@code isMulSymbol} (possibly multi-character)</li>
	 *   <li>{@code 'ん'} (the end-of-input marker): an {@code EOF} token</li>
	 *   <li>anything else: {@code isSpecialSymbol}</li>
	 * </ul>
	 * Each recognizer receives {@code CharQueue.head - 1} as the start position.
	 *
	 * @return the token built by the selected recognizer
	 */
	public Token scan(){
		// Nothing was entered at all: report end of input straight away.
		if (length <= 0) {
			return new Token (EOF,"ん",1,1);
		}

		// Fetch the current character, then mark the start of the token.
		curCh = nextChar();
		tokenStart();

		// ASCII letters start an identifier.
		boolean startsWithLetter = (curCh >= 'a' && curCh <= 'z') || (curCh >= 'A' && curCh <= 'Z');
		if (startsWithLetter) {
			return isIdentifier(CharQueue.head - 1, curCh);
		}

		// ASCII digits start a number.
		if (curCh >= '0' && curCh <= '9') {
			return isNumber(CharQueue.head - 1, curCh);
		}

		switch (curCh) {
			// Single-character symbols.
			case '*':
			case '(':
			case ')':
			case ';':
			case '{':
			case '}':
			case ']':
			case '[':
			case ',':
			case '&':
			case '|':
			case '#':
				return isSingleSymbol(CharQueue.head - 1, curCh);

			// '+' or '-': either an operator or the sign of a number.
			case '+':
			case '-':
				return isAddOrMinus(CharQueue.head - 1, curCh);

			// Characters that may begin a multi-character lexeme.
			case '=':
			case '<':
			case '>':
			case '/':
			case '!':
			case '"':
				return isMulSymbol(CharQueue.head - 1, curCh);

			// End-of-file marker.
			case 'ん':
				return new Token(EOF,Character.toString(curCh),tokenColumn,tokenLine);

			// Every other character.
			default:
				return isSpecialSymbol(CharQueue.head - 1, curCh);
		}
	}//scan
	
	/**
	 * Builds the token for a single-character symbol.
	 * <p>
	 * Handled characters and their token types: {@code ,} COMMA, {@code ;} SEMI,
	 * {@code (} LPAR, {@code )} RPAR, {@code [} LSQU, {@code ]} RSQU,
	 * {@code {} LBRA, {@code }} RBRA, {@code *} MULT, {@code &} AND,
	 * {@code |} OR, {@code #} DEC. The token text is the character itself and
	 * its position is taken from {@code tokenColumn} and {@code tokenLine}.
	 *
	 * @param startPlace start position of the symbol; not used by this method
	 * @param curCh the symbol character to convert
	 * @return the matching token, or {@code null} if {@code curCh} is not one
	 *         of the characters listed above
	 */
    public Token isSingleSymbol(int startPlace, char curCh) 
    {
        // The lexeme is the same in every case, so build it once up front.
        String lexeme = Character.toString(curCh);

        switch (curCh) {
            case ',':
                return new Token(COMMA, lexeme, tokenColumn, tokenLine);
            case ';':
                return new Token(SEMI, lexeme, tokenColumn, tokenLine);
            case '(':
                return new Token(LPAR, lexeme, tokenColumn, tokenLine);
            case ')':
                return new Token(RPAR, lexeme, tokenColumn, tokenLine);
            case '[':
                return new Token(LSQU, lexeme, tokenColumn, tokenLine);
            case ']':
                return new Token(RSQU, lexeme, tokenColumn, tokenLine);
            case '{':
                return new Token(LBRA, lexeme, tokenColumn, tokenLine);
            case '}':
                return new Token(RBRA, lexeme, tokenColumn, tokenLine);
            case '*':
                return new Token(MULT, lexeme, tokenColumn, tokenLine);
            case '&':
                return new Token(AND, lexeme, tokenColumn, tokenLine);
            case '|':
                return new Token(OR, lexeme, tokenColumn, tokenLine);
            case '#':
                return new Token(DEC, lexeme, tokenColumn, tokenLine);
            default:
                // Not a single-character symbol handled here.
                return null;
        }
    }//isSingleSymbol()

    /**
     * 判断是否是多元运算符号
     * @param startPlace
     * @param curCh
     */
    public Token isMulSymbol(int startPlace, char curCh)
    {   
    	Token tk = null;
    	String charBuffer = Character.toString(curCh);
        switch (curCh)
        {
        	case '\'':
        		if(LA(1)!= 'ん' && LA(1)!= '\r'&& LA(1) != '\t')
        		{
        			if(LA(2) == '\'')
        			{
        				if(LA(1) == ' ')
        				{
	        				charBuffer += nextChar();
	        				tk = new Token(VCHAR,"' '",tokenColumn,tokenLine);
        				}
        				else
        				{
        					charBuffer += nextChar();
        					charBuffer += nextChar();
        					tk = new Token(VCHAR,charBuffer,tokenColumn,tokenLine);
        				}
        			}
        		}
        		else
        		{
        			tk = new Token(INVALIDCHAR,charBuffer,tokenColumn,tokenLine);
        		}
        		break;
        	case '"':
        		//下一个字符是"
        		if(LA(1) == '"')
        		{
        			charBuffer += nextChar();
        			tk = new Token(VSTRING,charBuffer,tokenColumn,tokenLine);
        		}
        		//下一个字符是换行符或文件终结符
        		else if(LA(1) == '\r' || LA(1) == 'ん')
        		{
        			tk = new Token(INVALIDSTRING,charBuffer,tokenColumn,tokenLine);
        		}
        		//下一字符是其他字符
        		else
        		{
        			boolean key = true;
        			
        			while(key)
        			{
        				//遇到空格制表符号，回溯补回相应数目的空格，并加入下一个其他字符
        				if(LA(1) == ' ' || LA(1) == '\t')
        				{
        					char chFollowBlanks = nextChar();
        					boolean backKey = true;
        					
        					while(backKey)
        					{
        						int i = -1;
        						if(LA(i) == ' ')
        						{
        							charBuffer += " ";
        							i--;
        						}
        						else if(LA(i) == '\t')
        						{
        							charBuffer += "        ";
        							i--;
        						}
        						else
        						{
        							charBuffer += chFollowBlanks;
        							backKey = false;
        						}
        					}        					
        				}
        				else if(LA(1) == '"')
        				{
        					charBuffer += nextChar();
        					tk = new Token(VSTRING,charBuffer,tokenColumn,tokenLine);
        					key = false;
        				}
        				else if(LA(1) == '\r' || LA(1) == 'ん')
        				{
        					tk = new Token(INVALIDSTRING,charBuffer,tokenColumn,tokenLine);
        					key = false;
        				}
        				else
        				{
        					charBuffer += nextChar();
        				}
        			}
        		}
        		break;
            case '/':
                if (startPlace + 1 < length)//当前字符不是最后一个字符
                {
                    if (LA(1)=='*')//为多行注释标识符
                    {
                    	charBuffer += nextChar();

                        if ((startPlace + 2) < length)// "/*"不是最后一个字符
                        {
                            for (int i = (startPlace + 2); i < length; i++)
                            {
                            	if(LA(1)== 'ん')
                            	{
12 3 下一页
💿 文件大小 40 K
👤 上传用户 xiao11tian
📂 所属分类编译器/解释器
🏷️ 相关标签

#编译器 #分
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -