📄 lexicalanalyser.java

📁 JAVA 数学程序库提供常规的数值计算程序包
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
package jmathlib.core.interpreter;

import jmathlib.core.tokens.*;
import jmathlib.core.tokens.numbertokens.*;
import jmathlib.core.constants.*;


/**Class for turning a string expression into a sequence of tokens.
Create an instance, hand the expression to analyseExpression() and
then call getNextToken() repeatedly; each call returns the next
token of the expression.*/
public class LexicalAnalyser implements TokenConstants, ErrorCodes
{
    /**The expression being worked on (stored trimmed by analyseExpression())*/
    private String exp="";

    /**bracketLevel is used to implement bracketing within expressions*/
    private int bracketLevel;
    
    /**the last token parsed, i.e. the token most recently returned by getNextToken()*/
    private Token lastToken;

    /** previous valid character: the character in front of the read pointer,
        recorded by getNextChar(). It is 0 until a character has been read,
        which handleEmptyLine() treats as "start of file". */
    private char previousChar;

    /** read pointer: index into exp of the next character to be read.
        Increased by getNextChar() and advance(), decreased by backTrack(). */
    private int   charNo;

    /**If all characters are processed the value of EOCharsB is set to TRUE*/
    private boolean EOCharsB;

    //set up strings used to determine token type
    /**List of characters recognised as being digits*/ 
    private String  numberChars;
    //private String  operatorChars      = "+-/*^<>~=:"; 
    //private String  unaryOperatorChars = "!";
    
    /**List of alphanumeric characters (letters, digits and underscore)*/
    private String  textChars;
    
    /**List of delimiters*/
    private String  delimiterChars; 

    /**The list of reserved words, each one surrounded by blanks*/ 
    private String  reservedWords;

    /**special reserved words which act as delimiters, each one surrounded by blanks*/
    private String delimiterWords;

    /** currently scanned line of code; built up character by character and
        cleared at every line break, returned by getScannedLineOfCode() */
    private String codeLine = "";
    
    /**store the next scanned token; set to null by scanNextToken() when no
       more tokens are available*/
    private Token nextToken;

    /**store whether the next value should be negative; reset at the start
       of every scanNextToken() call*/
    private boolean negative;

    /**hide evaluation of invisible code (comments, signs of numbers);
       while set, scanNextToken() keeps scanning instead of returning */
    private boolean invisibleCode;
    
    /**switch to enable parsing of 'spaces' and 'return' characters;
       getNextToken() turns it on only for type MATRIX */
    private boolean parseWhitespaceSwitch = false;
    
    /**
     * Default constructor - creates the lexical analyser object with an
     * empty expression and fills in the character classes and word lists
     * that are used to classify the scanned text.
     */
    public LexicalAnalyser()
    {
        // character classes
        numberChars    = "0123456789";
        textChars      = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890_";
        delimiterChars = ",()[];{}\n";

        // word lists: every word is enclosed in blanks, so the leading and
        // the trailing " " of each list are very important !!
        reservedWords  = " break do exit try catch continue "
                       + " for help history hold if load more return "
                       + " load dir ls save set show who whos "
                       + " cd chdir clear diary echo format "
                       + " type global isglobal "
                       + " save switch while ";

        delimiterWords = " end endif else elseif endfunction endwhile endfor "
                       + " case default otherwise endswitch ";
    }

    /**Interface function used to analyse an expression. The expression is
       stored trimmed and the scanner is rewound to its first character, so
       one analyser object can be used for several expressions in a row.
       @param expression - expression to be analysed   */
    public void analyseExpression(String expression)
    {
        //ErrorLogger.debugLine(expression);
        exp = expression.trim();

        // Rewind the scanner. Without this a reused analyser would carry on
        // at the read position of the previous expression and could index
        // past the end of a shorter one inside getNextChar().
        charNo       = 0;
        previousChar = 0;    // 0 is what handleEmptyLine() treats as "start of file"
        codeLine     = "";

        // forget everything that belonged to the previous expression
        bracketLevel = 0;
        lastToken    = null;
        nextToken    = null;

        EOCharsB = false;
    }

    /**
     * Scans the expression for the next token and remembers it as the
     * last token parsed.
     * @param type - parsing context; MATRIX enables the parsing of whitespaces
     * @return the next token in the input string, or null if no more
     *         tokens are available
     */
    public Token getNextToken(int type)
    {
        // whitespaces are only parsed when the requested type is MATRIX
        parseWhitespaceSwitch = (type == MATRIX);

        scanNextToken();

        Token scanned = nextToken;
        lastToken = scanned;

        //if (scanned != null)
        //    ErrorLogger.debugLine("Lex "+scanned.toString());

        return scanned;
    }
    
    /**
     * Gives access to the line of code that is currently being scanned,
     * e.g. for use in an error message.
     * @return the characters read so far on the current line
     */
    public String getScannedLineOfCode()
    {
        return this.codeLine;
    }

/******************************************************************************
*                     internal methods                                        *      
*      e.g: sin(3+4)
*             |
*           pointer
*******************************************************************************
*
*  inspectNextChar() returns '(' and leaves the pointer at the current position
*      e.g: sin(3+4)
*             |
*           pointer
*******************************************************************************
*
*  getNextChar()  returns '(' and increases the pointer
*      e.g: sin(3+4)
*              |
*            pointer
*******************************************************************************
*  advance() only increases the pointer
*      e.g: sin(3+4)
*              |
*            pointer
******************************************************************************/

    /**
     * Tells whether the end of the characters has been reached.
     * @return true if the expression is empty or the end-of-chars flag
     *         has been set, i.e. no more characters are available for parsing
     */
    public boolean EOChars()
    {
        // an empty expression is always at its end
        return (exp.length() == 0) || EOCharsB;
    }

    /**
     * Peeks at the character under the pointer without moving the pointer.
     * @return the next character of the expression, or a blank if the
     *         pointer is already behind the last character
     */
    private char inspectNextChar()
    {
        return (charNo < exp.length()) ? exp.charAt(charNo) : ' ';
    }

    /**
     * Reads the character under the pointer and moves the pointer on to
     * the following character. The character in front of the old pointer
     * position is remembered as the previous character.
     * @return the character read, or a blank if all characters have
     *         already been read (the end-of-chars flag is set in that case)
     */
    private char getNextChar()
    {
        // remember the character in front of the pointer before moving on
        if (charNo > 0)
            previousChar = exp.charAt(charNo - 1);

        // nothing left to read: end of chars reached
        if (charNo >= exp.length())
        {
            EOCharsB = true;
            return ' ';
        }

        char current = exp.charAt(charNo);
        charNo++;
        EOCharsB = false;

        // build string of currently scanned line of code for error message,
        // a line break starts a new (empty) line
        if ((current == '\n') || (current == '\r'))
            codeLine = "";
        else
            codeLine += current;

        return current;
    }

    /**
     * Moves the pointer of the character array one position forward
     * without returning a character. If the pointer already is on (or
     * behind) the last character it is left alone and the end-of-chars
     * flag is set instead.
     */
    private void advance()
    {
        // NOTE(review): the limit is length()-1 here but length() in
        // getNextChar(), so the last character is never skipped by
        // advance() - confirm that this is intended.
        if (charNo >= (exp.length() - 1))
        {
            EOCharsB = true; // end of chars reached
            return;
        }

        charNo++;
        EOCharsB = false;

        // build string of currently scanned line of code for error message,
        // a line break starts a new (empty) line
        char skipped = exp.charAt(charNo - 1);
        if ((skipped == '\n') || (skipped == '\r'))
            codeLine = "";
        else
            codeLine += skipped;
    }

    /**
     * Gives access to the previous character as recorded by getNextChar().
     * @return the previous character (0 as long as none has been recorded)
     */
    private char getPreviousChar()
    {
        return this.previousChar;
    }

    /**
     * Moves the pointer back to the previous character. Nothing happens
     * if the pointer is already at the start of the expression.
     */
    private void backTrack()
    {
        if (charNo <= 0)
            return;

        charNo -= 1;
    }

    //private char inspectNextNonWhitespaceChar() 
    //{
	//if (charNo >= exp.length()) return ' ';
	//return exp.charAt(charNo);  
    //} 

/*****************************************************************************/

    /**
     * Parses the characters of the expression to find the next token.
     * Each character is offered to a series of handler functions until
     * one of them reports that it has handled the character. Characters
     * handled as invisible code (comments, signs of numbers, ...) do not
     * end the scan.
     * @return true if a visible token has been found, false if all
     *         characters were used up (nextToken is set to null then)
     */
    private boolean scanNextToken() throws MathLibException
    {
        negative = false;  // reset sign indicator

        // leave the loop as soon as a token has been found
        //  or no more characters are available
        while (!EOChars())
        {
            // character to analyse in this round
            char current = getNextChar();

            // handlers raise this flag for comments, '+' '-' signs of numbers ...
            invisibleCode = false;

            // try the handlers one after the other, the first one that
            // returns true has handled the character and ends the chain
            boolean handled = handleEmptyLine(current)
                           || handleSpace(current)
                           || handleComment(current)
                           || handleNumber(current)
                           || handleOperator(current)
                           || handleText(current)
                           || handleString(current)
                           || handleDelimiter(current)
                           || handleDotOperator(current);

            // invisible code never ends the scan
            if (invisibleCode)
                continue;

            // a valid, visible token is available
            if (handled)
                return true;

            // ignore any characters that are not recognized
            // could be something like cd /programs/word
            ErrorLogger.debugLine("LexAna: don't know what to do with: >"+current+"<");
        }

        // no more tokens available
        nextToken = null;

        return false;

    } // end scanNextToken
    
//*************************************************************************************************
//Utility functions
//*************************************************************************************************
    /**
     * Sometimes a file starts with some empty line or comments.
     * This methods removes all returns,tabs,comments at the beginning of a file or after a new line
     */
    private boolean handleEmptyLine(char nextChar)
    {
        boolean foundToken = false;
        
        // check if previous char is start of file OR a line feed
        if ((getPreviousChar() ==   0  ) ||
            (getPreviousChar() == '\n' )   )
        {
            
            //nextChar = inspectNextChar();
            
            // loop until all whitespaces, comments are removed
            while (true)
            {
                
                // check for all types of invisible chars
                if  ((nextChar == ' ') ||
                     (nextChar == '\t')  ) 
                {
                    //remove multiple spaces and tabs
                    while ((inspectNextChar() == ' ') ||
                           (inspectNextChar() == '\t')  )
                    {
                        advance();
                    }
                    invisibleCode = true;
                    foundToken    = true;
                }
                else if (nextChar == '\n')
                {
                    // remove return
                    //nextChar = getNextChar();
                    invisibleCode = true;
                    foundToken    = true;
                }
                else if ((nextChar == '#') ||
                         (nextChar == '%')  )
                {
                    // comment for the rest of this line
                    // e.g.                   # some comment
                    // e.g. #some comment
                    while((inspectNextChar() != '\n') && (!EOChars()))
                    {
                        advance();
                    }
                    invisibleCode = true;
                    foundToken    = true;
                }
                else
                    break;
                
                // check if next char would be a whitespace or comment again
                nextChar = inspectNextChar();
                if ((nextChar == ' ')  ||
                    (nextChar == '\t') ||
                    (nextChar == '\n') ||
                    (nextChar == '#')  ||
                    (nextChar == '%')     )
                {
                    advance();
                }
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -