📄 lexer.cs

📁 charp compiler
💻 CS
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
                
                // Preprocessor directives have syntax:
                // #if <exp>
                // #elif <exp>
                // #define <symbol>
                // #undef <symbol>
                return new Token(e, CalcCurFileRange());                
            }
#endregion

            // Since we've skipped past all whitespace & comments, after here, we're
            // no longer at the start of the line.
            m_fStartOfLine = false;

                        
#region Identifiers (including 'true' & 'false')         
            // Is this the start of an identifier?
            if (IsFirstIdChar(iCh))
            {
                string stId = "" + (char) iCh;
                
                iCh = Peek();
                while(IsIdChar(iCh))
                {
                    stId +=(char) iCh;
                    Read(); // consume
                    
                    iCh = Peek();
                }
                
                // Lookup for keyword (or bool)
                object o = m_keywords[stId];
                if (o != null)
                {
                    Token.Type e = (Token.Type) o;
                    
                    if (e == Token.Type.cBool) 
                    {                        
                        return new Token(e, (stId == "true"), CalcCurFileRange());
                    } else {
                        return new Token(e, CalcCurFileRange());
                    }
                }
                                                   
                // Return identifier
                return new Token(Token.Type.cId, stId, CalcCurFileRange());
            }
#endregion            
          
#region Characters            
            // Look for character literals (between single quotes)
            if (iCh == '\'')
            {
                char val = ReadFormattedChar();
                iCh = Read();
                if (iCh != '\'')
                    //Debug.Assert(false, "Unterminated character constant"); // @todo -legit
                    ThrowError(E_UnterminatedChar());
            
                return new Token(Token.Type.cChar, val, CalcCurFileRange());
            }
#endregion
            
#region String Literals            
            // Look for string literal            
            if (iCh == '\"')
            {
                System.Text.StringBuilder bld = new System.Text.StringBuilder();
                
                while(Peek() != '\"')
                {
                    if (Peek() == '\n')
                    {
                        //Debug.Assert(false, "Can't have newling in string literal"); // @todo- legit
                        ThrowError(E_NoNewlineInString());
                    }
                    
                    iCh = ReadFormattedChar();                    
                    bld.Append((char) iCh);
                }
                this.Read(); // eat closing quote.
            
                string stLiteral = bld.ToString();
                return new Token(Token.Type.cString, stLiteral, CalcCurFileRange());
            }
#endregion

#region Integers            
            // Ints
            // As Hex: 0xAAAAAAAA, 0XAAAAAAAA            
            if (iCh == '0')
            {
                if (Peek() == 'X' || Peek() == 'x')
                {
                    Read(); // consume 'x' or 'X'
                    int val = 0;
                                        
                    int d;
                    while((d = AsHexDigit(Peek())) != -1)
                    {
                        val = (val * 16 + d);
                        Read();
                    }
                    
                    return new Token(Token.Type.cInt, val, CalcCurFileRange());
                }            
            }
            int fSign = 1;
            /*
             * Lexer can't resolve negatives because '-1' could be '-' '1'.
            if (iCh == '-' && IsDigit(Peek()))
            {
                fSign = -1;                
                iCh = Read();
                // fall through to normal int case
            }
            */
            if (IsDigit(iCh))
            {
                int val = iCh - '0';
                 
                iCh = Peek();
                while(IsDigit(iCh))
                {
                    val = (val * 10) + (iCh - '0');
                    Read(); // consume                    
                    iCh = Peek();
                }
            
                return new Token(Token.Type.cInt, val * fSign, CalcCurFileRange());
            }
            Debug.Assert(fSign == 1); // if this isn't 1, then negative case didn't fall through
#endregion            
            
#region Operators             
            Token.Type type = Token.Type.cEOF;
            int i2 = Peek();
            
            switch (iCh)
            {
            case ':':
                type = Token.Type.cColon; break;

            case '?':
                type = Token.Type.cQuestion; break;                

            case '[':
            {
                int cDim = 1; // dimension starts at 1
                while(i2 == ',')
                {
                    cDim++;
                    Read();
                    i2 = Peek();
                }
                if (i2 == ']') {
                    Read();                    
                    return new Token(Token.Type.cLRSquare,cDim, CalcCurFileRange());
                } else {
                    if (cDim != 1)
                        break; // error.
                }
            }
                type = Token.Type.cLSquare; break;
                
            case ']':
                type = Token.Type.cRSquare; break;
                    
            case  '{':
                type = Token.Type.cLCurly; break;
            
            case  '}':
                type = Token.Type.cRCurly; break;
             
            case  '(':
                type = Token.Type.cLParen; break;
             
            case  ')':
                type = Token.Type.cRParen; break;
             
            case  ';':
                type = Token.Type.cSemi; break;
             
            case  ',':
                type = Token.Type.cComma; break;
             
            case  '.':
                type = Token.Type.cDot; break;
             
            case  '=':               
                if (i2 == '=') {
                    Read();
                    type = Token.Type.cEqu;
                } else 
                    type = Token.Type.cAssign;
            
                break;
            
            case '!':
                if (i2 == '=')
                {
                    Read();
                    type = Token.Type.cNeq;
                } else 
                    type = Token.Type.cNot;
                break;
            
            case '<': // <, <=, <<, <<=
                if (i2 == '=')
                {
                    Read();    
                    type = Token.Type.cLTE;
                } else if (i2 == '<')
                {
                    Read();
                    i2 = Peek();
                    if (i2 == '=')
                    {
                        Read();
                        type = Token.Type.cShiftLeftEqual;                         
                    } else
                        type = Token.Type.cShiftLeft;
                } else 
                    type = Token.Type.cLT;
                break;

                    
            case '>': // >, >=, >>, >>=
                if (i2 == '=')
                {
                    Read();    
                    type = Token.Type.cGTE;
                } 
                else if (i2 == '>')
                {
                    Read();
                    i2 = Peek();
                    if (i2 == '=')
                    {
                        Read();
                        type = Token.Type.cShiftRightEqual;                         
                    } 
                    else
                        type = Token.Type.cShiftRight;
                } 
                else 
                    type = Token.Type.cGT;              
                break;    

            case '&': // &, &=, &&
                if (i2 == '&')
                {
                    Read();    
                    type = Token.Type.cAnd;
                } else if (i2 == '=')
                {
                    Read();
                    type = Token.Type.cBitwiseAndEqual;
                } else
                    type = Token.Type.cBitwiseAnd;
                break;  
                
            case '|': // |, |=, ||
                if (i2 == '|')
                {
                    Read();    
                    type = Token.Type.cOr;
                } else if (i2 == '=')
                {
                    Read();
                    type = Token.Type.cBitwiseOrEqual;                
                } else 
                    type = Token.Type.cBitwiseOr;
                break;            
                      
            case '^': // ^, ^=                
                if (i2 == '=')
                {
                    Read();
                    type = Token.Type.cBitwiseXorEqual;                
                } 
                else 
                    type = Token.Type.cBitwiseXor;
                break;                 
            
            case  '+':         
                if (i2 == '+')
                {
                    Read();
                    type = Token.Type.cPlusPlus;
                } else if (i2 == '=')
                {
                      Read();
                      type = Token.Type.cPlusEqual;
                } else 
                    type = Token.Type.cPlus; 

                break;
            
            case  '-':            
                if (i2 == '-')
                {
                    Read();
                    type = Token.Type.cMinusMinus;
                } else if (i2 == '=')
                {
                    Read();
                    type = Token.Type.cMinusEqual;
                } 
                else 
                    type = Token.Type.cMinus; 

                break;
            
            case  '*':
                if (i2 == '=')
                {
                    Read();
                    type = Token.Type.cMulEqual;
                } else
                    type = Token.Type.cMul; 
                break;
                        
            case  '%':
                if (i2 == '=')
                {
                    Read();
                    type = Token.Type.cModEqual;
                } 
                else
                    type = Token.Type.cMod; 
                break;
            
            
            }
            if (type != Token.Type.cEOF)
                return new Token(type, CalcCurFileRange());
#endregion        
               
            // Here's an error
            m_fIsErrorMode = true;
            return new Token(Token.Type.cError, (char) iCh, Peek(), CalcCurFileRange());
        } while(true); // break out of this loop by returning a token
        
        
        
        
    } // End GetNextTokenWorker()


    /// <summary>
    /// Reads one logical character from the input, translating a backslash
    /// escape sequence (<c>\'</c>, <c>\"</c>, <c>\\</c>, <c>\0</c>, <c>\a</c>,
    /// <c>\b</c>, <c>\f</c>, <c>\n</c>, <c>\r</c>, <c>\t</c>, <c>\v</c>) into
    /// the single character it denotes.
    /// </summary>
    /// <returns>
    /// The character read; for a recognized escape sequence, the character
    /// the escape stands for.
    /// </returns>
    /// <remarks>
    /// End of input (Read() returning -1), either as the first character or
    /// right after a backslash, is reported via ThrowError(E_UnexpectedEOF()).
    /// An unknown character after a backslash is reported via
    /// ThrowError(E_UnrecognizedEscapeSequence(...)).
    /// NOTE(review): ThrowError is defined outside this block; presumably it
    /// throws and never returns - confirm.
    /// </remarks>
    char ReadFormattedChar()
    {
        int iCh = this.Read();
        if (iCh == -1)
        {
            // @todo - legit error (was once a Debug.Assert for "Unexpected EOF")
            ThrowError(E_UnexpectedEOF()); // in character or string
        }

        if (iCh == '\\')
        {
            // Consume the character that selects the escape.
            int iCh2 = Read();
            switch (iCh2)
            {
            case '\'':  iCh = '\''; break;
            case '\"':  iCh = '\"'; break;
            case '\\':  iCh = '\\'; break;
            case '0':   iCh = '\0'; break;
            case 'a':   iCh = '\a'; break;
            case 'b':   iCh = '\b'; break;
            case 'f':   iCh = '\f'; break;
            case 'n':   iCh = '\n'; break;
            case 'r':   iCh = '\r'; break;
            case 't':   iCh = '\t'; break;
            case 'v':   iCh = '\v'; break;
            case -1:
                ThrowError(E_UnexpectedEOF());
                break;
            default:
                // Report the character that followed the backslash. iCh is
                // always '\\' here, so passing it would hide which escape
                // was actually rejected.
                ThrowError(E_UnrecognizedEscapeSequence((char) iCh2));
                break;
            }
        }

        return (char) iCh;
    } // end of ReadFormattedChar()
    
}



} // end namespace ManualParser
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -