⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scan.cpp

📁 一个c语言的编译器的源代码
💻 CPP
字号:

// Lexer of the compiler
// function: translate the source into tokens
//
// Written by bood, boodweb@163.com, http://boodweb.126.com
// 2004-08-06

//
// 2004.5.21
// fixed: the token type returned for '!=' should be NEQ rather than LTEQ
//

#pragma warning(disable:4786)

#include <string>
#include <sstream>
#include <list>
#include "global.h"
#include "util.h"
#include "scan.h"

using namespace std;

string TokenString;     // Text of the current token (filled in by GetToken)
TokenType token;        // Type of the current token (Restore stores GetToken's result here)

// Token history: GetToken appends every token it scans from the
// input, so that look-ahead can replay tokens instead of rescanning.
list<TokenUnit> tokenHist;

// Replay cursor into 'tokenHist', used for look-ahead while parsing.
// While it equals tokenHist.end(), GetToken scans fresh tokens from the
// input; otherwise GetToken returns the token it points at and advances it.
list<TokenUnit>::iterator tokenIt=tokenHist.end();


// Reserved words of the language and the token type of each.
// The list is terminated by a sentinel entry whose word is the empty
// string; LookupReserved stops searching when it reaches it. Only seven
// of the ten slots are explicitly initialized.
ReserveWordsUnit ReservedWords[10]={
    {"if",IF},{"else",ELSE},{"while",WHILE},
    {"return",RETURN},{"void",VOID},{"int",INT},{""}
};

// Classify an identifier-shaped lexeme.
// Returns the reserved word's token type when 'token' matches an entry
// of ReservedWords, and ID otherwise.
// Note: the parameter shadows the global 'token' inside this function.
TokenType LookupReserved(string &token)
{
    // Walk the table until the empty-word sentinel entry is reached
    for(int idx=0; ReservedWords[idx].word[0]!='\0'; ++idx)
    {
        if(token==ReservedWords[idx].word)
        {
            return ReservedWords[idx].type;
        }
    }

    // Not a reserved word: an ordinary identifier
    return ID;
}
//
// Main function of the lexer
//
// Return one token each time, meanwhile we save each
// token into a TokenUnit list, this makes no need to 
// do repeat scan while do forward looking. However, 
// it also costs memory, so improvements are needed
//
TokenType GetToken()
{
    TokenType rettype;

    // If the tokenIt is not NULL, i.e. a restoring is going on,
    // then just reads from that point in the token list
	if(tokenIt!=tokenHist.end()){
		rettype=(*tokenIt).token;
		TokenString=(*tokenIt).tokenstring;
		lineno=(*tokenIt).lineno;
		++tokenIt;
		return rettype;
	}

    // A DFA below, codes are almost trivial:)
    char ch;
    int save=1;
    ScanState state=START;
    TokenString="";
    while(state!=DONE)
    {
		istream *is = &fsource;
		ch=is->get();
		if(ch=='\n') lineno++;
        switch(state)
        {
        case START:
            if(isalpha(ch)) state=INID;
            else if(isdigit(ch)) state=INNUM;
            else if(ch=='<') state=INLTEQ;
            else if(ch=='>') state=INGTEQ;
            else if(ch=='=') state=INEQ;
            else if(ch=='!') state=INNEQ;
            else if(ch=='/') {
                if((ch=is->get())!='*') {
					is->putback(ch);
                    rettype=DIV;
                    state=DONE;
                }
                else{
                    state=INCOMMENT;
                    save=0;
                }
            }
            else if(ch==' ' || ch=='\n' || ch=='\t') {
				save=0;
			}
            else{
                switch(ch){
                case '+':
                    rettype=PLUS;
                    break;
                case '-':
                    rettype=MINUS;
                    break;
                case '*':
                    rettype=MUL;
                    break;
                case ';':
                    rettype=SEMI;
                    break;
                case ',':
                    rettype=COMMA;
                    break;
                case '(':
                    rettype=LPAREN;
                    break;
                case ')':
                    rettype=RPAREN;
                    break;
                case '[':
                    rettype=LSQUAR;
                    break;
                case ']':
                    rettype=RSQUAR;
                    break;
                case '{':
                    rettype=LBRACE;
                    break;
                case '}':
                    rettype=RBRACE;
                    break;
                case EOF:
                    save=0;
                    rettype=ENDFILE;
                    break;
                default:
                    save=0;
                    rettype=ERROR;
                    break;
                }
                state=DONE;
            }
            break;
        case INID:
            if(!isalpha(ch)) {
                is->putback(ch);
                rettype=ID;state=DONE;
                save=0;
            }
            break;
        case INNUM:
            if(!isdigit(ch)) {
                is->putback(ch);
                rettype=NUMBER;state=DONE;
                save=0;
            }
            break;
        case INLTEQ:
            if(ch=='=') {rettype=LTEQ;state=DONE;}
            else{is->putback(ch);rettype=LT;state=DONE;save=0;}
            break;
        case INGTEQ:
            if(ch=='=') {rettype=GTEQ;state=DONE;}
            else{is->putback(ch);rettype=GT;state=DONE;save=0;}
            break;
        case INEQ:
            if(ch=='=') {rettype=EQ;state=DONE;}
            else{is->putback(ch);rettype=ASSIGN;state=DONE;save=0;}
            break;
        case INNEQ:
            if(ch=='=') {rettype=NEQ;state=DONE;}
            else{is->putback(ch);rettype=ERROR;state=DONE;save=0;}
            break;
        case INCOMMENT:
            if(ch=='*'){
                if(is->get()=='/')
                    state=START;
            }
            save=0;
            break;
        }
        if(save==1) TokenString+=ch;
        else save=1;
    }
    if(rettype==ID){
        rettype=LookupReserved(TokenString);
    }

    // Save the token just identified
	TokenUnit tu;
	tu.lineno=lineno;
	tu.token=rettype;
	tu.tokenstring=TokenString;
	tokenHist.push_back(tu);

    ftoken<<TokenString<<endl;
    return rettype;
}

//
// Saving a backup point for later restore
//
// This function is essential for look-ahead.
// Though it seems simple, some explanation is needed to
// understand it:)
//
// First, if no restored backup point is being replayed ('tokenIt' == end),
// we just return the current token, i.e. the last one in the token list,
// for later restoring.
// Otherwise, a restored backup point is being replayed; we again return
// the current token, which this time is the one just before 'tokenIt'.
//
// Note! 'Backup' and 'Restore' can be nested; however, the calls to
// 'Restore' must be made in the order matching their corresponding
// 'Backup' calls, just like brackets.
list<TokenUnit>::iterator Backup()
{
    // Start from a copy of the replay cursor, so 'tokenIt' itself is
    // left untouched. The cursor is either end() (no replay in progress)
    // or one past the token most recently handed out by GetToken.
    list<TokenUnit>::iterator current=tokenIt;

    // In both cases the current token is the element just before it:
    // the last element of the history, or the one preceding 'tokenIt'.
    --current;
    return current;
}

// Restore a backup point. The parameter is the value
// returned by the 'Backup' function.
//
// Points the replay cursor 'tokenIt' at the saved token and immediately
// re-reads it through GetToken, so that 'token' (and 'TokenString' and
// 'lineno', which GetToken sets while replaying) describe the saved token
// again. Later GetToken calls keep replaying from 'tokenHist' until the
// cursor reaches its end.
void Restore(list<TokenUnit>::iterator &it)
{
	// Rewind the replay cursor to the saved position
	tokenIt=it;
	// Replay the saved token; this also advances the cursor past it
	token=GetToken();
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -