⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scanner.cpp

📁 词法分析的程序。核心函数是一个状态切换的函数 CAjaxParserDlg::Route。状态切换函数根据一个 DFA 来对输入的文本进行分析。也就是说……
💻 CPP
字号:

/****************************************************/
/* File: scanner.cpp                                */
/* The scanner implementation for the C- compiler   */
/* Xiang Cui (sean)                                 */
/* 230030782                                        */
/****************************************************/

#include "scanner.h"
#include "map.h"

// Reserved-word table: maps a keyword's spelling to its token type.
// It is filled in by the Myscanner constructor and queried by
// Myscanner::reservedLookup().
map<string,TokenType> keywords;

// Constructs a scanner over the source file named by FileName.
//
// Resets the scanner's bookkeeping members, registers the reserved words in
// the file-level `keywords` table and opens the input stream `sf`.
// If the file cannot be opened, a message is written to cout and the
// process terminates with exit status 1.
Myscanner::Myscanner(const char *FileName)
{
    // Initial scanner state: first line, DFA at START, empty lexeme.
    lineno = 1;
    currentToken = ID;
    tokens = "";
    state = START;
    save = true;

    // Reserved words recognised by reservedLookup().
    keywords["while"] = WHILE;
    keywords["void"] = VOID;
    keywords["return"] = RETURN;
    keywords["int"] = INT;
    keywords["if"] = IF;
    keywords["else"] = ELSE;

    // Open the source file; give up immediately when that fails.
    sf.open(FileName);
    if (sf.fail())
    {
        cout << "File " << FileName << " not found" << endl;
        exit(1);
    }
}

// Reads the next token from the source stream `sf` (per the original note,
// an ifstream over the C- source file) by driving a DFA over its characters.
//
// On return:
//   - `currentToken` (also the return value) is the recognised token type;
//     identifiers are passed through reservedLookup() so that reserved words
//     come back with their own type.
//   - `tokens` holds the token's text ("EOF" for ENDFILE).
//   - `lineno` is the line on which the token ended.
//
// Comments (/* ... */) are skipped: once one is closed the scanner simply
// continues with the following token. "Line:<n> " is written to cout every
// time the DFA reaches DONE (including at the end of a comment), exactly as
// callers of the original implementation saw it.
//
// End of input:
//   - at a token boundary, or inside an unterminated comment, ENDFILE is
//     returned;
//   - in the middle of a token (e.g. the file ends with "abc" and no trailing
//     newline) the pending token is returned first and ENDFILE is reported by
//     the next call.
// A read failure other than end-of-file is treated like end of input so the
// loop can never spin on a broken stream.
TokenType Myscanner::scan()
{
    tokens = "";
    state = START;

    while (state != DONE)
    {
        save = true;            // by default the character read joins the lexeme
        bool pushBack = false;  // set when the character read was only lookahead

        if (!sf.get(c))
        {
            // No more input: finish whatever token is in progress, if any.
            bool pendingToken = true;
            switch (state)
            {
            case INNUM:   currentToken = NUM;    break;
            case INID:    currentToken = ID;     break;
            case INEQ:    currentToken = ASSIGN; break;
            case INLT:    currentToken = LT;     break;
            case INGT:    currentToken = GT;     break;
            case INSLASH: currentToken = DIV;    break;
            case INNEQ:   currentToken = ERROR;  break;  // lone '!'
            case ERR:     currentToken = ERROR;  break;
            default:      pendingToken = false;  break;  // START or inside a comment
            }
            if (!pendingToken)
            {
                currentToken = ENDFILE;
                tokens = "EOF";
                state = DONE;
                return currentToken;
            }
            c = '\0';
            save = false;       // nothing was read, so nothing to append
            state = DONE;
        }
        else
        {
            if (c == '\n')      // count the line as soon as its newline is consumed
                lineno++;

            // <cctype> functions require a value representable as unsigned char.
            const unsigned char uc = static_cast<unsigned char>(c);
            const bool letter = isalpha(uc) != 0;
            const bool digit = isdigit(uc) != 0;

            switch (state)
            {
            case START:
                if (digit)
                    state = INNUM;
                else if (letter)
                    state = INID;
                else if (c == '=')      // ASSIGN or EQ, decided by the next character
                    state = INEQ;
                else if (c == '<')      // LT or LTEQ
                    state = INLT;
                else if (c == '>')      // GT or GTEQ
                    state = INGT;
                else if (c == '!')      // NEQ, or ERROR when not followed by '='
                    state = INNEQ;
                else if (c == '/')      // DIV or the start of a comment
                    state = INSLASH;
                else if ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\r'))
                    save = false;       // skip whitespace between tokens
                else
                {
                    // Anything else is a single-character token (or an error).
                    state = DONE;
                    switch (c)
                    {
                    case '+': currentToken = PLUS;   break;
                    case '-': currentToken = MINUS;  break;
                    case '*': currentToken = TIMES;  break;
                    case '(': currentToken = LPAREN; break;
                    case ')': currentToken = RPAREN; break;
                    case ';': currentToken = SEMI;   break;
                    case ',': currentToken = COMMA;  break;
                    case '[': currentToken = LSQR;   break;
                    case ']': currentToken = RSQR;   break;
                    case '{': currentToken = LCRLY;  break;
                    case '}': currentToken = RCRLY;  break;
                    default:  currentToken = ERROR;  break;
                    }
                }
                break;

            case INLCMNT:               // inside a comment body
                save = false;
                if (c == '*')
                    state = INRCMNT;    // possible start of the closing "*/"
                break;

            case INRCMNT:               // inside a comment, just saw '*'
                save = false;
                if (c == '/')
                {
                    state = DONE;
                    currentToken = RCMNT;
                }
                else if (c != '*')      // a run of '*' keeps the "*/" candidate alive
                    state = INLCMNT;
                break;

            case INEQ:
                state = DONE;
                if (c == '=')
                    currentToken = EQ;
                else
                {
                    pushBack = true;
                    currentToken = ASSIGN;
                }
                break;

            case INNEQ:
                state = DONE;
                if (c == '=')
                    currentToken = NEQ;
                else
                {
                    pushBack = true;
                    currentToken = ERROR;   // '!' on its own is not a C- token
                }
                break;

            case INLT:
                state = DONE;
                if (c == '=')
                    currentToken = LTEQ;
                else
                {
                    pushBack = true;
                    currentToken = LT;
                }
                break;

            case INGT:
                state = DONE;
                if (c == '=')
                    currentToken = GTEQ;
                else
                {
                    pushBack = true;
                    currentToken = GT;
                }
                break;

            case INSLASH:
                if (c == '*')
                {
                    currentToken = LCMNT;
                    state = INLCMNT;
                    save = false;
                }
                else
                {
                    pushBack = true;
                    currentToken = DIV;
                    state = DONE;
                }
                break;

            case INNUM:
                if (letter)
                {
                    // A letter glued to a number: consume the rest as an error.
                    currentToken = ERROR;
                    state = ERR;
                }
                else if (!digit)
                {
                    pushBack = true;
                    state = DONE;
                    currentToken = NUM;
                }
                break;

            case INID:
                if (digit)
                {
                    // A digit glued to an identifier: consume the rest as an error.
                    currentToken = ERROR;
                    state = ERR;
                }
                else if (!letter)
                {
                    pushBack = true;
                    state = DONE;
                    currentToken = ID;
                }
                break;

            case ERR:
                // Swallow the remaining letters/digits of the malformed lexeme;
                // currentToken was already set to ERROR on entry to this state.
                if (!letter && !digit)
                {
                    pushBack = true;
                    state = DONE;
                }
                break;

            case DONE:
            default:                    // should never happen
                state = DONE;
                currentToken = ERROR;
                break;
            }
        }

        if (pushBack)
        {
            // The character belongs to the next token: return it to the stream,
            // undo its line count (it will be counted again when re-read) and
            // keep it out of the current lexeme.
            if (c == '\n')
                lineno--;
            sf.unget();
            c = '\0';
            save = false;
        }

        if (save)
            tokens += c;

        if (state == DONE)
        {
            if (currentToken == ID)
                currentToken = reservedLookup(tokens);
            cout<<"Line:"<<lineno<<" ";
            if (currentToken == RCMNT)
            {
                // A comment was skipped: restart the DFA for the real token.
                tokens = "";
                state = START;
            }
        }
    }
    return currentToken;
}




// Writes a one-line, human-readable description of a token to cout.
//
//   token       - the token type to describe
//   tokenString - the token's text; only used for reserved words, NUM, ID
//                 and ERROR, whose output includes the text itself
//
// Tokens with a fixed spelling print just that spelling. Any token type not
// listed here prints "ERROR: should never happen".
void Myscanner::printToken(TokenType token, string tokenString)
{
    const char *fixedText = "";

    switch (token)
    {
    // Tokens whose output carries the lexeme are printed right here.
    case ELSE:
    case INT:
    case IF:
    case RETURN:
    case VOID:
    case WHILE:
        cout << "reserved word: " << tokenString << endl;
        return;
    case NUM:
        cout << "NUM, val=" << tokenString << endl;
        return;
    case ID:
        cout << "ID, name=" << tokenString << endl;
        return;
    case ERROR:
        cout << "ERROR: " << tokenString << endl;
        return;

    // Tokens with a fixed spelling share the single print statement below.
    case ASSIGN:  fixedText = "=";   break;
    case EQ:      fixedText = "==";  break;
    case NEQ:     fixedText = "!=";  break;
    case LT:      fixedText = "<";   break;
    case LTEQ:    fixedText = "<=";  break;
    case GT:      fixedText = ">";   break;
    case GTEQ:    fixedText = ">=";  break;
    case PLUS:    fixedText = "+";   break;
    case MINUS:   fixedText = "-";   break;
    case TIMES:   fixedText = "*";   break;
    case DIV:     fixedText = "/";   break;
    case LPAREN:  fixedText = "(";   break;
    case RPAREN:  fixedText = ")";   break;
    case LSQR:    fixedText = "[";   break;
    case RSQR:    fixedText = "]";   break;
    case LCRLY:   fixedText = "{";   break;
    case RCRLY:   fixedText = "}";   break;
    case LCMNT:   fixedText = "/*";  break;
    case RCMNT:   fixedText = "*/";  break;
    case SEMI:    fixedText = ";";   break;
    case COMMA:   fixedText = ",";   break;
    case ENDFILE: fixedText = "EOF"; break;

    default: /* should never happen */
        fixedText = "ERROR: should never happen";
        break;
    }

    cout << fixedText << endl;
}

// Classifies an identifier-shaped lexeme.
//
//   str - the lexeme to look up in the file-level `keywords` table
//
// Returns the reserved word's token type when `str` is in the table,
// otherwise ID. The table is searched once and the result is read through
// the iterator, instead of a find() followed by a second lookup via
// operator[].
TokenType Myscanner::reservedLookup (string str)
{
    map<string,TokenType>::iterator entry = keywords.find(str);
    if (entry != keywords.end())
    {
        return entry->second;
    }
    return ID;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -