// scanner.cpp
//
// From "Lexical, syntax and semantic analyzer for a subset of the Java language, implemented in C++"
// C++ source · 1,113 lines in total · page 1 of 2
#include "scanner.h"

/**
 * Constructs a scanner over the source file fn_in and opens the three
 * output streams.
 *
 * The whole input file is read into buf, terminated with C_EOI, and the
 * first character and first token are scanned before the constructor
 * returns.
 *
 * @param fn_in   path of the source file to scan
 * @param fn_out  path of the output file (truncated, then opened for append)
 * @param fn_err  path of the error file (truncated, then opened for append)
 * @param fn_lex  path of the lexical output file (truncated, then opened for append)
 *
 * If fn_in cannot be opened the constructor returns early: token stays -1,
 * the buffer holds only C_EOI, and none of the output files are touched.
 */
Scanner::Scanner(char * fn_in, char * fn_out, char * fn_err, char * fn_lex)
{
    token = -1;
    count_line = 0;
    count_file = 0;

    // Establish a well-defined "empty input" cursor state up front, so the
    // early return below does not leave these members indeterminate.
    buf[0] = C_EOI;
    buflen = 0;
    line = 1;
    col = 0;
    bp = -1;
    endPos.Reset();

    // Read the source file into the buffer.
    int ptr = 0;
    inFile.open(fn_in);
    if (!inFile.is_open())
    {
        ch = buf[0];
        return;
    }
    // NOTE(review): this loop has no check against the capacity of buf
    // (declared in scanner.h, not visible here) - confirm that oversized
    // inputs are rejected elsewhere or add a bound.
    while (inFile.get(buf[ptr]))
    {
        ptr++;
    }
    inFile.close();
    buf[ptr] = C_EOI;
    buflen = ptr;

    // Create (or truncate) the output, error and lexical files. fopen may
    // fail, and fclose on a null pointer is undefined behaviour, so guard it.
    const char * targets[] = { fn_out, fn_err, fn_lex };
    for (int i = 0; i < 3; i++)
    {
        FILE * fp = fopen(targets[i], "w");
        if (fp != NULL)
        {
            fclose(fp);
        }
    }
    outFile.open(fn_out, ios::out | ios::app);
    errFile.open(fn_err, ios::out | ios::app);
    lexFile.open(fn_lex, ios::out | ios::app);

    // Prime the scanner: load the first character and the first token.
    ScanChar();
    NextToken();
}

/**
 * Closes the output, error and lexical streams if they are still open.
 *
 * The original checks were inverted (close() was only called on streams
 * that were NOT open); each stream is now closed when it is open.
 */
Scanner::~Scanner()
{
    if (outFile.is_open())
    {
        outFile.close();
    }
    if (errFile.is_open())
    {
        errFile.close();
    }
    if (lexFile.is_open())
    {
        lexFile.close();
    }
}

/**
 * Advances to the next character of buf, storing it in ch and updating the
 * line/column counters and endPos.
 *
 * - C_CR always starts a new line.
 * - C_LF starts a new line unless it directly follows a C_CR (so a CR LF
 *   pair counts as a single line break).
 * - C_TAB moves col to the next multiple of TabInc.
 * - Any other character advances col by one.
 *
 * endPos is set to (line, col - 1); when that column would be -1 (we are at
 * the very start of a line) it is set to a position on the previous line
 * instead.
 */
void Scanner::ScanChar()
{
    // Column before consuming this character; used as the end column on the
    // previous line when this character is a line break.
    int oldcol = col;
    bp++;
    ch = buf[bp];
    switch (ch)
    {
    case C_CR:
        col = 0;
        line++;
        break;
    case C_LF:
        if (bp == 0 || buf[bp - 1] != C_CR)
        {
            col = 0;
            line++;
        }
        else
        {
            // LF of a CR LF pair: the CR already advanced the line and left
            // col at 0. Previously oldcol was read uninitialised on this
            // path; reuse the end column recorded when the CR was scanned.
            oldcol = endPos.col;
        }
        break;
    case C_TAB:
        col = (col / TabInc * TabInc) + TabInc;
        break;
    default:
        col++;
        break;
    }
    endPos.Set(line, col - 1);
    if (endPos.col == -1)
    {
        endPos.Set(line - 1, oldcol);
    }
}

/**
 * Scans a numeric literal starting at the current character.
 *
 * Digits are collected into the lexeme buffer via PutChar. For radix <= 10
 * digits are accepted as base 10, otherwise as base 16. When radix <= 10, a
 * '.' or one of e/E/f/F/d/D hands over to ScanFractionAndSuffix().
 * Otherwise a trailing l/L is consumed and yields P_LONGLITERAL, and
 * anything else yields P_INTLITERAL.
 *
 * @param radix  radix of the literal; also stored in this->radix
 */
void Scanner::ScanNumber(int radix)
{
    this->radix = radix;

    const bool decimalLike = (radix <= 10);
    const int digitRadix = decimalLike ? 10 : 16;

    // Collect the run of digits.
    for (; Digit(digitRadix) >= 0; ScanChar())
    {
        PutChar(ch);
    }

    if (decimalLike)
    {
        switch (ch)
        {
        case '.':
            // Decimal point: keep it in the lexeme, then scan the fraction.
            PutChar(ch);
            ScanChar();
            ScanFractionAndSuffix();
            return;

        case 'e': case 'E':
        case 'f': case 'F':
        case 'd': case 'D':
            // Exponent or floating-point suffix directly after the digits.
            ScanFractionAndSuffix();
            return;

        default:
            break;
        }
    }

    // Integer literal, optionally with a long suffix.
    if (ch == 'l' || ch == 'L')
    {
        ScanChar();
        token = P_LONGLITERAL;
        return;
    }
    token = P_INTLITERAL;
}

/**
 * Appends one character to the lexeme buffer sbuf and advances sp.
 *
 * @param ch  character to append
 */
void Scanner::PutChar(char ch)
{
    sbuf[sp] = ch;
    ++sp;
}

/**
 * Scans the next token from the input and stores its kind in token.
 *
 * The lexeme buffer is cleared and the per-line / per-file token counters
 * are incremented first. White space and comments are skipped; pos is set
 * to the (line, col) at which the returned token starts, and count_line is
 * reset to 1 whenever pos moves to a later line.
 */
void Scanner::NextToken()
{
    sp = 0;
    memset(sbuf, '\0', MAX_FILE);
    ++count_line;
    ++count_file;

    for (;;)
    {
        const int prevLine = pos.line;
        pos.Set(line, col);
        if (pos.line > prevLine)
        {
            count_line = 1;
        }

        switch (ch)
        {
        // ---- white space: skip and look again ----
        case ' ': case C_TAB: case C_FF: case C_CR: case C_LF:
            ScanChar();
            continue;

        // ---- identifiers and keywords ----
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
        case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
        case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
        case 'V': case 'W': case 'X': case 'Y': case 'Z':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
        case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
        case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
        case 'v': case 'w': case 'x': case 'y': case 'z':
        case '$': case '_':
            ScanIdent();
            return;

        // ---- numeric literals ----
        case '0':
            ScanChar();
            if (ch == 'x' || ch == 'X')
            {
                // Hexadecimal: at least one hex digit must follow the prefix.
                ScanChar();
                if (Digit(16) < 0)
                {
                    LexError("Invalid hexadecimal number");
                }
                ScanNumber(16);
            }
            else
            {
                // Leading zero is kept in the lexeme; scan as octal.
                PutChar('0');
                ScanNumber(8);
            }
            return;

        case '1': case '2': case '3': case '4': case '5':
        case '6': case '7': case '8': case '9':
            ScanNumber(10);
            return;

        case '.':
            ScanChar();
            if ('0' <= ch && ch <= '9')
            {
                // ".5"-style floating-point literal.
                PutChar('.');
                ScanFractionAndSuffix();
            }
            else
            {
                token = P_DOT;
            }
            return;

        // ---- single-character separators ----
        case ',': ScanChar(); token = P_COMMA;    return;
        case ';': ScanChar(); token = P_SEMI;     return;
        case '(': ScanChar(); token = P_LPAREN;   return;
        case ')': ScanChar(); token = P_RPAREN;   return;
        case '[': ScanChar(); token = P_LBRACKET; return;
        case ']': ScanChar(); token = P_RBRACKET; return;
        case '{': ScanChar(); token = P_LBRACE;   return;
        case '}': ScanChar(); token = P_RBRACE;   return;

        // ---- '/', '/=', and both comment forms ----
        case '/':
            ScanChar();
            if (ch == '/')
            {
                // Line comment: consume up to the line end or end of input.
                do
                {
                    ScanCommentChar();
                }
                while (ch != C_CR && ch != C_LF && bp < buflen);
                continue;
            }
            if (ch == '*')
            {
                // Block comment: after SkipComment() the closing '/' is
                // expected as the current character.
                ScanChar();
                SkipComment();
                if (ch != '/')
                {
                    LexError("Unclosed comment");
                    return;
                }
                ScanChar();
                continue;
            }
            if (ch == '=')
            {
                strcpy(name, "/=");
                token = P_SLASHEQ;
                ScanChar();
            }
            else
            {
                strcpy(name, "/");
                token = P_SLASH;
            }
            return;

        // ---- character literal ----
        case '\'':
            ScanChar();
            if (ch == '\'')
            {
                LexError("Empty character");
                return;
            }
            if (ch == C_CR || ch == C_LF)
            {
                LexError(pos, "Illegal line end in character");
            }
            ScanLitChar();
            if (ch != '\'')
            {
                LexError(pos, "Unclosed character quote mark");
                return;
            }
            ScanChar();
            token = P_CHARLITERAL;
            return;

        // ---- string literal ----
        case '\"':
            ScanChar();
            for (; ch != '\"' && ch != C_CR && ch != C_LF && bp < buflen; )
            {
                ScanLitChar();
            }
            if (ch != '\"')
            {
                LexError(pos, "Unclosed string quote mark");
                return;
            }
            token = P_STRINGLITERAL;
            ScanChar();
            return;

        // ---- operators, other identifier starts, end of input, errors ----
        default:
            if (IsSpecial(ch))
            {
                ScanOperator();
            }
            else if (IsJavaIdentifierStart(ch))
            {
                ScanIdent();
            }
            else if (bp == buflen || (ch == C_EOI && bp + 1 == buflen))
            {
                token = P_EOF;
            }
            else
            {
                LexError("Illegal character");
                ScanChar();
            }
            return;
        }
    }
}

void Scanner::ScanLitChar()
{
    if (ch == '\\')
    {
        if (buf[bp + 1] == '\\')
        {
            bp++;
            col++;
            PutChar('\\');
            ScanChar();
        }
        else
        {
            ScanChar();
            char leadch = ch;
            int oct = Digit(8);
            int hex = 0;
            switch (ch)
            {
            case '0':

            case '1':

            case '2':

            case '3':

            case '4':

            case '5':

            case '6':

            case '7':
                leadch = ch;
                oct = Digit(8);
                ScanChar();
                if ('0' <= ch && ch <= '7')
                {
                    oct = oct * 8 + Digit(8);
                    ScanChar();
                    if (leadch <= '3' && '0' <= ch && ch <= '7')
                    {
                        oct = oct * 8 + Digit(8);
                        ScanChar();
                    }
                }
                PutChar((char)oct);
                break;

            case 'u':
                ScanChar();
                if (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F'))
                {
                    hex = hex * 16 + Digit(16);
                    ScanChar();
                    if (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F'))
                    {
                        hex = hex * 16 + Digit(16);
                        ScanChar();
                        if (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F'))
                        {
                            hex = hex * 16 + Digit(16);
                            ScanChar();
                            if (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F'))
                            {
                                hex = hex * 16 + Digit(16);
                                ScanChar();
                            }
                        }
                    }
                }
                else
                {
                    tmpPos.Set(line, col);
                    LexError(tmpPos, "Illegal unicode character");
                    break;
                }
                PutChar((char)hex);
                break;
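// scanner.cpp - source notes
//
// This page shows the scanner.cpp source file from "Lexical, syntax and semantic analyzer for a
// subset of the Java language, implemented in C++". It is written in C++ and is 1,113 lines long.
// The full code can be read online, or the complete source package can be downloaded from the
// resource detail page for local study and development.
// The hosting site (虫虫下载站) collects a large number of Java-related technical resources,
// including source code, technical documents and circuit diagrams, and is a learning platform
// for electronics engineers and embedded developers.
//
// Keyboard shortcuts
//
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -
//   Show shortcuts       ?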