📄 scanner.cpp

📁 使用C++实现的Java语言子集词法、语法、语义分析器
💻 CPP
📖 第 1 页 / 共 2 页
字号:
上一页 12
            case 'b':
                PutChar('\b');
                ScanChar();
                break;

            case 't':
                PutChar('\t');
                ScanChar();
                break;

            case 'n':
                PutChar('\n');
                ScanChar();
                break;

            case 'f':
                PutChar('\f');
                ScanChar();
                break;

            case 'r':
                PutChar('\r');
                ScanChar();
                break;

            case '\'':
                PutChar('\'');
                ScanChar();
                break;

            case '\"':
                PutChar('\"');
                ScanChar();
                break;

            case '\\':
                PutChar('\\');
                ScanChar();
                break;

            default:
                tmpPos.Set(line, col);
                LexError(tmpPos, "Illegal esc character");
                break;
            }
        }
    }
    else if (bp != buflen)
    {
        PutChar(ch);
        ScanChar();
    }
}

/**
 * Scans the longest operator that can be built from the current input.
 *
 * Each round adds the current character via PutChar, copies the first `sp`
 * characters of `sbuf` into `name`, and looks that text up with NameToKey.
 * When the lookup yields P_IDENTIFIER (i.e. the text is not in the table),
 * `sp` is decremented to drop the last character and scanning stops, leaving
 * `token` at the last successful match. Otherwise `token` is updated, the
 * next character is read, and the loop continues only while that character
 * satisfies IsSpecial.
 */
void Scanner::ScanOperator()
{
    do
    {
        PutChar(ch);

        strncpy(name, sbuf, sp);
        name[sp] = '\0';

        // One table lookup per candidate; the result drives both decisions.
        const int key = NameToKey(name);
        if (key == P_IDENTIFIER)
        {
            --sp;
            return;
        }

        token = key;
        ScanChar();
    }
    while (IsSpecial(ch));
}

void Scanner::ScanFraction()
{
    while (Digit(10) >= 0)
    {
        PutChar(ch);
        ScanChar();
    }
    int sp1 = sp;
    if (ch == 'e' || ch == 'E')
    {
        PutChar(ch);
        ScanChar();
        if (ch == '+' || ch == '-')
        {
            PutChar(ch);
            ScanChar();
        }
        if ('0' <= ch && ch <= '9')
        {
            do
            {
                PutChar(ch);
                ScanChar();
            }
            while ('0' <= ch && ch <= '9');
            return;
        }
        LexError("Illegal float number");
        sp = sp1;
    }
}

/**
 * Scans the fraction/exponent part via ScanFraction and then an optional
 * type suffix, setting `token` accordingly:
 *   - 'f' / 'F'          -> P_FLOATLITERAL (suffix consumed)
 *   - 'd' / 'D'          -> P_DOUBLELITERAL (suffix consumed)
 *   - anything else      -> P_DOUBLELITERAL (nothing consumed)
 */
void Scanner::ScanFractionAndSuffix()
{
    ScanFraction();

    switch (ch)
    {
    case 'f':
    case 'F':
        PutChar(ch);
        ScanChar();
        token = P_FLOATLITERAL;
        break;

    case 'd':
    case 'D':
        PutChar(ch);
        ScanChar();
        token = P_DOUBLELITERAL;
        break;

    default:
        token = P_DOUBLELITERAL;
        break;
    }
}

void Scanner::ScanCommentChar()
{
    ScanChar();
    if (ch == '\\')
    {
        if (buf[bp + 1] == '\\')
        {
            bp++;
            col++;
        }
    }
}

/**
 * Scans an identifier or keyword starting at the current character.
 *
 * The current character is stored, then further characters are read and
 * stored for as long as they are ASCII letters, digits, '_' or '$' (the set
 * accepted by IsJavaIdentifierPart). At the first other character the
 * collected text is copied into `name`, NUL-terminated, and classified with
 * NameToKey, whose result becomes `token`.
 */
void Scanner::ScanIdent()
{
    for (;;)
    {
        // Direct store while sp is not at MAX_FILE; at that index the
        // character is handed to PutChar instead.
        if (sp != MAX_FILE)
        {
            sbuf[sp++] = ch;
        }
        else
        {
            PutChar(ch);
        }

        ScanChar();
        if (IsJavaIdentifierPart(ch))
        {
            continue;
        }

        // First character that cannot continue the identifier: finish up.
        strncpy(name, sbuf, sp);
        name[sp] = '\0';
        token = NameToKey(name);
        return;
    }
}

/**
 * Skips the body of a comment up to its closing star-slash pair.
 *
 * Returns as soon as a '*' is directly followed by '/'; the '/' is left as
 * the current character and is not consumed here. Also stops, without
 * having found the terminator, once `bp` reaches `buflen`.
 */
void Scanner::SkipComment()
{
    while (bp < buflen)
    {
        if (ch != '*')
        {
            ScanCommentChar();
            continue;
        }

        // Saw '*': the comment ends only if the very next character is '/'.
        ScanChar();
        if (ch == '/')
        {
            return;
        }
    }
}

/**
 * Returns the numeric value of the current character as a digit in `base`.
 *
 * Recognizes '0'-'9', 'a'-'f' and 'A'-'F' (values 0-15).
 *
 * @param base  Radix the digit must be valid in.
 * @return      The digit value in [0, base), or -1 if the current character
 *              is not a digit of that radix.
 *
 * The value is validated against `base` itself rather than against fixed
 * thresholds, so e.g. '7' is rejected for base 2 and 'f' for base 11.
 * Results for bases 8, 10 and 16 are the same as with the fixed thresholds.
 * No "Non-ASCII digit" diagnostic is issued: every character that yields a
 * non-negative value here is a non-negative ASCII character, so that check
 * could never fire.
 */
int Scanner::Digit(int base)
{
    const char c = ch;
    int value = -1;

    if (c >= '0' && c <= '9')
    {
        value = c - '0';
    }
    else if (c >= 'a' && c <= 'f')
    {
        value = c - 'a' + 10;
    }
    else if (c >= 'A' && c <= 'F')
    {
        value = c - 'A' + 10;
    }

    if (value < 0 || value >= base)
    {
        return -1;
    }
    return value;
}

/**
 * Tells whether `ch` may appear inside an identifier: an ASCII digit,
 * an ASCII letter, '_' or '$'.
 */
bool Scanner::IsJavaIdentifierPart(const char & ch)
{
    if (ch == '_' || ch == '$')
    {
        return true;
    }
    if ('0' <= ch && ch <= '9')
    {
        return true;
    }
    if ('A' <= ch && ch <= 'Z')
    {
        return true;
    }
    return 'a' <= ch && ch <= 'z';
}

/**
 * Tells whether `ch` may begin an identifier: an ASCII letter, '_' or '$'.
 * Digits are not accepted.
 */
bool Scanner::IsJavaIdentifierStart(const char & ch)
{
    if (ch == '_' || ch == '$')
    {
        return true;
    }
    if ('A' <= ch && ch <= 'Z')
    {
        return true;
    }
    return 'a' <= ch && ch <= 'z';
}

/**
 * Tells whether `ch` is one of the characters the scanner treats as
 * "special": ! % & * ? + - : < = > ^ | ~
 */
bool Scanner::IsSpecial(const char & ch)
{
    static const char kSpecialChars[] =
    {
        '!', '%', '&', '*', '?', '+', '-',
        ':', '<', '=', '>', '^', '|', '~'
    };
    const int count = static_cast<int>(sizeof(kSpecialChars) / sizeof(kSpecialChars[0]));

    for (int i = 0; i < count; ++i)
    {
        if (ch == kSpecialChars[i])
        {
            return true;
        }
    }
    return false;
}

/**
 * Records a lexical error: sets `token` to P_ERROR and writes a report to
 * `errFile`.
 *
 * @param pos  Location of the error; printed only when both its line and
 *             column are greater than zero.
 * @param msg  Error description, printed after "Error: ".
 * @param arg  Optional offending text, printed on a "Content:" line when
 *             not NULL.
 */
void Scanner::LexError(const Position & pos, const char * msg, const char * arg)
{
    token = P_ERROR;

    errFile << "Error: " << msg << endl;

    if (arg)
    {
        errFile << "    Content: " << arg << endl;
    }

    const bool hasLocation = (pos.line > 0) && (pos.col > 0);
    if (hasLocation)
    {
        errFile << "    At line " << pos.line << ", column " << pos.col << endl;
    }
}

/**
 * Reports a lexical error at `pos` with message `msg` and no extra content;
 * forwards to the three-argument overload.
 */
void Scanner::LexError(const Position & pos, const char * msg)
{
    const char * const noContent = NULL;
    LexError(pos, msg, noContent);
}

void Scanner::LexError(const char * msg)
{
    LexError(this->pos, msg, NULL);
}

void Scanner::LexError(const char * msg, const char * arg)
{
    LexError(this->pos, msg, arg);
}

/**
 * Writes the current token to the output files.
 *
 * If DetailToType maps `token` to 0, nothing is printed and both
 * `count_line` and `count_file` are decremented. Otherwise one line goes to
 * `outFile` containing: the per-line counter, the start and end positions,
 * the type (hex), the token code (decimal), the lexeme in brackets, and,
 * for types in [T_ASSIGN, T_BOUND], the priority from GetPriority.
 *
 * Tokens belonging to the subset handled by the later phases are also
 * written to `lexFile` as tab-separated code, line, column and lexeme.
 *
 * The lexeme is taken from `buf` as the (endPos.col - pos.col + 1)
 * characters that end just before index `bp`.
 * NOTE(review): this presumes the token lies on a single line and that bp
 * is one past its last character; confirm against the callers.
 */
void Scanner::OutputToken()
{
    const int category = DetailToType(token);
    if (category == 0)
    {
        --count_line;
        --count_file;
        return;
    }

    const int lexemeLen = endPos.col - pos.col + 1;
    const int lexemeStart = bp - lexemeLen;

    outFile << setw(2) << dec << count_line << ": "
            << "(" << setw(3) << dec << pos.line << "," << setw(3) << dec << pos.col << ")-"
            << "(" << setw(3) << dec << endPos.line << "," << setw(3) << dec << endPos.col << ") "
            << "(0x" << setw(3) << hex << category << ") "
            << "(" << setw(3) << dec << token << ") "
            << "[";
    for (int k = 0; k < lexemeLen; ++k)
    {
        outFile << buf[lexemeStart + k];
    }
    outFile << "]";

    if (category >= T_ASSIGN && category <= T_BOUND)
    {
        outFile << "-[Priority:" << setw(2) << dec << GetPriority(category) << "]";
    }
    outFile << endl;

    // Only a subset of the language goes through syntax and semantic
    // analysis; emit the token-attribute file those phases take as input.
    const bool inParserSubset =
        token == P_WHILE ||
        token == P_IDENTIFIER ||
        token == P_INTLITERAL ||
        token == P_LPAREN ||
        token == P_RPAREN ||
        token == P_EQ ||
        token == P_GT ||
        token == P_LT ||
        token == P_PLUS ||
        token == P_SUB ||
        token == P_STAR ||
        token == P_SLASH ||
        token == P_SEMI;

    if (inParserSubset)
    {
        lexFile << token << "\t" << pos.line << "\t" << pos.col << "\t";
        for (int k = 0; k < lexemeLen; ++k)
        {
            lexFile << buf[lexemeStart + k];
        }
        lexFile << endl;
    }
}

/**
 * Looks up `msg` in ATTR_MAP by exact string comparison.
 *
 * @param msg  NUL-terminated text to classify.
 * @return     The `detail` code of the first entry whose keyword equals
 *             `msg`, or P_IDENTIFIER when no entry matches.
 */
int Scanner::NameToKey(char * msg)
{
    int idx = 0;
    while (idx < TABLE_LENGTH && strcmp(msg, ATTR_MAP[idx].keyword) != 0)
    {
        ++idx;
    }

    if (idx < TABLE_LENGTH)
    {
        return ATTR_MAP[idx].detail;
    }
    return P_IDENTIFIER;
}

/**
 * Maps a token detail code to its type via ATTR_MAP.
 *
 * @param detail  Detail code to look up.
 * @return        The `type` of the first entry with that detail code, or 0
 *                when the code is not in the table.
 */
int Scanner::DetailToType(int detail)
{
    int idx = 0;
    while (idx < TABLE_LENGTH && ATTR_MAP[idx].detail != detail)
    {
        ++idx;
    }

    if (idx < TABLE_LENGTH)
    {
        return ATTR_MAP[idx].type;
    }
    return 0;
}

/**
 * Looks up the priority associated with a token type in PRI_MAP.
 *
 * @param type  Token type to look up.
 * @return      The `priority` of the first entry with that type, or 0 when
 *              the type is not in the table.
 */
int Scanner::GetPriority(int type)
{
    int idx = 0;
    while (idx < PRIORITY_LENGTH && PRI_MAP[idx].type != type)
    {
        ++idx;
    }

    if (idx < PRIORITY_LENGTH)
    {
        return PRI_MAP[idx].priority;
    }
    return 0;
}
上一页 12
💿 文件大小 1476 K
👤 上传用户 sun337146987
📂 所属分类 Java编程
🏷️ 相关标签

#Java #语言 #分析器
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -