📄 compiler.cpp
字号:
/* compiler -- */
#include "flincl.h"
#include "stdio.h"
#include "shortstack.h"
#include "semantics.h"
#include "compiler.h"
#include "ctype.h"
#include "assert.h"
#include "string.h"
// ctrace -- call trace(...) only when machine->trace_flag is set.
// The macro is written as "if (!flag) {} else trace" so that it already
// owns an else branch: in a use such as
//     if (a) ctrace(x); else ctrace(y);
// the caller's else then pairs with the caller's if. With a bare
// "if (flag) trace" the caller's else would attach to the macro's if and
// the second trace call could never run.
#define ctrace if (!machine->trace_flag) {} else trace
// Compiler -- constructor; the body is empty and sets no members
Compiler::Compiler()
{
}
// init -- remember the machine and initialize the semantics object
//
// m is stored in the machine member; the result of semantics.init()
// (called with that machine and this compiler) is returned unchanged.
bool Compiler::init(Machine_ptr m)
{
    machine = m;
    bool semantics_ok = semantics.init(machine, this);
    return semantics_ok;
}
// expected_close_squote -- report that a closing single quote was expected
void Compiler::expected_close_squote()
{
    report_error("expected close single quote");
}
// expected_close_quote -- report that a closing quote was expected
void Compiler::expected_close_quote()
{
    report_error("expected close quote");
}
// expected_char_const -- report that a character constant was expected
void Compiler::expected_char_const()
{
    report_error("expected character constant");
}
// expected_close_paren_or_formal -- report that a close paren or a formal
//     parameter was expected
void Compiler::expected_close_paren_or_formal()
{
    report_error("expected close paren or formal parameter");
}
// expected_close_paren -- report that a close paren was expected
void Compiler::expected_close_paren()
{
    report_error("expected close paren");
}
// expected_close_bracket -- report that a close bracket was expected
void Compiler::expected_close_bracket()
{
    report_error("expected close bracket");
}
// expected_equal -- report that an equal sign was expected
void Compiler::expected_equal()
{
    report_error("expected equal");
}
// expected_indent -- report that an indent was expected
//
// The message text follows the same "expected ..." wording as the other
// error reporters in this file (no class-name prefix), since report_error
// already adds the "Error ..." prefix and location.
void Compiler::expected_indent()
{
    report_error("expected indent");
}
// expected_newline -- report that a newline was expected
void Compiler::expected_newline()
{
    report_error("expected newline");
}
// expected_open_paren -- report that an open paren was expected
void Compiler::expected_open_paren()
{
    report_error("expected open paren");
}
// expected_open_brace -- report that an open brace was expected
void Compiler::expected_open_brace()
{
    report_error("expected open brace");
}
// expected_to -- report that "to" was expected
void Compiler::expected_to()
{
    report_error("expected to");
}
// expected_token -- report that more input was expected
void Compiler::expected_token()
{
    report_error("expected more");
}
// expected_colon -- report that a colon was expected
void Compiler::expected_colon()
{
    report_error("expected colon");
}
// expected_comma -- report that a comma was expected
void Compiler::expected_comma()
{
    report_error("expected comma");
}
// expected_statement -- report that a statement was expected
void Compiler::expected_statement()
{
    report_error("expected statement");
}
// expected_method_or_field_selector -- report that a method or field
//     selector was expected
void Compiler::expected_method_or_field_selector()
{
    report_error("expected method or field selector");
}
// expected_decl -- report that a declaration was expected
void Compiler::expected_decl()
{
    report_error("expected declaration");
}
// expected_hex_constant -- report that a hexadecimal constant was expected
void Compiler::expected_hex_constant()
{
    report_error("expected hexadecimal constant");
}
// expected_oct_constant -- report that an octal constant was expected
void Compiler::expected_oct_constant()
{
    report_error("expected octal constant");
}
// expected_number_constant -- report that a number constant was expected
void Compiler::expected_number_constant()
{
    report_error("expected number constant");
}
// no_digit_in_hex -- report a hexadecimal constant that has no digits
//     after its "0x" prefix
void Compiler::no_digit_in_hex()
{
    // spelling matches expected_hex_constant ("hexadecimal")
    report_error("no digits found in hexadecimal constant");
}
// no_digit_in_exponent -- report a floating point exponent with no digits
void Compiler::no_digit_in_exponent()
{
    report_error("no digit found in exponent");
}
// expected_id -- report "expected class name" followed by msg
//
// msg is appended directly after the fixed text (no separator is added),
// so callers supply any leading space themselves. A null msg is treated
// as an empty string. The combined text is truncated to fit the local
// buffer instead of overrunning it.
//
// NOTE(review): the fixed text says "class name" although the function is
// named expected_id -- confirm the intended wording.
void Compiler::expected_id(char *msg)
{
    char s[100];
    snprintf(s, sizeof(s), "expected class name%s", msg ? msg : "");
    report_error(s);
}
// expected_variable -- report that a variable name was expected
void Compiler::expected_variable()
{
    report_error("expected variable name");
}
// expected_expression -- report that an expression was expected
void Compiler::expected_expression()
{
    report_error("expected expression");
}
// indentation_error -- report an indentation error
void Compiler::indentation_error()
{
    report_error("indentation error");
}
// expected_equal_or_in -- report that "=" or "in" was expected after the
//     variable of a for statement
void Compiler::expected_equal_or_in()
{
    report_error("expected = or in after for variable");
}
// report_error -- format an error message, hand it to the machine, and
//     set error_flag
//
// s is the description of the problem. When there is no current token the
// message is "Error: <s>"; otherwise it also gives line_number and the
// text of the current token. For INDENT, DEDENT and NEWLINE tokens the
// token text is first overwritten with a readable placeholder, so the
// token's str is modified by this call.
//
// The message is built with snprintf so that a long token or description
// is truncated to the buffer rather than written past its end.
void Compiler::report_error(char *s)
{
    char msg[128];
    if (!token) {
        snprintf(msg, sizeof(msg), "Error: %s", s);
    } else {
        // these token types have no printable text of their own
        if (token->typ == TOKEN_INDENT) {
            strcpy(token->str, "[indentation]");
        } else if (token->typ == TOKEN_DEDENT) {
            strcpy(token->str, "[dedentation]");
        } else if (token->typ == TOKEN_NEWLINE) {
            strcpy(token->str, "[newline]");
        }
        snprintf(msg, sizeof(msg), "Error in line %d near \"%s\": %s",
                 line_number, token->str, s);
    }
    machine->error_msg(msg);
    error_flag = true;
}
// get_token -- returns true if token is found
//
bool Compiler::get_token()
{
other_token();
if (unget_count) {
unget_count--;
// ctrace("Token: (ungot), %s\n", token->str);
return true;
}
token->typ = TOKEN_OTHER;
int i = 0;
int c;
if (dedent_count) {
token->typ = TOKEN_DEDENT;
dedent_count--;
token->str[0] = 0;
ctrace("Token: DEDENT\n");
return true;
}
while ((c = getc(inf)) != EOF) { // this loops over white space
i = 0;
token->str[i++] = c;
if (c == '#') {
while ((c = getc(inf)) != EOF && c != '\n') ;
if (c == '\n') ungetc(c, inf);
continue;
}
if (c == '\n') {
line_number++;
if (newline_flag) { // (user types from terminal)
if (interactive_mode) {
// blank line generates DEDENTs in interactive mode
while (0 < indentation[istack_top]) {
dedent_count++;
istack_top--;
}
if (0 != indentation[istack_top]) {
indentation_error();
}
if (dedent_count > 0) {
dedent_count--;
token->typ = TOKEN_DEDENT;
ctrace("Token: DEDENT");
} else {
continue; // ignore 2nd blank line
}
i = 0;
} else continue; // ignore blank lines
} else {
token->typ = TOKEN_NEWLINE;
newline_flag = true;
ctrace("Token: NEWLINE");
}
} else if (newline_flag) {
newline_flag = false;
int indent = 0;
while (isspace(c)) {
indent++;
if ((c = getc(inf)) == EOF) break;
}
ungetc(c, inf);
if (c == '#' || c == '\n') {
newline_flag = true;
continue; // ignore blank lines
}
// generate indent or dedent?
if (indent > indentation[istack_top]) {
token->typ = TOKEN_INDENT;
indentation[++istack_top] = indent;
i = 0; // take space out of token
ctrace("Token: INDENT");
} else if (indent < indentation[istack_top]) {
while (indent < indentation[istack_top]) {
dedent_count++;
istack_top--;
}
if (indent != indentation[istack_top]) {
indentation_error();
}
dedent_count--;
token->typ = TOKEN_DEDENT;
i = 0;
ctrace("Token: DEDENT");
} else { // no indentation, newline_flag is off
continue;
}
} else if (isspace(c)) {
continue; // ignore spaces after initial ones handled above
} else if (__iscsymf(c)) {
newline_flag = false;
/* get alphanumeric */
while ((c = getc(inf)) != EOF) {
if (__iscsym(c)) {
token->str[i++] = c;
} else {
ungetc(c, inf);
break;
}
}
token->typ = TOKEN_ID;
ctrace("Token: ID");
} else if (isdigit(c) || c == '-') {
char first_digit = 0;
newline_flag = 0; // index of first digit in string
if (c == '-') {
first_digit = 1;
}
/* get number */
while ((c = getc(inf)) != EOF) {
if (isdigit(c)) {
token->str[i++] = c;
} else {
break;
}
}
if (c == '.' || tolower(c) == 'e') {
token->typ = TOKEN_DOUBLE;
if (c == '.') token->str[i++] = c;
else ungetc(c, inf); // unget 'e'
while ((c = getc(inf)) != EOF) {
if (isdigit(c)) {
token->str[i++] = c;
} else {
ungetc(c, inf);
ctrace("Token: DOUBLE");
break;
}
}
// may be followed by exponent "e+006"
if ((c = getc(inf)) != EOF) {
if (tolower(c) == 'e') {
token->str[i++] = 'e';
if ((c = getc(inf)) != EOF) {
if ((c == '+') || (c == '-')) {
token->str[i++] = c;
} else {
ungetc(c, inf);
}
int expcount = 0;
while ((c = getc(inf)) != EOF) {
if (isdigit(c)) {
expcount++;
token->str[i++] = c;
} else {
ungetc(c, inf);
if (expcount == 0) {
token->str[i] = 0;
no_digit_in_exponent();
}
break;
}
}
}
}
}
} else if ((i == first_digit + 1) && // enough for "0x"
(token->str[first_digit] == '0') && // got the "0"
(tolower(c) == 'x')) { // got the "x"
token->typ = TOKEN_LONG;
token->str[i++] = c;
int digit_count = 0;
while ((c = getc(inf)) != EOF) {
if (isxdigit(c)) {
digit_count = 0;
token->str[i++] = c;
} else {
if (digit_count == 0) {
token->str[i] = 0;
no_digit_in_hex();
}
ungetc(c, inf);
ctrace("Token: LONG");
break;
}
}
} else {
ungetc(c, inf);
token->typ = TOKEN_LONG;
ctrace("Token: LONG");
}
} else if (c == '"') {
newline_flag = false;
// leave quotes in... i = 0; // skip the quote
int quote_count = 0;
token->typ = TOKEN_STRING;
while ((c = getc(inf)) != EOF) {
token->str[i++] = c;
if (c == '"') {
if (quote_count == 0) {
quote_count = 1;
} else /* quote_count == 1 */ {
i = i - 1; // erase second quote
quote_count = 0;
}
} else if (quote_count == 1) {
// we closed the quote
ungetc(c, inf);
i--;
ctrace("Token: STRING");
break;
} else if (c == '\\') { // escape character
c = getc(inf);
if (c == EOF) break;
token->str[i - 1] = c;
}
}
if (quote_count != 1) {
expected_close_quote();
} else {
i--; // back over the close quote
}
} else if (c == '\'') {
newline_flag = false;
// leave quotes in ... i = 0; // skip the quote
int quote_count = 0;
token->typ = TOKEN_SYMBOL;
while ((c = getc(inf)) != EOF) {
token->str[i++] = c;
if (c == '\'') {
if (quote_count == 0) {
quote_count = 1;
} else /* quote_count == 1 */ {
i = i - 1; // erase second quote
quote_count = 0;
}
} else if (quote_count == 1) {
// we closed the quote
ungetc(c, inf);
i--;
ctrace("Token: SYMBOL");
break;
} else if (c == '\\') { // escape character
c = getc(inf);
if (c == EOF) break;
token->str[i - 1] = c;
}
}
if (quote_count != 1) {
expected_close_quote();
} else {
i--; // back over the close quote
}
} else if (c == '>' || c == '<' || c == '!' || c == '=') {
newline_flag = false;
if ((c = getc(inf)) != EOF) {
if ((c == '=') ||
(token->str[i - 1] == '>' && c == '>') ||
(token->str[i - 1] == '<' && c == '<')) {
token->str[i++] = c;
} else ungetc(c, inf);
}
ctrace("Token: OTHER");
} else if (c == '*') {
newline_flag = false;
if ((c = getc(inf)) != EOF && (c == '*')) {
token->str[i++] = c;
} else ungetc(c, inf);
ctrace("Token: OTHER");
} else {
newline_flag = false;
ctrace("Token: OTHER");
}
token->str[i] = 0;
if (token->typ != TOKEN_NEWLINE) ctrace(", '%s'\n", token->str);
else ctrace(", '\\n'\n");
return !error_flag;
}
if (istack_top > 0) {
istack_top--;
token->typ = TOKEN_DEDENT;
token->str[0] = 0;
ctrace("Token: DEDENT\n");
return true;
}
ctrace("Token: none\n");
return false; // false and no error_flag means end of file
}
// unget_token -- step back to the previous token
//
// At most two levels of unget are supported (asserted below).
// As described by the original author: token points at one of two
// buffers (token1 or token2) and unget_count records how many levels
// have been ungotten. Every unget switches token to the other buffer,
// and every get switches it back.
//
// Always returns true.
//
bool Compiler::unget_token()
{
    assert(unget_count < 2);
    // ctrace("unget_token\n");
    ++unget_count;
    other_token();
    return true;
}
// compile_file -- compile from opened file handle.
//
// interactive indicates that blank lines should terminate
// compilation of a top-level statement or declaration
// (normally blank lines are ignored, but this does not
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -