📄 scanner.cpp
字号:
//Header files
#include "stdio.h"
#include "math.h"
#include "string.h"
#include "stdlib.h"
#include "ctype.h"
// Symbol kinds: passed as the KIND argument of the LookUp() overloads and
// stored in symbol[].KIND.
// Integer constant (entry holds CONTENT.VAL.intVAL).
#define INTEGER 1
// Real constant (entry holds CONTENT.VAL.dblVAL).
#define REAL 2
// Identifier name (entry holds CONTENT.NAME).
#define IDENTIFIER 3
// Relational operator: = < <= <> > >=
#define RELOP 4
// Additive operator, as classified by RECOGDEL().
#define ADDOP 5
// Multiplicative operator, as classified by RECOGDEL().
#define MULOP 6
// Assignment operator :=
#define ASSIGNOP 7
// Size in bytes of the shared name buffer `string`.
#define SSIZE 1000
// Number of entries in the CODE table; also the loop bound used by KindNum().
#define CODELEN 31
//Tables
// Token code table: maps a lexeme (or the name of a token class such as
// "relop" or "id") to the numeric code stored in TOKEN.CODE. KindNum()
// searches it front to back and returns the code of the first match.
struct
{
    char lexeme[10];   // lexeme or class name; "procedure" fills it exactly
    int code;          // numeric token code
} CODE[CODELEN]=
{
    // Key words
    { "array",      0 },
    { "begin",      1 },
    { "do",         2 },
    { "else",       3 },
    { "end",        4 },
    { "function",   5 },
    { "if",         6 },
    { "integer",    7 },
    { "of",         8 },
    { "procedure",  9 },
    { "program",   10 },
    { "real",      11 },
    { "then",      12 },
    { "var",       13 },
    { "while",     14 },
    // Operator classes
    { "addop",     15 },
    { "assignop",  16 },
    { "mulop",     17 },
    { "not",       18 },   // also a keyword
    { "relop",     19 },
    // Identifiers and numeric constants
    { "id",        20 },
    { "num",       21 },
    // Punctuation
    { ":",         22 },
    { "(",         23 },
    { ")",         24 },
    { "[",         25 },
    { "]",         26 },
    { ",",         27 },
    { ";",         28 },
    // NOTE(review): this repeats the "," lexeme of code 27, so a first-match
    // search can never return 29; a different lexeme may have been intended
    // here -- confirm against the grammar.
    { ",",         29 },
    { ".",         30 }
};
// Reserved words, one per row, each stored in a fixed 10-byte slot.
// IsKeyWord() compares a scanned word against these rows.
// NOTE(review): the CODE table gives "of" a keyword code, but "of" does not
// appear in this list -- confirm whether it should.
char KEYWORD[][10] =
{
    "array",     "begin",   "do",
    "else",      "end",     "function",
    "if",        "integer", "not",
    "procedure", "program", "real",
    "then",      "var",     "while"
};
//Data structures
// One symbol-table entry. KIND tells which member of CONTENT is meaningful:
// the LookUp() overloads store VAL.intVAL for INTEGER, VAL.dblVAL for REAL,
// and NAME for every other kind.
struct SYMBOL
{
    union
    {
        // Location of the spelling inside the shared name buffer.
        struct
        {
            int HEAD;   // offset of the first character
            int LENG;   // number of characters
        } NAME;

        // Value of a numeric constant.
        union
        {
            int intVAL;
            double dblVAL;
        } VAL;
    } CONTENT;

    int KIND;   // one of the symbol-kind constants
    int ADDR;   // not assigned anywhere in the visible code
} symbol[500];
// One scanned token, written as a raw record to the token file.
struct TOKEN
{
    int CODE;    // token code from the CODE table; -1 means "emit nothing"
    int ENTRY;   // symbol-table index, or -1 when the token has no entry
};
// Streams shared by the scanner routines.
//   fpSource - program text being scanned
//   fpToken  - token records (written by SORT, re-read by PrintToken)
//   fpSymb   - symbol-table records written by the LookUp() overloads
//   fpSet    - dump of the name buffer written by main
// fpSymb and fpSet are used throughout the file and therefore must be
// declared here. fpSymbol is kept only for compatibility; nothing in the
// visible code references it.
FILE *fpSource, *fpToken, *fpSymbol;
FILE *fpSymb, *fpSet;

// line   - current source line number, used in diagnostics
// length - number of entries in use in symbol[]
// end    - offset of the first free byte in string[]
int line, length, end;

// Name buffer: the spellings of all named symbols, stored back to back.
char string[SSIZE]={""};

// Recognizers: each receives the first character of a lexeme and fills in
// the token.
void SORT(char ch);
void RECOGID(char ch, TOKEN &token);
void HANDLECOM(char ch, TOKEN &token);
void RECOGDIG(char ch, TOKEN &token);
void RECOGDEL(char ch, TOKEN &token);

// Classification helpers.
bool Islegal(char ch);
bool IsKeyWord(char *WORD);
int KindNum(char *str);

// Symbol-table lookup/insert for names, integer constants and real constants.
int LookUp(char *WORD, int KIND);
int LookUp(int Val, int KIND);
int LookUp(double Val, int KIND);

// Prints the token file, the symbol table and the name buffer.
void PrintToken();
// Entry point. Scans "test.txt" one character at a time: blanks are skipped,
// newlines advance the line counter, and every other character is handed to
// SORT() as the start of a lexeme. Afterwards the name buffer is written to
// SET.TXT, the files are closed and PrintToken() prints a report.
// Returns 0 on success, 1 when any of the four files cannot be opened.
int main(int argc, char *argv[])
{
    // int rather than char, so EOF stays distinguishable from every data byte.
    int ch;

    // The input file name is fixed; the command line is not consulted.
    (void)argc;
    (void)argv;

    fpSource = fopen("test.txt", "r");
    // NOTE(review): TOKEN.TXT and SYMB.TXT receive raw struct bytes through
    // fwrite() but are opened in text mode; on platforms that distinguish
    // text from binary streams this may alter the data. Switching to "wb"
    // would have to be matched by the code that reads the files back.
    fpToken = fopen("TOKEN.TXT","w");
    fpSymb = fopen("SYMB.TXT","w");
    fpSet = fopen("SET.TXT","w");
    if(fpSource == NULL || fpToken == NULL || fpSymb == NULL || fpSet == NULL)
    {
        printf("ERROR::cannot open test.txt, TOKEN.TXT, SYMB.TXT or SET.TXT\n");
        if(fpSource != NULL)
            fclose(fpSource);
        if(fpToken != NULL)
            fclose(fpToken);
        if(fpSymb != NULL)
            fclose(fpSymb);
        if(fpSet != NULL)
            fclose(fpSet);
        return 1;
    }

    line = 1;
    length = 0;
    end = 0;

    // Test the result of getc() itself; feof() only becomes true after a
    // read has already failed.
    while((ch = getc(fpSource)) != EOF)
    {
        if(ch == ' ')
            continue;
        if(ch == '\n')
            line++;
        else
            SORT((char)ch);
    }

    fprintf(fpSet, "%s", string);

    // TOKEN.TXT must be closed (and so flushed) before PrintToken() reopens
    // it for reading.
    fclose(fpSource);
    fclose(fpToken);
    fclose(fpSymb);
    fclose(fpSet);

    PrintToken();
    getchar();   // keep the console window open until a key is pressed
    return 0;
}
void SORT(char ch)
{
TOKEN token;
if(isalpha(ch))
RECOGID(ch, token);
else if(ch == '{')
HANDLECOM(ch, token);
else if(isdigit(ch))
RECOGDIG(ch, token);
else
RECOGDEL(ch, token);
if(token.CODE != -1)
fwrite(&token, sizeof(TOKEN), 1, fpToken);
}
void RECOGID(char ch, TOKEN &token)
{
char WORD[10];
int index;
bool legal;
legal = false;
index = 0;
WORD[index++] = ch;
while(index < 10)
{
ch = getc(fpSource);
if(isalnum(ch))
WORD[index++] = ch;
else if(ch == EOF)
break;
else if(Islegal(ch))
{
printf("ERROR::legal symbol %c in Line %d\n", ch, line);
token.CODE = -1;
return;
}
else
{
fseek(fpSource, -1L, SEEK_CUR);
break;
}
}
if(index == 10)
{
while(true)
{
ch = getc(fpSource);
if(!Islegal(ch))
{
fseek(fpSource, -1L, SEEK_CUR);
break;
}
}
printf("WARNING::Identifier in Line %d is too long\n", line);
token.CODE = -1;
return;
}
WORD[index] = '\0';
if(IsKeyWord(WORD))
{
token.CODE = KindNum(WORD);
token.ENTRY = -1;
}
else
{
token.CODE = KindNum("id");
token.ENTRY = LookUp(WORD, IDENTIFIER);
}
}
// Skips a { ... } comment.
//   ch    - the opening '{', already consumed (its value is not needed).
//   token - CODE is set to -1: a comment produces no token.
// Newlines inside the comment advance the global line counter so that later
// diagnostics stay accurate. If the input ends before '}', one error naming
// the line where the comment started is printed and scanning stops.
void HANDLECOM(char ch, TOKEN &token)
{
    const int startLine = line;
    int next;   // int, so EOF is distinguishable from every data byte

    (void)ch;
    token.CODE = -1;

    for(;;)
    {
        next = getc(fpSource);
        if(next == '}')
            break;
        if(next == EOF)
        {
            // Leave the loop here: getc() keeps returning EOF, so going on
            // would print this message forever.
            printf("ERROR::Comments in Line %d is not complete\n", startLine);
            break;
        }
        if(next == '\n')
            line++;
    }
}
// Recognizes an unsigned integer or real constant (digits, optionally
// followed by '.' and at least one digit).
//   ch    - first digit, already consumed from fpSource.
//   token - receives the "num" code and the symbol-table index of the value;
//           CODE is set to -1 when a '.' is not followed by a digit.
// The character that ends the number is pushed back for the caller. Only the
// first '.' belongs to the number; a later '.' ends it.
// NOTE(review): the integer part is accumulated in an int with no range
// check, so a very long digit string overflows.
void RECOGDIG(char ch, TOKEN &token)
{
    int intVal = ch - '0';
    double dblVal = 0.0;
    bool real = false;
    // Power of ten for the next fraction digit; exactly -1 means a '.' has
    // been read and no fraction digit has followed yet.
    int exp = 0;
    int next;   // int, so EOF is distinguishable from every data byte

    for(;;)
    {
        next = getc(fpSource);
        if(next == EOF)
            break;
        if(isdigit(next))
        {
            if(real)
                dblVal += (double)(next - '0') * pow(10.0, exp--);
            else
                intVal = intVal*10 + (next - '0');
        }
        else if(next == '.' && !real)
        {
            real = true;
            dblVal = (double)intVal;
            exp = -1;
        }
        else
        {
            // End of the number. ungetc() is used because seeking by a
            // relative offset is not portable on text streams.
            ungetc(next, fpSource);
            break;
        }
    }

    // Checked after the loop so that "1." at end of input is rejected too.
    if(real && exp == -1)
    {
        printf("ERROR::Real number is not complete in Line %d\n", line);
        token.CODE = -1;
        return;
    }

    token.CODE = KindNum("num");
    if(real)
        token.ENTRY = LookUp(dblVal, REAL);
    else
        token.ENTRY = LookUp(intVal, INTEGER);
}
void RECOGDEL(char ch, TOKEN &token)
{
switch(ch)
{
case '=':
token.CODE = KindNum("relop");
token.ENTRY = LookUp("=", RELOP);
break;
case '<':
ch = getc(fpSource);
if(ch == '=')
{
token.ENTRY = LookUp("<=", RELOP);
}
else if(ch == '>')
token.ENTRY = LookUp("<>", RELOP);
else
{
token.ENTRY = LookUp("<", RELOP);
fseek(fpSource, -1L, SEEK_CUR);
}
token.CODE = KindNum("relop");
break;
case '>':
ch = getc(fpSource);
if(ch == '=')
token.ENTRY = LookUp(">=", RELOP);
else
{
token.ENTRY = LookUp(">", RELOP);
fseek(fpSource, -1L, SEEK_CUR);
}
token.CODE = KindNum("relop");
break;
case '+':
token.CODE = KindNum("addop");
token.ENTRY = LookUp("+", ADDOP);
break;
case '-':
token.CODE = KindNum("addop");
token.ENTRY = LookUp("-", ADDOP);
break;
case '|':
token.CODE = KindNum("addop");
token.ENTRY = LookUp("|", ADDOP);
break;
case '*':
token.CODE = KindNum("mulop");
token.ENTRY = LookUp("*", MULOP);
break;
case '/':
token.CODE = KindNum("mulop");
token.ENTRY = LookUp("/", MULOP);;
break;
case '%':
token.CODE = KindNum("mulop");
token.ENTRY = LookUp("%", MULOP);;
break;
case '&':
token.CODE = KindNum("addop");
token.ENTRY = LookUp("&", MULOP);;
break;
case ':':
ch = getc(fpSource);
if(ch == '=')
{
token.CODE = KindNum("assignop");
token.ENTRY = LookUp(":=", ASSIGNOP);
}
else
{
token.CODE = KindNum(":");
token.ENTRY = -1;
fseek(fpSource, -1L, SEEK_CUR);
}
break;
case '(':
token.CODE = KindNum("(");
token.ENTRY = -1;
break;
case ')':
token.CODE = KindNum(")");
token.ENTRY = -1;
break;
case '[':
token.CODE = KindNum("[");
token.ENTRY = -1;
break;
case ']':
token.CODE = KindNum("]");
token.ENTRY = -1;
break;
case ',':
token.CODE = KindNum(",");
token.ENTRY = -1;
break;
case ';':
token.CODE = KindNum(";");
token.ENTRY = -1;
break;
case '.':
token.CODE = KindNum(".");
token.ENTRY = -1;
break;
default:
printf("ERROR::legal symbol \'%c\' in Line %d\n", ch, line);
break;
}
}
int LookUp(char *WORD, int KIND)
{
int i, j, t, h;
char str[20];
for(i=0; i<length; i++)
{
t = symbol[i].CONTENT.NAME.LENG;
h = symbol[i].CONTENT.NAME.HEAD;
if(t == (int)strlen(WORD))
{
t = symbol[i].CONTENT.NAME.LENG;
h = symbol[i].CONTENT.NAME.HEAD;
for(int j=0; j<t; j++)
str[j] = string[h+j];
str[t]='\0';
if(!strcmp(WORD, str))
return i;
}
}
strcat(&string[end], WORD);
for(j=0; j<symbol[i].CONTENT.NAME.LENG; j++)
string[end+j] = WORD[j];
symbol[length].CONTENT.NAME.HEAD = end;
symbol[length].CONTENT.NAME.LENG = strlen(WORD);
symbol[length].KIND = KIND;
length++;
end += strlen(WORD);
fwrite(&symbol[i], sizeof(SYMBOL), 1,fpSymb);
return i;
}
int LookUp(int Val, int KIND)
{
int i;
for(i=0; i<length; i++)
if(symbol[i].KIND == INTEGER)
if(Val == symbol[i].CONTENT.VAL.intVAL)
return i;
symbol[length].CONTENT.VAL.intVAL = Val;
symbol[length].KIND = KIND;
length++;
fwrite(&symbol[i], sizeof(SYMBOL), 1,fpSymb);
return i;
}
int LookUp(double Val, int KIND)
{
int i;
for(i=0; i<length; i++)
if(symbol[i].KIND == REAL)
if(Val-symbol[i].CONTENT.VAL.dblVAL < 0.0000000001 && Val-symbol[i].CONTENT.VAL.dblVAL > -0.0000000001)
return i;
symbol[length].CONTENT.VAL.dblVAL = Val;
symbol[length].KIND = KIND;
length++;
fwrite(&symbol[i], sizeof(SYMBOL), 1,fpSymb);
return i;
}
// Maps a lexeme or token-class name to its numeric code.
//   str - NUL-terminated text to look for in the CODE table.
// Returns the code of the first entry whose lexeme equals str, or -1 when no
// entry matches.
int KindNum(char *str)
{
    int idx = 0;

    while(idx < CODELEN)
    {
        if(strcmp(str, CODE[idx].lexeme) == 0)
            return CODE[idx].code;
        ++idx;
    }
    return -1;
}
// Reports whether a scanned word is a reserved word.
//   WORD - NUL-terminated word.
// Returns true when WORD equals a row of KEYWORD or is "of", false otherwise.
bool IsKeyWord(char *WORD)
{
    // Take the row count from the table itself so the loop stays correct
    // when rows are added or removed.
    const int count = (int)(sizeof KEYWORD / sizeof KEYWORD[0]);
    int i;

    for(i=0; i<count; i++)
    {
        if(strcmp(WORD, KEYWORD[i]) == 0)
            return true;
    }

    // The CODE table gives "of" a keyword code, but KEYWORD does not list
    // it; without this test "of" would be scanned as an identifier.
    return strcmp(WORD, "of") == 0;
}
// Tells whether a character is NOT one of the delimiters this function
// knows about. Note the polarity: despite the name, the result is false for
// the delimiters listed below (punctuation, operators, blank, newline) and
// true for every other character.
//   ch - character to classify.
// '[' and ']' are included so that they count as delimiters like the other
// punctuation characters.
bool Islegal(char ch)
{
    switch(ch)
    {
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case ':':
    case ';':
    case ',':
    case '.':
    case '=':
    case '<':
    case '>':
    case '+':
    case '-':
    case '*':
    case '%':
    case '/':
    case ' ':
    case '|':
    case '&':
    case '\n':
        return false;
    default:
        return true;
    }
}
void PrintToken()
{
int t,h,i;
char str[20];
fpToken = fopen("TOKEN.TXT","r");
TOKEN token;
printf("TOKEN 文件:\n");
for(;!feof(fpToken);)
{
fread(&token, sizeof(TOKEN), 1, fpToken);
if(feof(fpToken))
break;
printf("CODE: %2d \tENTRY: %2d \t %s \n", token.CODE, token.ENTRY, CODE[token.CODE].lexeme);
}
printf("\n");
printf("符号表:\n");
for(i=0; i<length; i++)
{
switch(symbol[i].KIND)
{
case INTEGER:
printf("Value: %-10d\tTYPE: INTEGER\n", symbol[i].CONTENT.VAL.intVAL);
break;
case REAL:
printf("Value: %-10g\tTYPE: REAL\n", symbol[i].CONTENT.VAL.dblVAL);
break;
default:
t = symbol[i].CONTENT.NAME.LENG;
h = symbol[i].CONTENT.NAME.HEAD;
for(int j=0; j<t; j++)
str[j] = string[h+j];
str[t]='\0';
printf("Name : %-10s\tKIND: ", str);
switch(symbol[i].KIND)
{
case IDENTIFIER: printf("IDENTIFIER\n");break;
case RELOP: printf("RELATION OPERATOR\n");break;
case ADDOP: printf("ADDITION OPERATOR\n");break;
case MULOP: printf("MULTIPLICATION OPERATOR\n");break;
case ASSIGNOP: printf("ASSIGN OPERATOR\n");break;
}
break;
}
}
printf("\n");
rewind(fpSet);
printf("符号集:\n");
printf("%s\n",string);
printf("\n");
fclose(fpToken);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -