/* lexicalanalysis.c */
/* (web code-viewer residue: "Font size:") */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "GLOBAL.H"
// Capacity of lexemes[], the character storage for symbol-table strings.
#define STRMAX 3000
// Capacity of symtable[].
#define SYMMAX 300
// Input file; opened in main() and read by filetobuffer() and nextchar().
FILE *fp;
char buffer[SIZE]; // input buffer
// Symbol table; insert() fills it starting at index 1 (index 0 is never written).
struct entry symtable[SYMMAX];
// Character storage that symtable[].lexptr points into.
char lexemes[STRMAX];
// Index of the last used entry in symtable[].
int lastentry = 0;
char* forward; // forward (lookahead) pointer into buffer; NULL before the first nextchar()
char* lexeme_beginning; // pointer to the start of the current lexeme
// Most recent character returned by nextchar().
char c;
// state: current state of the switch in nexttoken();
// start: start state of the pattern currently being tried, advanced by fail().
int state = 0, start = 0;
int lexical_value = 0; // original note: pointer returned by install_id() and install_num(); nexttoken() stores the RELOP kind here
char* token_beginning = 0; // original note: index of the first symbol of the lexeme (not referenced elsewhere in this file section)
// Current input line number; incremented when a '\n' is consumed.
int lineno = 1;
// Set by install_id() to the result of lookup() for the current lexeme.
int tokenval = NONE;
// Scratch copy of the current lexeme, built by install_id().
char lexbuf[BSIZE];
// Keyword table; init() inserts these into the symbol table at run time.
// The final entry (token NONE) is a sentinel that terminates the table.
struct entry keywords[] =
{
    { "char",     CHAR     },
    { "if",       IF       },
    { "int",      INT      },
    { "long",     LONG     },
    { "return",   RETURN   },
    { "const",    CONST    },
    { "case",     CASE     },
    { "continue", CONTINUE },
    { "default",  DEFAULT  },
    { "do",       DO       },
    { "double",   DOUBLE   },
    { "sizeof",   SIZEOF   },
    { "static",   STATIC   },
    { "struct",   STRUCT   },
    { "switch",   SWITCH   },
    { "typedef",  TYPEDEF  },
    { "union",    UNION    },
    { "unsigned", UNSIGNED },
    { "void",     VOID     },
    { "volatile", VOLATILE },
    { "while",    WHILE    },
    { "else",     ELSE     },
    { "enum",     ENUM     },
    { "extern",   EXTERN   },
    { "float",    FLOAT    },
    { "for",      FOR      },
    { "goto",     GOTO     },
    { "short",    SHORT    },
    { "signed",   SIGNED   },
    { "break",    BREAK    },
    { "auto",     AUTO     },
    { "register", REGISTER },
    { "no",       NONE     }
};
// Error reporting: prints message m prefixed with the current line number.
// It only prints; it does not stop the program.
void error(char* m)
{
    const int where = lineno;

    printf(" LINE %d: %s\n", where, m);
}
/* 将标识符插入符号表中 */
int insert(char s[], int tok)
{
int len,lastchar=-1;
len = strlen(s);
if(lastentry + 1 >= SYMMAX) error("symtable full");
if(lastchar + len + 1 >= STRMAX) error("lexemes array full");
lastentry++;
symtable[lastentry].token = tok;
symtable[lastentry].lexptr = &lexemes[lastchar+1];
lastchar = lastchar+len+1;
strcpy(symtable[lastentry].lexptr, s);
return lastentry;
}
/* Load every keyword from keywords[] into the symbol table.
 * The walk stops at the sentinel entry whose token is NONE. */
void init()
{
    int k = 0;

    while (keywords[k].token != NONE)
    {
        insert(keywords[k].lexptr, keywords[k].token);
        k++;
    }
}
/*
 * Fill the input buffer from fp.
 *
 * Reads up to SIZE-1 bytes and terminates the data with '\0' directly
 * after the last byte actually read, so a short read never exposes
 * whatever the buffer held before. buffer[SIZE-1] is always '\0'; that
 * position is what nextchar() tests to recognise the end of the buffer.
 */
void filetobuffer()
{
    size_t got = fread(buffer, 1, SIZE - 1, fp);

    buffer[got] = '\0';         /* got <= SIZE-1, so this index is in range */
    buffer[SIZE - 1] = '\0';
}
/* Dump the buffer contents (as a C string) between two separator lines.
 * No newline is written after the closing separator. */
void printbuffer()
{
    printf("-----------------------The buffer is:-----------------------\n%s\n",buffer);
    fputs("------------------------------------------------------------", stdout);
}
/*
 * Advance the forward pointer and return the lookahead character.
 *
 * On the first call (forward == NULL) scanning starts at buffer[0];
 * afterwards forward is incremented by one. When the '\0' at
 * buffer[SIZE-1] is reached, the first half of the buffer is refilled
 * from fp and scanning continues at buffer[0]. A '\0' anywhere else is
 * reported as end of input by returning '\0'.
 *
 * The refill uses the byte count returned by fread(): everything in the
 * refilled half that was not overwritten is cleared, so a short or empty
 * read can never make stale bytes from the previous fill look like input.
 *
 * NOTE(review): after a refill the terminator sits at buffer[SIZE/2-1],
 * which this function treats as end of input, so at most SIZE/2-1 further
 * bytes are scanned. Lifting that limit needs matching changes in the
 * functions that walk the buffer (install_id(), install_num(), fail()).
 */
char nextchar()
{
    if (forward == NULL) {      /* scanning has just started */
        forward = buffer;
    } else {
        forward++;
    }

    if (*forward != '\x0') {
        return *forward;
    }

    if (forward == &(buffer[SIZE - 1])) {
        /* End of the buffer: reload the first half only, which leaves the
         * second half (where the current lexeme may have started) intact. */
        size_t got = fread(buffer, 1, SIZE / 2 - 1, fp);

        /* Clear from the end of the fresh data through buffer[SIZE/2-1]. */
        memset(buffer + got, '\x0', (size_t)(SIZE / 2) - got);
        forward = buffer;
        return *forward;
    }

    return '\x0';               /* the input has been consumed */
}
/*
 * Print the symbol table: one "lexptr<TAB>token" line per entry, starting
 * at index 1 and stopping at the first entry whose lexptr is NULL.
 *
 * The walk is also bounded by the array size, so a completely full table
 * (no NULL entry left to stop on) is not read past its end.
 */
void printsymtable()
{
    const int capacity = (int)(sizeof symtable / sizeof symtable[0]);
    int index;

    printf("\nThe symtable is:\n");
    printf("lexptr\ttoken");
    for (index = 1; index < capacity && symtable[index].lexptr != NULL; index++)
    {
        printf("\n%s\t%d", symtable[index].lexptr, symtable[index].token);
    }
}
/*
 * Error recovery: skip the rest of the current line.
 *
 * Characters are consumed up to and including the next '\n'; then the line
 * counter is bumped, the next lexeme is set to start right after the
 * newline, and the scanner state is reset to 0.
 *
 * If end of input ('\0') is seen first, the function returns immediately
 * without touching lineno, lexeme_beginning or state. Every character,
 * including the first one fetched, is tested for '\0', so the scan never
 * continues past the end-of-input terminator.
 */
void recover()
{
    for (c = nextchar(); c != '\n'; c = nextchar())
    {
        if (c == '\x0')
        {
            return;
        }
    }
    lineno++;
    lexeme_beginning = forward + 1;
    state = 0;
}
/* Move the forward pointer back by amount positions.
 * Nothing happens when forward currently points at a '\0'. */
void retract(int amount)
{
    if (*forward == '\x0')
    {
        return;
    }
    forward = forward - amount;
}
/*
 * Look up lexeme s in the symbol table.
 *
 * Entries are searched from the most recently inserted one down to
 * index 1. Returns the index of the matching entry, or 0 when s is not
 * in the table.
 *
 * Written as a prototype-style definition: the old identifier-list form
 * gives callers no argument checking and is no longer valid in C23.
 */
int lookup(char s[])
{
    int p;

    for (p = lastentry; p > 0; p--)
    {
        if (strcmp(symtable[p].lexptr, s) == 0)
        {
            return p;
        }
    }
    return 0;
}
/* 插入符号表,这里只是简单输出 */
int install_id()
{
int b=0,p=0; int i,scmp,isk=0;
char* pointer=lexeme_beginning;
for(; pointer != (forward + 1); pointer++,b++)
{
if(*pointer == '\x0')
{
if(pointer == &(buffer[SIZE - 1]))
pointer = buffer;
// else
// {
// printf("wrong in install_id\n");
// getch();
// exit(1);
// }
}
lexbuf[b] = *pointer;
if(b >= BSIZE) error("compiler error");
// printf("%c",*pointer);
}
lexbuf[b] = EOS;
for(i = 0; i <lastentry; i++)
{
scmp =strcmp(keywords[i].lexptr, lexbuf);
if(scmp!=0){
isk=1;
// continue;
}
else if(scmp==0) {
isk=2;
break;
}
}
if(isk==1) printf("< ID,%s>\n",lexbuf);
if(isk==2) printf("< KEYWORDS,%s>\n",lexbuf);
p = lookup(lexbuf);
// if(p == 0) p = insert(lexbuf, ID);
tokenval = p;
lexeme_beginning = forward;
return symtable[p].token;
}
/*
 * Handle a numeric lexeme; in this version the value is only printed.
 *
 * Prints "< NUM ," followed by every non-'\0' character from
 * lexeme_beginning through forward (inclusive), then ">" and a newline.
 * If forward lies before lexeme_beginning the function prints a
 * diagnostic, waits for a key press and terminates the program with
 * exit status 1.
 */
void install_num()
{
    char* cursor;

    if (forward < lexeme_beginning)
    {
        printf("forward run too further than lexeme_beginning(in install_num())!");
        getch();
        exit(1);
    }

    printf("< NUM ,");
    cursor = lexeme_beginning;
    while (cursor <= forward)
    {
        if (*cursor != '\x0')
        {
            printf("%c",*cursor);
        }
        cursor++;
    }
    putchar('>');
    putchar('\n');
}
/*
 * The current pattern failed to match: rewind the input to the start of
 * the lexeme and select the start state of the next pattern to try.
 *
 * Start states are tried in the order 0 -> 9 -> 12 -> 20 -> 25 -> 28.
 * From 28 the symbol is reported as unknown, recover() is called and the
 * start state goes back to 0. If the lexeme starts at '\0' (end of
 * input), start is forced to a value with no branch of its own, so the
 * final branch runs: it prints a completion message, waits for a key
 * press and exits with status 0.
 *
 * Returns the new value of start.
 *
 * NOTE(review): retract(1) leaves forward one position before
 * lexeme_beginning; when lexeme_beginning is the first byte of the buffer
 * that is one position before the array.
 */
int fail()
{
    forward = lexeme_beginning;
    if (*forward == '\x0')
    {
        start = 100;
    }
    retract(1);

    if (start == 0)
    {
        start = 9;
    }
    else if (start == 9)
    {
        start = 12;
    }
    else if (start == 12)
    {
        start = 20;
    }
    else if (start == 20)
    {
        start = 25;
    }
    else if (start == 25)
    {
        start = 28;
    }
    else if (start == 28)
    {
        error("Unknow Symbol!");//compiler error
        recover();
        start = 0;
    }
    else
    {
        printf("The file has been lexed!\n");
        getch();
        exit(0);
    }
    return start;
}
/*
 * Scan the next token from the input.
 *
 * A transition-diagram scanner driven by the global `state`:
 *   0, 1, 6  relational operators  = < <= <> > >=
 *   9..11    identifiers and keywords (letter followed by letters/digits)
 *   12..18   numbers with an exponent part
 *   20..23   numbers with a fraction part
 *   25..26   integers
 *   other    unknown symbol
 * When a pattern does not match, fail() rewinds the input and supplies the
 * start state of the next pattern to try.
 *
 * Returns RELOP for every relational operator (the specific operator is
 * stored in lexical_value), NUM for every number, the value returned by
 * install_id() for identifiers/keywords, and the offending character for
 * an unknown symbol. Each recognised token is also printed.
 *
 * Characters are converted to unsigned char before being passed to the
 * <ctype.h> classifiers; passing a negative plain char is undefined.
 */
int nexttoken()
{
    while (1) {
        switch (state)
        {
        case 0:
            c = nextchar();     /* c is the lookahead character */
            if (isspace((unsigned char)c) || c == '\r' || c == '\n')
            {
                if (c == '\n') lineno++;
                state = 0;
                lexeme_beginning++;     /* advance beginning of lexeme */
            }
            else if (c == '<') state = 1;
            else if (c == '=')
            {
                printf("< RELOP,EQ>\n");
                lexical_value = EQ;
                return RELOP;
            }
            else if (c == '>') state = 6;
            else state = fail();
            break;

        case 1:
            c = nextchar();
            if (c == '=')
            {
                printf("< RELOP,LE>\n");
                lexical_value = LE;
                return RELOP;
            }
            else if (c == '>')
            {
                printf("< RELOP,NE>\n");
                lexical_value = NE;
                return RELOP;
            }
            else
            {
                retract(1);     /* the lookahead is not part of "<" */
                printf("< RELOP,LT>\n");
                lexical_value = LT;
                return RELOP;
            }

        case 6:
            c = nextchar();
            if (c == '=')
            {
                printf("< RELOP,GE>\n");
                lexical_value = GE;
                return RELOP;   /* token class, like every other operator */
            }
            else
            {
                retract(1);     /* the lookahead is not part of ">" */
                printf("< RELOP,GT>\n");
                lexical_value = GT;
                return RELOP;
            }

        case 9:
            c = nextchar();
            if (isalpha((unsigned char)c)) state = 10;
            else state = fail();
            break;

        case 10:
            c = nextchar();
            if (isalpha((unsigned char)c) || isdigit((unsigned char)c)) state = 10;
            else state = 11;
            break;

        case 11:
            retract(1);
            return install_id();

        case 12:
            c = nextchar();
            if (isdigit((unsigned char)c)) state = 13;
            else state = fail();
            break;

        case 13:
            c = nextchar();
            if (isdigit((unsigned char)c)) state = 13;
            else if (c == '.') state = 14;
            else if (c == 'E') state = 16;
            else state = fail();
            break;

        case 14:
            c = nextchar();
            if (isdigit((unsigned char)c)) state = 15;
            else state = fail();
            break;

        case 15:
            c = nextchar();
            if (isdigit((unsigned char)c)) state = 15;
            else if (c == 'E') state = 16;
            else state = fail();
            break;

        case 16:
            c = nextchar();
            if (c == '+' || c == '-') state = 17;
            /* test the character only; do not overwrite c with the result */
            else if (isdigit((unsigned char)c)) state = 18;
            else state = fail();
            break;

        case 17:
            c = nextchar();
            if (isdigit((unsigned char)c)) state = 18;
            else state = fail();
            break;

        case 18:
            c = nextchar();
            if (isdigit((unsigned char)c)) state = 18;
            else
            {
                retract(1);
                install_num();
                return NUM;
            }
            break;

        case 20:
            c = nextchar();
            if (isdigit((unsigned char)c)) state = 21;
            else state = fail();
            break;

        case 21:
            c = nextchar();
            if (isdigit((unsigned char)c)) state = 21;
            else if (c == '.') state = 22;
            else state = fail();
            break;

        case 22:
            c = nextchar();
            if (isdigit((unsigned char)c)) state = 23;
            else state = fail();
            break;

        case 23:
            c = nextchar();
            if (isdigit((unsigned char)c)) state = 23;
            else
            {
                retract(1);
                install_num();
                return NUM;
            }
            break;

        case 25:
            c = nextchar();
            if (isdigit((unsigned char)c)) state = 26;
            else state = fail();
            break;

        case 26:
            c = nextchar();
            if (isdigit((unsigned char)c)) state = 26;
            else
            {
                retract(1);
                install_num();
                return NUM;
            }
            break;

        default:
            /* No pattern matched: report the single character and move on. */
            c = nextchar();
            printf("< ,%c> ", c);
            error("Unknow Symbol!");
            return c;
        }
    }
}
/*
 * Entry point: loads the keywords, asks for a source file name, fills and
 * prints the input buffer, then calls nexttoken() repeatedly.
 *
 * The file name is read with fgets(), so input longer than the filename
 * array cannot overflow it. None of the program's buffers are released
 * with free(): they are statically allocated arrays, not heap blocks.
 *
 * NOTE(review): `completed` is never set in this function; the scanning
 * loop ends when fail() calls exit(0) at end of input, so the statements
 * after the loop are not reached as the code stands.
 */
int main()
{
    int completed = 0;
    char filename[50];

    init();
    printf("please enter the file name:");
    if (fgets(filename, sizeof filename, stdin) == NULL)
    {
        printf("\nError on reading the file name!");
        getch();
        exit(1);
    }
    filename[strcspn(filename, "\r\n")] = '\0';   /* drop the trailing newline */

    if ((fp = fopen(filename, "r")) == NULL)
    {
        printf("\nError on open the file!");
        getch();
        exit(1);
    }

    filetobuffer();
    printbuffer();      /* print the contents of the source file */
    putchar('\n');

    forward = NULL;
    lexeme_beginning = buffer;
    while (!completed)
    {
        state = 0;
        start = 0;
        nexttoken();
        /* the next lexeme starts right after the last character consumed */
        lexeme_beginning = forward;
        lexeme_beginning++;
    }

    fclose(fp);
    printsymtable();
    getch();
    return 0;
}
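/*
 * (web code-viewer residue, not part of the program)
 * Keyboard shortcuts:
 *   Copy code:          Ctrl + C
 *   Search code:        Ctrl + F
 *   Full screen:        F11
 *   Toggle theme:       Ctrl + Shift + D
 *   Show shortcuts:     ?
 *   Increase font size: Ctrl + =
 *   Decrease font size: Ctrl + -
 */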