📄 lex.cpp
字号:
#include "Lex.h"
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <math.h>
#include <conio.h>
/*
 * One entry of the identifier table: a name and the token code stored
 * with it.
 *
 * The struct carries the tag `Identifier` as well as the typedef name,
 * because this file refers to the type both as `Identifier` and as
 * `struct Identifier`; without the tag the latter does not name this type.
 */
typedef struct Identifier
{
    char *idptr; /* pointer to the NUL-terminated name */
    int idval;   /* token code recorded for the name */
} Identifier;
STATUS start = 1;//
STATUS state = 1;//chushi
const int BUFSIZE = 6000; //输入缓冲区的大小
const int MAX_IDNUM = 600; //最多允许的标志符数(不重复计数)
const int MAX_LENOF_WORD = 256; //单个标志符或者字符串的最大长度
const int MAX_LENOF_IDTABLE = 2000; //所有标志符(不重复计数)的字符总长度
char InputBuffer[BUFSIZE] ; //输入缓冲区数组
struct Identifier IdTable[MAX_IDNUM];
char WordBuffer[MAX_LENOF_WORD]; //用于暂存一个最小词法单元(词素)
char IdString[MAX_LENOF_IDTABLE] ; //用于存储所有标志符(包括关键字,不重复计数)的流式字符
FILE *Srcfp; //指向C语言源文件
FILE *Lexfp; //指向经词法分析后的主目标文件
FILE *Idfp; //指向记录所有标志符的文件(不重复)
FILE *Errfp; //指向出错记录文件
int lineno = 0 ; //用于记录源代码中的行数
int lastid = 0; //当前分析出来的最后一个标志计数
int lastchar = -1;
char c;
char *token_beginning; //词素的开始指针
char *forward; //向前指针
/*
 * Keyword table.  InitIdTable() copies these entries into the identifier
 * table and stops at the entry whose idval is NONE, so that entry must
 * stay last.
 */
Identifier keywords[] =
{
    {"auto", AUTO},         {"break", BREAK},       {"case", CASE},         {"char", CHAR},
    {"const", CONST},       {"continue", CONTINUE}, {"default", DEFAULT},   {"do", DO},
    {"double", DOUBLE},     {"else", ELSE},         {"enum", ENUM},         {"extern", EXTERN},
    {"float", FLOAT},       {"for", FOR},           {"goto", GOTO},         {"if", IF},
    {"int", INT},           {"long", LONG},         {"register", REGISTER}, {"return", RETURN},
    {"short", SHORT},       {"signed", SIGNED},     {"sizeof", SIZEOF},     {"static", STATIC},
    {"struct", STRUCT},     {"switch", SWITCH},     {"typedef", TYPEDEF},   {"union", UNION},
    {"unsigned", UNSIGNED}, {"void", VOID},         {"volatile", VOLATILE}, {"while", WHILE},
    {"#include", INCLUDE},
    {"none", NONE}
};
/* 打印符号表 */
void OutputIdTable()
{
int index;
fprintf(Idfp,"The IdTable is:\n");
fprintf(Idfp,"idptr\tidval");
fprintf(Idfp,"\n-----\t-----");
for(index = 1;IdTable[index].idptr != NULL;index++ )
{
fprintf(Idfp,"\n%s\t%d",IdTable[index].idptr,IdTable[index].idval);
}
}
/*
 * Insert the name s with token code tok into the identifier table.
 *
 * The characters of s (including the terminating NUL) are appended to
 * IdString and the new IdTable entry points at that copy.
 *
 * Returns the index of the new entry (always >= 1).  If either IdTable or
 * IdString has no room left, the problem is reported through Error() and
 * 0 is returned without storing anything; Error() only logs, so carrying
 * on would write past the end of the arrays.  Index 0 is never used for a
 * real entry and is also what lookup() returns for "not found".
 */
int insert(char s[], int tok)
{
    int len = (int)strlen(s);

    if (lastid + 1 >= MAX_IDNUM)
    {
        Error("identifier table is full");
        return 0;
    }
    if (lastchar + len + 1 >= MAX_LENOF_IDTABLE)
    {
        Error("IdString array full");
        return 0;
    }

    lastid++;
    IdTable[lastid].idval = tok;
    IdTable[lastid].idptr = &IdString[lastchar + 1];
    lastchar = lastchar + len + 1; /* now indexes the NUL that ends this name */
    strcpy(IdTable[lastid].idptr, s);
    return lastid;
}
/*
 * Search the identifier table for the name s.
 *
 * The scan runs from the most recently inserted entry (lastid) down to
 * entry 1.  Returns the index of the first entry whose name equals s, or
 * 0 when no entry matches.
 */
int lookup(char s[])
{
    int slot = lastid;

    while (slot > 0 && strcmp(IdTable[slot].idptr, s) != 0)
        slot--;
    return slot;
}
bool InitIdTable() //将关键字表的所有关键字初始化到标志符数组中
{
struct Identifier *p;
for(p = keywords; p->idval != NONE; p++)
{
insert(p->idptr, p->idval);
}
return true;
}
/*
 * Error recovery: skip the rest of the current line.
 *
 * Characters are consumed with NextChar() up to and including the next
 * '\n'.  On success the line counter is incremented, token_beginning is
 * set to forward + 1, state is reset to 1 and true is returned.
 *
 * Returns false as soon as NextChar() reports '\0' (no more input).
 * Every character read is checked, including the first one, so the scan
 * never continues past the end-of-input marker.
 */
bool Recover()
{
    for (;;)
    {
        c = NextChar();
        if (c == '\n')
            break;
        if (c == '\x0')
            return false;
    }
    lineno++;
    token_beginning = forward + 1;
    state = 1;
    return true;
}
/*
 * Prime the input buffer before scanning starts.
 *
 * InputBuffer is used as two halves, each ending in a '\0' sentinel at
 * index BUFSIZE/2 - 1 and BUFSIZE - 1.  Only the first half is loaded
 * here: NextChar() reads the next chunk of the file into the second half
 * whenever it reaches the first sentinel, so data preloaded into the
 * second half would be overwritten before it was ever scanned.
 *
 * fread() is called with an element size of 1 so that its return value is
 * the number of bytes actually read; a '\0' is stored right after them so
 * that a file shorter than half a buffer is terminated where it ends.
 */
void FillWholeBuffer()
{
    size_t loaded;

    loaded = fread(InputBuffer, 1, BUFSIZE/2 - 1, Srcfp);
    InputBuffer[loaded] = '\x0';          /* end of the data actually read */
    InputBuffer[BUFSIZE/2 - 1] = '\x0';   /* sentinel closing the first half */
    InputBuffer[BUFSIZE/2] = '\x0';       /* second half starts out empty */
    InputBuffer[BUFSIZE - 1] = '\x0';     /* sentinel closing the second half */
}
/*
 * Record the identifier lying between token_beginning and forward.
 *
 * The lexeme is written to Lexfp as "< ID ,name>" and copied into
 * WordBuffer; if lookup() does not find the name it is added to the
 * identifier table with token code ID.  Returns the idval stored in the
 * table entry for the name.
 *
 * While copying, a '\0' at one of the two buffer sentinels is skipped
 * (stepping over the middle sentinel, wrapping to the buffer start at the
 * end sentinel); a '\0' anywhere else is treated as an internal error and
 * terminates the program.
 *
 * A lexeme that does not fit in WordBuffer is reported once through
 * Error() and its stored name is truncated; the length is checked before
 * each store so WordBuffer is never written past its end.
 */
int install_id()
{
    int len = 0;        /* characters stored in WordBuffer so far */
    int too_long = 0;   /* set once the overflow has been reported */
    int slot;
    char *cur;

    fprintf(Lexfp,"< ID ,");
    for (cur = token_beginning; cur != (forward + 1); cur++)
    {
        if (*cur == '\x0')
        {
            if (cur == &(InputBuffer[BUFSIZE/2 - 1]))
                cur++;
            else if (cur == &(InputBuffer[BUFSIZE - 1]))
                cur = InputBuffer;
            else
            {
                printf("wrong in install_id");
                getch();
                exit(1);
            }
        }
        if (len < MAX_LENOF_WORD - 1)
        {
            WordBuffer[len++] = *cur; /* one slot stays free for the terminator */
        }
        else if (!too_long)
        {
            Error("compiler error: identifier too long");
            too_long = 1;
        }
        fprintf(Lexfp,"%c",*cur);
    }
    WordBuffer[len] = EOS;

    slot = lookup(WordBuffer);
    if (slot == 0)
        slot = insert(WordBuffer, ID);
    fprintf(Lexfp,">\r\n");
    token_beginning = forward;
    return IdTable[slot].idval;
}
/* 用来存放num表项的值,这里只是简单输出 */
void install_num()
{
char* p = token_beginning;
fprintf(Lexfp,"< NUM ,");
for(; p != forward+1; p++)
{
if(*p != '\x0')
{
fprintf(Lexfp,"%c",*p);
}
}
fprintf(Lexfp,">\n");
}
/* 记录字符常量 */
void install_char()
{
char* p = token_beginning;
fprintf(Lexfp,"< CHAR ,");
for(; p != forward+1; p++)
{
if(*p != '\x0')
{
fprintf(Lexfp,"%c",*p);
}
}
fprintf(Lexfp,">\n");
}
/* 记录字符串常量 */
void install_string()
{
char* p = token_beginning;
fprintf(Lexfp,"< STRING ,");
for(; p != forward+1; p++)
{
if(*p != '\x0')
{
fprintf(Lexfp,"%c",*p);
}
}
fprintf(Lexfp,">\n");
}
/*
 * Move the forward pointer back by amount positions.
 *
 * Nothing is moved when forward currently points at a '\0'.
 * Always returns true.
 */
bool Retract(int amount)
{
    if (*forward == '\x0')
        return true;

    forward -= amount;
    return true;
}
/*
 * End of the run: report completion, dump the identifier table, close
 * all four streams, wait for a key press and exit with status 0.
 */
static void finish_lexing()
{
    printf("The file has been lexed!\n");
    OutputIdTable();
    fclose(Srcfp);
    fclose(Errfp);
    fclose(Idfp);
    fclose(Lexfp);
    getch();
    exit(0);
}

/*
 * The current pattern did not match: rewind and select the next pattern.
 *
 * forward is reset to token_beginning and retracted by one, then start is
 * advanced to the first state of the next pattern (1, 4, 7, 15, 20, 42,
 * 70, 90, 110, 120).  Returns the new value of start.
 *
 * - If the lexeme starts at a '\0', the input is exhausted and the
 *   program finishes (table dump, files closed, exit status 0).
 * - If the last pattern (120) also fails, "Unknown symbol " is logged and
 *   Recover() skips to the next line, after which matching restarts at
 *   pattern 1.  When Recover() reports that the input ended before a
 *   newline was found, the program finishes as well; ignoring that result
 *   would restart matching on the same unknown symbol indefinitely.
 * - Any other value of start terminates the program with status 1.
 */
int fail()
{
    forward = token_beginning;
    if (*forward == '\x0')
    {
        start = 1000; /* end of input */
    }
    Retract(1);

    switch (start)
    {
    case 0:   start = 1;   break;
    case 1:   start = 4;   break;
    case 4:   start = 7;   break;
    case 7:   start = 15;  break;
    case 15:  start = 20;  break;
    case 20:  start = 42;  break;
    case 42:  start = 70;  break;
    case 70:  start = 90;  break;
    case 90:  start = 110; break;
    case 110: start = 120; break;
    case 120:
        Error("Unknown symbol ");
        if (!Recover())
            finish_lexing(); /* no newline left to resynchronise on */
        start = 1;
        break;
    case 1000:
        finish_lexing();
        break; /* not reached: finish_lexing() exits */
    default:
        getch();
        exit(1);
    }
    return start;
}
/*
 * Advance the forward pointer by one character and return that character.
 *
 * On the very first call (forward is NULL) scanning starts at the
 * beginning of InputBuffer.  The buffer is used as two halves, each ended
 * by a '\0' sentinel:
 *   - reaching the sentinel at BUFSIZE/2 - 1 loads the next part of the
 *     file into the second half and continues there;
 *   - reaching the sentinel at BUFSIZE - 1 loads the next part into the
 *     first half and continues at the start of the buffer.
 * A '\0' at any other position means the input is exhausted; '\0' is
 * returned in that case.
 *
 * After each reload a '\0' is stored directly behind the bytes fread()
 * actually delivered.  Without it, a short final read would leave the
 * remainder of the previous load in place and the old text would be
 * scanned a second time.
 */
char NextChar()
{
    size_t loaded;

    if (forward == NULL) /* first character of the scan */
    {
        forward = InputBuffer;
    }
    else
    {
        forward++;
    }

    if (*forward != '\x0')
    {
        return *forward;
    }

    if (forward == &(InputBuffer[BUFSIZE/2 - 1]))
    {
        loaded = fread(&(InputBuffer[BUFSIZE/2]), 1, BUFSIZE/2 - 1, Srcfp);
        InputBuffer[BUFSIZE/2 + loaded] = '\x0'; /* end of the fresh data */
        InputBuffer[BUFSIZE - 1] = '\x0';        /* sentinel of the second half */
        forward++;
        return *forward;
    }
    if (forward == &(InputBuffer[BUFSIZE - 1]))
    {
        loaded = fread(InputBuffer, 1, BUFSIZE/2 - 1, Srcfp);
        InputBuffer[loaded] = '\x0';             /* end of the fresh data */
        InputBuffer[BUFSIZE/2 - 1] = '\x0';      /* sentinel of the first half */
        forward = InputBuffer;
        return *forward;
    }

    /* '\0' that is not a sentinel: no more input */
    return '\x0';
}
/*
 * Report a lexical error.
 *
 * Writes "line <lineno>: <reason>" followed by a newline to Errfp.  The
 * function only logs; it does not stop the analysis.
 */
void Error(char *reason)
{
    fprintf(Errfp, "line %d: %s\n", lineno, reason);
}
/* 得到文件的下一个词素 */
int NextToken()
{
while(1)
{
switch(state)
{
case 1:
c = NextChar();
if(isalpha(c) || (c == '_') || (c == '#'))
state = 2;
else state = fail();
break;
case 2:
c = NextChar();
if(isalpha(c) || isdigit(c) || (c == '_'))
state = 2;
else if(c == '.')
state = 140; //额外加的,所以状态号无规律
else
{
state = 3;
}
break;
case 3:
Retract(1);
install_id();
return ID;
case 140:
c = NextChar();
if(isalpha(c))
state = 141;
break;
case 141:
c = NextChar();
if(isalpha(c))
state = 141;
else
{
//state = 142
Retract(1);
install_id();
return ID;
}
case 4:
c = NextChar();
if(isspace(c))
{
if(c == '\n')
lineno++;
state = 5;
token_beginning++;
}
else if(c == '/')
{
state = 60;
}
else if(c == ';' || c == ',' || c == '(' || c == ')' || c == '{' || c == '}' || c == '[' || c == ']')
{
state = 64;
}
else
state = fail();
break;
case 5:
c = NextChar();
if(isspace(c))
{
if(c == '\n')
lineno++;
state = 5;
token_beginning++;
}
else
{
state = 6;
}
break;
case 6:
Retract(1);
return BLANK;
case 60:
c = NextChar();
if(c == '*')
{
state = 61;
}
else
{
state = fail();
}
break;
case 61:
c = NextChar();
if(c != '*')
{
state = 61;
token_beginning++;
}
else
{
state = 62;
}
break;
case 62:
c = NextChar();
if(c == '/')
{
return COMMENT;
}
else
{
/* state = fail();*/
state = 61;
}
break;
case 64:
fprintf(Lexfp,"<%c, >\n",*forward);
return BOUND;
case 7:
c = NextChar();
if(isdigit(c))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -