📄 lexan.c
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "util.h"
#include "lexan.h"
#include "errmsg.h"
/* Size of one half of the double buffer; the last slot of each half is
   reserved for an EOF sentinel. */
#define BUFFER_SIZE 1024

/* Input stream the buffer halves are filled from. */
FILE *fp;

/* Double buffer: two consecutive halves of BUFFER_SIZE chars each. */
char buffer[2 * BUFFER_SIZE];
/* Initialize the double buffer by storing the EOF sentinel in the last
   slot of each half (first half, then second half). */
void initBuffer(){
    int half;

    for (half = 1; half <= 2; half++)
        buffer[half * BUFFER_SIZE - 1] = EOF;
}
/* Fill one half of the double buffer from fp.
 *
 * n == 1 selects the first half (indices 0 .. BUFFER_SIZE-2); any other
 * value selects the second half (BUFFER_SIZE .. 2*BUFFER_SIZE-2).  The last
 * slot of the selected half is never written here, so a sentinel stored
 * there stays in place.  When the input runs out, an EOF marker is stored
 * at the current position and loading stops.
 */
void LoadBuffer(int n){
    int i, end;
    int c;  /* must be int: fgetc() reports EOF out of band, and a char
               cannot tell EOF apart from a valid byte */

    i = (n == 1) ? 0 : BUFFER_SIZE;  /* first index of the chosen half */
    end = (n == 1) ? (BUFFER_SIZE - 1) : (BUFFER_SIZE * 2 - 1);

    for (; i < end; i++) {
        c = fgetc(fp);
        if (c == EOF) {
            /* End of input: mark it in the buffer and stop loading. */
            buffer[i] = EOF;
            break;
        }
        /* NOTE(review): buffer is a plain char array, so where char is
           signed a 0xFF data byte still compares equal to EOF for code
           that tests buffer[] directly. */
        buffer[i] = (char)c;
    }
}
/* Symbol table. */
struct SYMBOL symtable[MAX_SYMBOL_TABLE];

/* Index of the most recently inserted symbol-table entry. */
int lastSymbol = 0;

/* Holds a symbol-table entry index. */
int tokenValue;

/* Character position (ordinal number of the current character). */
int charPos = 0;

/* Start position of the token being extracted (ordinal number, within the
   file, of its first character). */
int tokPos = 0;

/* Token text buffer. */
char lexbuf[TOKEN_SIZE];

/* Current state of the recognizer. */
int state = 0;

/* Start state of the pattern currently being tried. */
int start = 0;

/* The two buffer pointers: forward scans ahead, begin marks the lexeme start. */
int forward = -1;
int begin = 0;
/* Printable token names, indexed by (token code - 257).  The list ends
   with a NULL entry. */
char *toknames[] = {
    /* value classes */
    "ID", "LITERAL", "NUM",
    /* punctuation */
    "COMMA", "COLON", "SEMICOLON",
    "LPAREN", "RPAREN", "LBRACK", "RBRACK", "LBRACE", "RBRACE",
    "DOT",
    /* arithmetic operators */
    "PLUS", "MINUS", "TIMES", "DIVIDE",
    /* relational operators */
    "EQ", "NEQ", "LT", "LE", "GT", "GE",
    /* logical operators, assignment, modulo */
    "AND", "OR", "ASSIGN", "MOD",
    /* reserved words */
    "PROGRAM", "PROCEDURE", "FUNCTION", "BEGIN", "END",
    "IF", "THEN", "ELSE", "WHILE", "FOR",
    "TO", "DOWNTO", "DO", "VAR", "TYPE", "ARRAY", "CONST",
    NULL
};
/* Return the printable name of a token code.
 *
 * tok: token code; codes 257..300 map onto toknames[tok - 257].
 * Returns the name string, or "BAD_TOKEN" for any code outside that range.
 *
 * The parameter now carries an explicit int type: the former untyped
 * parameter relied on implicit int, which is not valid since C99.
 */
char* tokname(int tok) {
    if (tok < 257 || tok > 300)
        return "BAD_TOKEN";
    return toknames[tok - 257];
}
/* Look s up in toknames[], ignoring letter case.
 *
 * Returns the index of the first matching entry, or -1 when the
 * NULL terminator of the list is reached without a match.
 *
 * NOTE(review): the search covers the whole list, so spellings such as
 * "id", "num" or "comma" also match -- confirm that this is intended.
 */
int isKeyword(char *s){
    int idx = 0;

    while (toknames[idx] != NULL) {
        if (stricmp(toknames[idx], s) == 0)
            return idx;
        idx++;
    }
    return -1;
}
/* Search the symbol table for an entry with text s and token type tokType.
 *
 * The scan runs from the newest entry (lastSymbol) down to index 1 and the
 * text comparison ignores letter case.  Returns the index of the first
 * match found, or 0 when there is none.
 */
int isExist(int tokType,char *s){
    int idx = lastSymbol;

    while (idx > 0) {
        if (stricmp(symtable[idx].tokPtr, s) == 0 && symtable[idx].token == tokType)
            return idx;
        idx--;
    }
    return 0;
}
/* Copy the current lexeme, buffer[begin .. forward], into lexbuf as a
 * NUL-terminated string.  EOF markers found inside the range are skipped.
 *
 * The copy is bounded by the size of lexbuf: a lexeme that does not fit is
 * truncated instead of being written past the end of the array.
 */
void getLexbuf(){
    const int cap = (int)sizeof lexbuf - 1;  /* keep room for the '\0' */
    int src;
    int dst = 0;

    for (src = begin; src <= forward && dst < cap; src++) {
        if (buffer[src] != EOF)
            lexbuf[dst++] = buffer[src];
    }
    lexbuf[dst] = '\0';
}
/* Append a new entry to the symbol table.
 *
 * token: token type stored in the entry.
 * s:     text of the symbol; a private copy is allocated with
 *        checked_malloc and stored in the entry.
 * Returns the index of the new entry (entries start at index 1), or 0 if
 * the table is full and error() returns.
 *
 * The capacity check runs before anything is allocated or written, so a
 * full table can no longer lead to a write past the end of symtable.
 * NOTE(review): whether error() returns is not visible here; if it never
 * returns, the early return below is simply unreachable.
 */
int insertSymbol(int token,char *s){
    char *copy;

    if (lastSymbol + 1 >= MAX_SYMBOL_TABLE) {
        error(0,"SYSBOL TABLE FULL");
        return 0;
    }

    copy = (char*)checked_malloc(strlen(s) + 1);
    strcpy(copy, s);

    lastSymbol++;  /* index 0 stays unused */
    symtable[lastSymbol].token = token;
    symtable[lastSymbol].tokPtr = copy;
    return lastSymbol;
}
/* Install the current lexeme in the symbol table.
 *
 * The lexeme is first copied into lexbuf with getLexbuf().  If isExist()
 * finds a matching entry its index is returned; otherwise a new entry of
 * type tokType is inserted and its index is returned.
 *
 * The parameter now carries an explicit int type: the former untyped
 * parameter relied on implicit int, which is not valid since C99.
 *
 * NOTE(review): the lookup is hard-coded to token type ID rather than
 * tokType, so a LITERAL can resolve to an ID entry with the same spelling.
 * Left unchanged because isExist() compares case-insensitively, and looking
 * literals up by their own type would merge literals differing only in
 * case -- confirm the intended behavior before changing it.
 */
int installSymbol(int tokType){
    int p;

    getLexbuf();
    p = isExist(ID, lexbuf);
    if (p == 0)
        p = insertSymbol(tokType, lexbuf);
    return p;
}
/* Install a string literal in the symbol table without its surrounding
 * single quotes.  Returns the index reported by installSymbol(LITERAL).
 */
int installLiteral(){
    int idx;

    /* Shrink the lexeme range by one on each side to drop the quotes. */
    begin += 1;
    forward -= 1;

    idx = installSymbol(LITERAL);

    /* Undo the adjustment so both pointers cover the full lexeme again. */
    begin -= 1;
    forward += 1;

    return idx;
}
/* Advance forward by one and return the character found there.
 *
 * charPos is incremented once per call.  When forward lands on an EOF
 * marker at the end of a buffer half, the other half is loaded and forward
 * moves to its first slot; an EOF marker anywhere else makes the function
 * return EOF.
 */
char nextchar(){
    forward += 1;
    charPos += 1;

    /* Common case: an ordinary character. */
    if (buffer[forward] != EOF)
        return buffer[forward];

    if (forward == BUFFER_SIZE - 1) {
        /* Sentinel of the first half: continue in the second half. */
        LoadBuffer(2);
        forward += 1;
    } else if (forward == BUFFER_SIZE * 2 - 1) {
        /* Sentinel of the second half: wrap around to the first half. */
        LoadBuffer(1);
        forward = 0;
    } else {
        return EOF;
    }

    return buffer[forward];
}
/* Step back one character: decrement both forward and charPos. */
void back(){
    --charPos;
    --forward;
}
/* Set tokPos to the position of the first character of the current token:
 * charPos minus the distance between forward and begin.
 *
 * The stray ';' that followed the function body was removed; an empty
 * declaration at file scope is not valid ISO C.
 */
void adjustPos(){
    tokPos = charPos - (forward - begin);
}
/* Handle a failed match: rewind to the start of the lexeme and select the
 * start state of the next pattern to try.
 *
 * charPos is moved back accordingly and forward is set to begin - 1,
 * because nextchar() increments forward before reading.  The start states
 * are tried in the order 0 -> 9 -> 12 -> 20 -> 25 -> 28 -> 45; state 45
 * stays at 45, and any other value of start is reported via error().
 * Returns the new value of start.
 */
int fail(){
    static const int order[] = { 0, 9, 12, 20, 25, 28, 45 };
    const int last = (int)(sizeof order / sizeof order[0]) - 1;
    int k;

    charPos -= (forward - begin) + 1;
    forward = begin - 1;

    for (k = 0; k < last; k++) {
        if (start == order[k]) {
            start = order[k + 1];
            return start;
        }
    }

    if (start != order[last])
        error(0,"ERROR");
    return start;
}
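/* Because of the double buffer, the last slot of each buffer half holds an
   EOF sentinel (set up by the initialization routine, which this note calls
   reset()). When forward reaches such a sentinel it has to advance by one
   or be reset to zero in order to switch halves, and begin can run into the
   same situation. For that reason, once a token has been recognized, begin
   is moved with begin = forward + 1 instead of begin++, so that it does not
   stop on the EOF at the end of a half.
*/
/* Get the next token. */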
int nexttoken(){
char c;
state=0;
while(1){
switch(state){
case 0:
start=0; /*设置开始状态*/
c=nextchar();
if(c==' '||c=='\t')
state=0,begin=forward+1;
else if(c=='\n')
state=0,adjustPos(),newLine(),begin=forward+1;
else if(c=='<') state=1;
else if(c=='=') state=5;
else if(c=='>') state=6;
else state=fail();
break;
case 1:c=nextchar(); /*case 2*/
if(c=='=') {
adjustPos();
begin=forward+1;
return LE;
}else if(c=='>'){
adjustPos();
begin=forward+1; /*case 3*/
return NEQ;
}else{ /*case 4*/
back();
adjustPos();
begin=forward+1;
return LT;
}
case 5:adjustPos();
begin=forward+1;
return EQ;
case 6:c=nextchar();
if(c=='='){
adjustPos();
begin=forward+1; /*case 7*/
return GE;
}else{ /*case 8*/
back();
adjustPos();
begin=forward+1;
return GT;
}
case 9:start=9;
c=nextchar();
if(isalpha(c)) state=10;
else state=fail();
break;
case 10:c=nextchar();
if(isalpha(c))state=10;
else if(isdigit(c)) state=10;
else state=11;
break;
case 11:
{ /*为了适应VC编译C源文件加括号*/
int p;
back();
getLexbuf();
if((p=isKeyword(lexbuf))!=-1){
adjustPos();
begin=forward+1;
return (p+257);
}else{
tokenValue=installSymbol(ID);
adjustPos();
begin=forward+1;
return ID;
}
}
case 12:start=12; /*开始匹配无符号数 最长匹配 科学技术法*/
c=nextchar();
if(isdigit(c)){
state=13;
}else{
state=fail();
}
break;
case 13:
c = nextchar();
if(isdigit(c))
state = 13;
else if(c == 'e'||c == 'E')
state = 16;
else if(c == '.')
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -