📄 scan.cpp
字号:
/*******************************************************
scan.cpp
武汉大学国际软件学院软件工程05级7班
崔灿
200532580235
2007-9-21
********************************************************/
#include"scan.h"
#include"globals.h"
#include<ctype.h>
#include<string.h>
#include <windows.h>
//bool scan_err;
//Scratch buffer: text is formatted into it with sprintf and then written to hStdout
CHAR chBuf[1024];
//Scratch copy of a token's text, filled by printToken before formatting
char temp[MAXTOKENLEN+1];
//dwWritten receives the byte count from WriteFile; dwRead is only referenced
//by commented-out ReadFile calls in this file
DWORD dwRead, dwWritten;
//Result of the most recent WriteFile call
BOOL fSuccess;
//Size of the line buffer
#define BUFLEN 256
//The line buffer
//It holds one source line at a time
//A line longer than the buffer will cause errors
static char lineBuf[BUFLEN];
//Position in lineBuf of the next character to be read
static int linepos = 0;
//Length of the line currently held in lineBuf
static int bufSize = 0;
//NOTE(review): only referenced from commented-out debug code in this file -- confirm whether other files use it
int i=0;
//返回缓冲区里面的下一个字符
//如果缓冲区读完,则读取下一行,并打印下一行
//如果文件读完了则返回EOF
static char getNextChar(){
if(linepos<bufSize){
return lineBuf[linepos++];
}
else{
lineno++;
if(fgets(lineBuf,BUFLEN,source)!=NULL){
//fprintf(listing,"%4d: %s",lineno,lineBuf);
ZeroMemory(chBuf,strlen(chBuf));
sprintf(chBuf,"%4d: %s",lineno,lineBuf);
//for (;;)
//{
// Write to standard output.
if(scan_only)
fSuccess = WriteFile(hStdout, chBuf, strlen(chBuf), &dwWritten, NULL);
//fprintf(listing,"发送 %s \n",chBuf);
//fflush(listing);
//if (! fSuccess)
// break;
// Read from standard input.
/*if(!solely&&scan_only)
fSuccess = ReadFile(hStdin, chBuf, 1024, &dwRead, NULL); */
//fprintf(listing,"收到 %s \n",chBuf);
//fflush(listing);
//if (! fSuccess || dwRead == 0)
// break;
//if (dwRead != 0)
// break;
//}
bufSize = strlen(lineBuf);
if(lineBuf[bufSize-1]!='\n'){
fprintf(listing,"\n");
}
linepos = 0;
return lineBuf[linepos++];
}
else
return EOF;
}
}
//Reports whether c may legally follow '/', '=' or '<' in the source text.
//Returns 1 for letters, digits, the operator/bracket/punctuation characters
//listed below, and the blanks ' ', '\t' and '\n'; returns 0 otherwise.
static int isLegalChar(char c){
    //<ctype.h> functions are only defined for values representable as
    //unsigned char (or EOF); a plain char holding a byte >= 0x80 may be
    //negative, so convert before classifying.
    unsigned char uc = (unsigned char)c;
    if(isalpha(uc)||isdigit(uc)){
        return 1;
    }
    switch(c){
    case '+': case '-': case '*': case '/':
    case '<': case '>': case '=':
    case '(': case ')': case '[': case ']': case '{': case '}':
    case ';': case ',':
    case ' ': case '\n':
    //Tab is skipped as whitespace by the scanner's start state, so it has
    //to be accepted here as well.
    case '\t':
        return 1;
    default:
        return 0;
    }
}
//Reports whether c is a separator, i.e. a character that ends the
//identifier, number or error token currently being scanned.
//Returns 1 for operators, brackets, ';', ',' and the blanks ' ', '\t', '\n';
//returns 0 otherwise.
static int isSep(char c){
    switch(c){
    case '+': case '-': case '*': case '/':
    case '<': case '>': case '=':
    case '(': case ')': case '[': case ']': case '{': case '}':
    case ';': case ',':
    case ' ': case '\n':
    //Tab is skipped as whitespace by the scanner's start state, so it must
    //also terminate a token instead of turning it into an error.
    case '\t':
        return 1;
    default:
        return 0;
    }
}
//Classifies an identifier string: returns the matching keyword token type
//when c is one of the reserved words, and _ID for anything else.
static _TokenType get_TokenType(char *c){
    //Reserved words paired with the token type each one maps to.
    static const struct {
        const char *word;
        _TokenType type;
    } reserved[] = {
        {"if",     _IF},
        {"else",   _ELSE},
        {"while",  _WHILE},
        {"read",   _READ},
        {"write",  _WRITE},
        {"int",    _INT},
        {"real",   _REAL},
        {"return", _RETURN}
    };
    const int count = (int)(sizeof(reserved)/sizeof(reserved[0]));
    int k;
    for(k=0;k<count;k++){
        if(strcmp(c,reserved[k].word)==0){
            return reserved[k].type;
        }
    }
    return _ID;
}
//Maps a single-character token to its token type.
//Returns _ERROR for any character that is not listed.
static _TokenType getSingleChar_TokenType(char c){
    switch(c){
    case '(': return _LSPAREN;
    case ')': return _RSPAREN;
    case '[': return _LMPAREN;
    case ']': return _RMPAREN;
    case '{': return _LBPAREN;
    case '}': return _RBPAREN;
    case '+': return _PLUS;
    case '-': return _MINUS;
    case '*': return _MUL;
    case ';': return _SEMI;
    case ',': return _COMMA;
    case '>': return _BT;
    default:  return _ERROR;
    }
}
//Pushes the most recently read character back into the line buffer by
//moving the read position one step backwards.
static void unGetNextChar(){
    linepos -= 1;
}
//Formats a textual description of the token tr into chBuf and writes it to
//hStdout. Token types without a case below produce no output.
void printToken(TokenRecord tr){
    ZeroMemory(chBuf,strlen(chBuf));
    //Bounded copy: temp holds at most MAXTOKENLEN characters plus '\0'.
    strncpy(temp,tr.tv.c_str(),MAXTOKENLEN);
    temp[MAXTOKENLEN] = '\0';
    switch(tr.tp){
    case _IF:
    case _ELSE:
    case _WHILE:
    case _READ:
    case _WRITE:
    case _INT:
    case _REAL:
    case _RETURN:
        sprintf(chBuf,"reserver word: %s\n",temp);
        break;
    case _ASSIGN:
        sprintf(chBuf,"=\n");
        break;
    case _LT:
        //Previously printed "=", which made '<' indistinguishable from
        //an assignment in the output.
        sprintf(chBuf,"<\n");
        break;
    case _BT:
        //'>' is produced by the scanner but used to print nothing.
        sprintf(chBuf,">\n");
        break;
    case _EQ:
        sprintf(chBuf,"==\n");
        break;
    case _NE:
        sprintf(chBuf,"<>\n");
        break;
    case _PLUS:
        sprintf(chBuf,"+\n");
        break;
    case _MINUS:
        sprintf(chBuf,"-\n");
        break;
    case _MUL:
        sprintf(chBuf,"*\n");
        break;
    case _DIV:
        sprintf(chBuf,"/\n");
        break;
    case _ID:
        sprintf(chBuf,"_ID, name=%s\n",temp);
        break;
    case _INUM:
        sprintf(chBuf,"_INUM, val=%s\n",temp);
        break;
    case _RNUM:
        sprintf(chBuf,"_RNUM, val=%s\n",temp);
        break;
    case _SEMI:
        sprintf(chBuf,";\n");
        break;
    case _ERROR:
        sprintf(chBuf,"_ERROR: lawless token %s\n",temp);
        break;
    case _LSPAREN:
        sprintf(chBuf,"(\n");
        break;
    case _RSPAREN:
        sprintf(chBuf,")\n");
        break;
    case _LMPAREN:
        sprintf(chBuf,"[\n");
        break;
    case _RMPAREN:
        sprintf(chBuf,"]\n");
        break;
    case _LBPAREN:
        sprintf(chBuf,"{\n");
        break;
    case _RBPAREN:
        sprintf(chBuf,"}\n");
        break;
    case _ENDFILE:
        sprintf(chBuf,"EOF\n");
        break;
    case _COMMA:
        sprintf(chBuf,",\n");
        break;
    default:
        //chBuf was cleared above, so nothing is written for other types.
        break;
    }
    // Write to standard output.
    fSuccess = WriteFile(hStdout, chBuf, (DWORD)strlen(chBuf), &dwWritten, NULL);
}
//返回下一个标识符
TokenRecord getToken(){
//当前的记录
TokenRecord currentRecord;
//保存当前的标识符的值
//
char tokenString[MAXTOKENLEN+1];
//tokenString 里面下一个插入的位置
int index=0;
//state 自动机的状态
//1 开始状态
//2 识别标识符 _ID
//3 识别数字 _INT
//4 识别数字 _REAL
//5 识别数字 _REAL 4和5一起保证小数点后面有数字
//6 识别/ _DIV
//7 8 识别注释
//9 识别= _EQ
//10 识别== _ASSIGN
//12 识别< _LT, <> _NE
//13 错误 _ERROR
int state = 1;
while(1){
char c;
if(state!=10){
c = getNextChar();
}
switch(state){
//开始状态
case 1:
//如果读到文件尾 返回 ENDGFILE
if(c==-1){
currentRecord.tp = _ENDFILE;
state = 10;
}
//如果读到 空格,制表,换行 忽略
else if(c==' '||c=='\t'||c=='\n');
//如果读到字符串,判断为_ID,转向状态 2
else if(isalpha(c)){
tokenString[index++] = c;
state = 2;
}
//如果读到数字,判断为_NUM 转向状态3
else if(isdigit(c)){
tokenString[index++] = c;
state = 3;
}
//如果读到 /,为_DIV或者索引,转向状态6
else if(c=='/'){
tokenString[index++] = c;
state = 6;
}
//如果读到< 为小于或者不等于,转向状态12
else if(c=='<'){
tokenString[index++] = c;
state = 12;
}
//如果读到 =,为赋值或者相等,转向状态9
else if(c=='='){
tokenString[index++] = c;
state = 9;
}
//如果读到()[]{}+-*>;则可以判断已经读取完毕一个token,转向状态10
else if(c=='('||c==')'||c=='+'||c=='-'||c=='*'||c=='{'||c=='}'||c=='['||c==']'||c=='>'||c==';'||c==','){
currentRecord.tp = getSingleChar_TokenType(c);
tokenString[index++] = c;
state = 10;
}
//如果读到其他的非法符号,则为出错,转向状态13
else{
tokenString[index++] = c;
state = 13;
}
break;
//判别为标识符的状态
case 2:
//读取数字或者字母,继续读取下一个字符
if(isalpha(c)||isdigit(c)) tokenString[index++] = c;
//读取分隔符,则该token已经读完,转向状态10
else if(isSep(c)){
unGetNextChar();
tokenString[index]='\0';
currentRecord.tp = get_TokenType(tokenString);
state = 10;
}
//读到文件结尾,转向状态10
else if(c==-1){
tokenString[index]='\0';
currentRecord.tp = get_TokenType(tokenString);
state = 10;
}
//读到其他字符,出错,转向状态13
else{
tokenString[index++] = c;
state = 13;
}
break;
//判别为数字
case 3:
//读取一个数字,继续读取
if(isdigit(c)){
tokenString[index++] = c;
}
//读取一个小数点,匹配实数,转向状态4
else if(c=='.'){
tokenString[index++] = c;
state = 4;
}
//读取一个分隔符,认为该数字已读完,转向状态10
else if(isSep(c)){
unGetNextChar();
currentRecord.tp = _INUM;
state = 10;
}
//读到文件尾,则该数字以读完,转向状态10
else if(c==-1){
currentRecord.tp = _INUM;
state = 10;
}
//读取其他字符,出错,转向状态13
else{
tokenString[index++] = c;
state = 13;
}
break;
//处理实数的状态
case 4:
//读取一个数字,则小数点后面有数,转向状态5
if(isdigit(c)){
tokenString[index++] = c;
state = 5;
}
//读取其他字符,出错,转向状态13
else{
tokenString[index++] = c;
state = 13;
}
break;
//处理实数状态
case 5:
//读取一个数字,继续读取
if(isdigit(c)){
tokenString[index++] = c;
}
//读取一个分隔符,该数字读取完毕,转向状态10
else if(isSep(c)){
unGetNextChar();
currentRecord.tp = _RNUM;
state = 10;
}
//读取其他字符,出错
else{
tokenString[index++] = c;
state = 13;
}
break;
//处理注释
case 6:
if(c=='*'){
state = 7;
}
else if(isLegalChar(c)){
unGetNextChar();
currentRecord.tp = _DIV;
state = 10;
}
else{
tokenString[index++] = c;
state = 13;
}
break;
//结束注释
case 7:
if(c=='*'){
//unGetNextChar();
state = 8;
}
else;
break;
//结束注释
case 8:
if(c=='/'){
index = 0;
state = 1;
}
else if(c=='*'){
state = 8;
}
else{
state = 7;
}
break;
//
case 9:
//2个=,相等
if(c=='='){
currentRecord.tp = _EQ;
state = 10;
}
//赋值
else if(isLegalChar(c)){
unGetNextChar();
currentRecord.tp = _ASSIGN;
state = 10;
}
//输入非法字符
else{
tokenString[index++] = c;
state = 13;
}
break;
//完成,返回得到的token
case 10:
tokenString[index++]='\0';
currentRecord.tv.assign(tokenString);
//strcpy(currentRecord.tv.c_str(),tokenString);
//currentRecord.tv = tokenString;
currentRecord.lineno = lineno;
return currentRecord;
break;
//处理<>
case 12:
//如果输入〉则为不等于,结束
if(c=='>'){
currentRecord.tp = _NE;
state = 10;
}
//输入其他字符,为小于,结束
else if(isLegalChar(c)){
unGetNextChar();
currentRecord.tp = _LT;
state = 10;
}
//输入非法字符,错误,跳转到状态13
else {
tokenString[index++] = c;
state = 13;
}
break;
//错误处理
case 13:
//输入分隔符,查找下一个token
if(isSep(c)){
//unGetNextChar();
currentRecord.tp = _ERROR;
state = 10;
}
//文件结束,完成
else if(c==-1){
currentRecord.tp = _ENDFILE;
state = 10;
}
//无法识别,继续错误处理
else{
tokenString[index++] = c;
}
scan_err = true;
break;
}
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -