📄 scanner.cpp
字号:
#include "stdio.h"
#include "string.h"
#include "stdlib.h"
#include "ctype.h"
// Size, in bytes, of each of the two input buffer halves used by Scanner.
#define HalfSize 512
// Yes/no flag values; not referenced in the visible portion of this file.
#define Y 1
#define N 0
// Small helper that remembers a file name and hands out a C stdio stream
// opened for reading on that file.
class ReadFile {
public:
    // Copies the given file name; the file itself is not opened here.
    ReadFile(char name[]);

    // Opens the remembered file in "r" mode and returns the stream.
    FILE* Read();

    // Closes the stream held by this object.
    void Close();

private:
    FILE *p;            // stream handle, assigned by Read()
    char FileName[50];  // name of the file to open
}; // file reader class
// Remembers the name of the file that Read() will later open.
//
// name: NUL-terminated file name. It must fit in FileName (at most
//       sizeof(FileName) - 1 characters).
//
// A missing or over-long name cannot be stored safely in the fixed-size
// member, so the program reports the problem and terminates instead of
// writing past the end of FileName.
ReadFile::ReadFile(char name[]) {
    p = NULL;  // no stream is open yet
    if (name == NULL || strlen(name) >= sizeof(FileName)) {
        fprintf(stderr, "file name is missing or too long!\n");
        exit(EXIT_FAILURE);
    }
    strcpy(FileName, name);  // safe: length verified above
}
// Opens FileName for reading and returns the resulting stream.
//
// Returns: the opened FILE*, which is also kept in p.
// On failure the error is reported on stderr and the process terminates
// with a failure status, so the function never returns NULL.
FILE* ReadFile::Read() {
    p = fopen(FileName, "r");
    if (p == NULL) {
        fprintf(stderr, "can not open the file!\n");
        exit(EXIT_FAILURE);  // non-zero status: opening the input failed
    }
    return p;
}
// Closes the stream held in p, if any, and forgets it so that a repeated
// call does not hand an already-closed stream to fclose().
// The guard is only meaningful once p has been given a value; Read()
// assigns it.
void ReadFile::Close() {
    if (p != NULL) {
        fclose(p);
        p = NULL;
    }
}
// Lexical analyser: reads characters from an input stream through a
// two-half buffer and writes the recognised tokens to an output file.
class Scanner {
public:
    Scanner(FILE *p);
    void Analysis();

private:
    void DisplayTotal();
    void UntRead(char&);                            // step back one character
    char NextChar();                                // fetch the next character
    void OutToFile(int);                            // write a result to the output file
    void CheckKeyWord();                            // check whether the word is a keyword
    void GetStr();                                  // store the analysed word in the word buffer
    void Ending(int token, char& char1, int option); // wrap-up work after each word
    void DisplayLineInfo();

    char Buffer1[HalfSize];  // first buffer half
    char Buffer2[HalfSize];  // second buffer half
    FILE *fp_read;           // input stream
    FILE *fp_write;          // output stream
    int point_f;             // forward (look-ahead) position
    int point_b;             // start position of the current word
    char WordCache[512];     // text of the current word
    int word_line;           // number of words on the current line
    int line;                // line counter
    int word_sum;            // total number of words in the file
}; // lexical analysis class
// Prepares a scanner that reads from the already opened stream p.
//
// p: input stream to scan; it is stored in fp_read and read from here.
//
// All counters are reset, every buffer (including WordCache) is zeroed,
// and the first half-buffer is pre-loaded from the stream.
Scanner::Scanner(FILE *p) {
    fp_read = p;
    fp_write = NULL;  // output file is opened lazily on first write
    point_f = -1;     // so that the first NextChar() lands on index 0
    point_b = 0;
    word_line = 0;
    line = 1;
    word_sum = 0;

    memset(Buffer1, 0, sizeof(Buffer1));
    memset(Buffer2, 0, sizeof(Buffer2));
    memset(WordCache, 0, sizeof(WordCache));

    // Read byte-wise (element size 1): with a single 512-byte element a
    // short final chunk would count as a partial element, whose contents
    // the C standard leaves indeterminate. Bytes not filled stay '\0',
    // which the scanner treats as end of input.
    fread(Buffer1, 1, sizeof(Buffer1), fp_read);
}
// Advances the forward position by one and returns the character there.
//
// Positions 0 .. HalfSize-1 live in Buffer1 and HalfSize .. 2*HalfSize-1
// in Buffer2. When the forward position sits on the last slot of one half,
// the other half is zeroed and refilled from fp_read before stepping in.
//
// Returns: the next character, or '\0' once the stream is at end-of-file
// (zero bytes left in a partially filled half are returned as '\0' too).
//
// NOTE(review): the refill is triggered purely by the value of point_f, so
// calling UntRead() back onto the last slot of a half and then NextChar()
// again refills the other half a second time and skips input. Fixing that
// needs extra state in the class; confirm whether it matters for callers.
char Scanner::NextChar() {
    if (point_f == HalfSize - 1) {
        memset(Buffer2, 0, sizeof(Buffer2));
        if (feof(fp_read))
            return '\0';
        // Byte-wise read so a short final chunk is well defined.
        fread(Buffer2, 1, sizeof(Buffer2), fp_read);
        point_f++;
    }
    else if (point_f >= 2 * HalfSize - 1) {
        memset(Buffer1, 0, sizeof(Buffer1));
        if (feof(fp_read))
            return '\0';
        fread(Buffer1, 1, sizeof(Buffer1), fp_read);
        point_f = 0;  // wrap around to the start of the first half
    }
    else {
        point_f++;
    }

    return (point_f < HalfSize) ? Buffer1[point_f]
                                : Buffer2[point_f - HalfSize];
}
// Moves the forward position back by one slot and stores the character
// found there in char1.
//
// char1: receives the character at the new forward position.
//
// Stepping back from position 0 (or below) wraps to the last slot of the
// second buffer half.
void Scanner::UntRead(char& char1) {
    point_f = (point_f > 0) ? point_f - 1 : 2 * HalfSize - 1;

    const bool in_first_half = point_f < HalfSize;
    char1 = in_first_half ? Buffer1[point_f]
                          : Buffer2[point_f - HalfSize];
}
// Copies the characters between point_b and point_f (inclusive) into
// WordCache, skipping '\n', then moves point_b just past point_f.
//
// The two buffer halves are treated as one ring of 2*HalfSize slots: when
// point_b is greater than point_f the word has wrapped past the end of the
// second half and continues at the start of the first half.
//
// Each position is mapped to its buffer before the newline test, so the
// test always looks at the same byte that gets copied and never indexes
// outside either array. At most sizeof(WordCache) - 1 characters are
// stored, so WordCache always stays NUL-terminated; longer words are
// truncated.
void Scanner::GetStr() {
    memset(WordCache, 0, sizeof(WordCache));

    const int ring = 2 * HalfSize;
    const int last = (point_b > point_f) ? point_f + ring : point_f;
    const int capacity = static_cast<int>(sizeof(WordCache)) - 1;

    int out = 0;
    for (int i = point_b; i <= last && out < capacity; i++) {
        const int pos = i % ring;  // fold wrapped positions back into the ring
        const char c = (pos < HalfSize) ? Buffer1[pos]
                                        : Buffer2[pos - HalfSize];
        if (c != '\n')
            WordCache[out++] = c;
    }

    point_b = point_f + 1;
}
void Scanner::CheckKeyWord() {
char *KeyWord[] = {"abstract","boolean","break","byte","case","catch","char",
"class","const","continue","default","do","double","else","extends",
"final","finally","float","for","goto","if","implements","import",
"instanceof","int","interface","long","native","new","null","package",
"private","protected","public","return","short","static","super","switch",
"synchronized","this","throw","throws","transient","try","void","volatile","while"};
int i,token,flag = 0;
GetStr();
for (i = 0 ; i < 48; i++) {
if (strcmp(WordCache,KeyWord[i]) == 0) {
token = 0x103;
flag = 1;
break;
}
}
if (((strcmp(WordCache,"true") == 0) || (strcmp(WordCache,"false") == 0)) && flag == 0)
token = 0x105;
else if (flag ==0) token = 0x104;
OutToFile(token);
word_line++;
}
// Appends one "<0xTOKEN,word> " entry to the output file, using the text
// currently held in WordCache.
//
// t: token code to print in hexadecimal.
//
// The output file "scanner_output.txt" is opened on first use. If it
// cannot be opened, the error is reported on stderr and the process
// terminates with a failure status.
void Scanner::OutToFile(int t) {
    if (fp_write == NULL) {
        fp_write = fopen("scanner_output.txt", "w");
        if (fp_write == NULL) {
            fprintf(stderr, "cannot write to the file!\n");
            exit(EXIT_FAILURE);  // non-zero status: output could not be created
        }
    }
    fprintf(fp_write, "<0x%x,%s> ", t, WordCache);
}
// Finishes the handling of one scanned unit and fetches the next character.
//
// token:  token code to emit when option is 1.
// char1:  receives the character returned by NextChar().
// option: 1 to write the word to the output file and count it; any other
//         value only consumes the text without emitting it.
void Scanner::Ending(int token, char &char1, int option) {
    GetStr();

    const bool emit = (option == 1);
    if (emit) {
        OutToFile(token);
        ++word_line;
        // Wipe the word buffer once the word has been written out.
        memset(WordCache, '\0', sizeof(WordCache));
    }

    char1 = NextChar();
}
// Writes the summary for the current line ("[Line:N, Total Word:M]") to the
// output file, adds the line's word count to the file total and resets the
// per-line counter.
//
// The output file "scanner_output.txt" is opened on first use. If it
// cannot be opened, the error is reported on stderr and the process
// terminates with a failure status.
void Scanner::DisplayLineInfo() {
    if (fp_write == NULL) {
        fp_write = fopen("scanner_output.txt", "w");
        if (fp_write == NULL) {
            fprintf(stderr, "cannot write to the file!\n");
            exit(EXIT_FAILURE);  // non-zero status: output could not be created
        }
    }
    fprintf(fp_write, "[Line:%d, Total Word:%d]\n\n", line, word_line);
    word_sum += word_line;
    word_line = 0;
}
void Scanner::Analysis() {
char char1;
int state = 0, token, ct;
char1 = NextChar();
while (char1 != NULL)
{
switch(state) {
case 0:
if (isalpha(char1) || char1=='$' || char1=='_') {
state = 1;
break;
} // 标志符,关键字
else if (char1 == '*') {
state = 3;
break;
} // *,*=
else if (char1 == '>') {
state = 6;
break;
} // >,>=,>>,>>=,>>>,>>>=
else if (char1 == '%') {
state = 15;
break;
} // %,%=
else if (char1 == '<') {
state = 18;
break;
} // <,<=,<<,<<=
else if (char1 == '~') {
state = 24;
break;
} // ~
else if (char1 == '|') {
state = 25;
break;
} // |,||,|=
else if (char1 == '&') {
state = 29;
break;
} // &,&&,&=
else if (char1 == '^') {
state = 33;
break;
} // ^,^=
else if (char1 == '=') {
state = 36;
break;
} // =,==
else if (char1 == '!') {
state = 39;
break;
} // !,!=
else if (char1 == '?' || char1 == ':') {
state = 41;
break;
} // ?:
else if (char1 == '/') {
state = 43;
break;
} // /,/=,//注释,/*注释
else if (char1 == ',') {
state = 51;
break;
} // ,
else if (char1 == ';') {
state = 52;
break;
} // ;
else if (char1 == '{' || char1 == '}') {
state = 53;
break;
} // {,}
else if (char1 == '[' || char1 == ']' || char1 == '(' || char1 == ')') {
state = 54;
break;
} // [,],(,)
else if (char1 == '"') {
state = 55;
break;
} // 字符串
else if (char1 == '\'') {
state = 57;
break;
} //字符
else if (char1 == '+') {
state = 60;
break;
} //+,++,+=
else if (char1 == '-') {
state = 64;
break;
} //-,--,-=
else if (char1 == '0') {
state = 68;
break;
} // 整数,浮点数
else if (char1 >= '1' && char1 <= '9') {
state = 75;
break;
} // 整数,浮点数
else if (char1 == '.') {
state = 83;
break;
} // .,整数,浮点数
else if (char1 == '\n') {
if (word_line != 0)
DisplayLineInfo();
line++;
Ending(0,char1,0);
break;
} // 换行符
else if (char1 == ' ' || char1 == '\t') {
state = 0;
Ending(0,char1,0);
break;
} // 空格/\t
else {
state = 100;
break;
} // 错误
case 1:
while (isalnum(char1) || char1 == '_' || char1 == '$')
char1 = NextChar();
state = 2;
UntRead(char1);
break;
case 2: // 0x
state = 0;
CheckKeyWord();
char1 = NextChar();
break; // 关键字,标志符
case 3:
char1 = NextChar();
if (char1 == '=')
state = 5;
else {
state = 4;
UntRead(char1);
}
break;
case 4: case 16: case 44: //0x11b
state = 0;
token = 0x11b;
Ending(token,char1,1);
break; // *,%,/
case 5: case 11: case 14: case 17: case 23: case 28: case 32: case 35: case 37: case 45: case 63: case 67: //0x110
state = 0;
token = 0x110;
Ending(token,char1,1);
break; // *=,>>=,>>>=,%=,<<=,|=,&=,^=,=
case 6:
char1 = NextChar();
if (char1 == '=')
state = 8;
else if (char1 == '>')
state = 9;
else {
state = 7;
UntRead(char1);
}
break;
case 7: case 8: case 19:case 20: //0x118
state = 0;
token = 0x118;
Ending(token,char1,1);
break; // >,>=,<,<=
case 9:
char1 = NextChar();
if (char1 == '=')
state = 11;
else if (char1 == '>')
state = 12;
else {
state = 10;
UntRead(char1);
}
break;
case 10: case 13: case 22: //0x119
state = 0;
token = 0x119;
Ending(token,char1,1);
break; // >>,>>>,<<
case 12:
char1 = NextChar();
if (char1 == '=')
state = 14;
else {
state = 13;
UntRead(char1);
}
break;
case 15:
char1 = NextChar();
if (char1 == '=')
state = 17;
else {
state = 16;
UntRead(char1);
}
break;
case 18:
char1 = NextChar();
if (char1 == '=')
state = 20;
else if (char1 == '<')
state = 21;
else {
state = 19;
UntRead(char1);
}
break;
case 21:
char1 = NextChar();
if (char1 == '=')
state = 23;
else {
state = 22;
UntRead(char1);
}
break;
case 24: case 62: case 66: case 92: //0x11c
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -