⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scanner.cpp

📁 自己写的JAVA词法分析器
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include "stdio.h"
#include "string.h"
#include "stdlib.h"
#include "ctype.h"

// Size in bytes of each of the two input buffer halves (Buffer1 / Buffer2) of Scanner.
#define HalfSize 512
// Presumably yes/no flag values; nothing in this part of the file uses them -- TODO confirm.
#define Y 1
#define N 0

// Input-file helper: remembers a file name, opens that file on request and closes it again.
class ReadFile {
public:
	ReadFile(char name[]);  // remembers the name of the file to open
	FILE* Read();           // opens the file for reading and returns the stream
	void Close();           // closes the stream stored by Read()

private:
	FILE *p;                // stream handle stored by Read()
	char FileName[50];      // file name handed to the constructor
};

// Remembers the name of the file that Read() will open later; nothing is opened here.
//   name: NUL-terminated file name; it must fit into FileName including its
//         terminator, i.e. be shorter than sizeof(FileName) characters.
// A missing or over-long name is reported and ends the program, because copying
// it unchecked would write past the end of the fixed-size FileName buffer.
ReadFile::ReadFile(char name[]) {
	p = NULL;  // no stream is open until Read() succeeds
	if (name == NULL || strlen(name) >= sizeof(FileName)) {
		printf("file name is missing or too long!\n");
		exit(EXIT_FAILURE);
	}
	strcpy(FileName, name);  // safe: the length was checked above
}

// Opens the file named by FileName in read mode ("r") and stores the stream in p.
// Returns the open stream. If the file cannot be opened, a message is printed and
// the program terminates with a failure status.
FILE* ReadFile::Read() {
	p = fopen(FileName, "r");
	if (p == NULL) {
		printf("can not open the file!\n");
		exit(EXIT_FAILURE);  // a failed open must not be reported as success (status 0)
	}
	return p;
}

// Closes the stream opened by Read(). Must only be called after a successful Read().
// The handle is reset afterwards so that a repeated Close() does not pass the same
// stream to fclose() twice, which would be undefined behaviour.
void ReadFile::Close() {
	if (p != NULL) {
		fclose(p);
		p = NULL;
	}
}

// Lexical analyser: reads characters through a two-half input buffer and writes
// the recognised words, together with a token code, to an output file.
class Scanner {
public:
	Scanner(FILE *p);
	void Analysis();

private:
	// ---- helpers ----
	void DisplayTotal();
	void UntRead(char &char1);                        // step back one character
	char NextChar();                                  // fetch the next character
	void OutToFile(int t);                            // write a result to the output file
	void CheckKeyWord();                              // check whether the word is a keyword
	void GetStr();                                    // copy the recognised word into the word cache
	void Ending(int token, char &char1, int option);  // wrap-up work after each word
	void DisplayLineInfo();

	// ---- state ----
	char Buffer1[HalfSize];  // first buffer half
	char Buffer2[HalfSize];  // second buffer half
	FILE *fp_read;           // stream the source text is read from
	FILE *fp_write;          // stream the results are written to
	int point_f;             // forward (look-ahead) position
	int point_b;             // start position of the current word
	char WordCache[512];     // word cache
	int word_line;           // number of words on the current line
	int line;                // line counter
	int word_sum;            // total number of words in the file
};

// Prepares a scanner over the already opened input stream p: resets all positions
// and counters, zero-fills both buffer halves and loads the first half from p.
Scanner::Scanner(FILE *p) {
	fp_read = p;
	fp_write = NULL;

	point_f = -1;
	point_b = 0;
	line = 1;
	word_line = 0;
	word_sum = 0;

	// Zero-fill before reading so that a short read leaves '\0' after the data.
	memset(Buffer1, '\0', sizeof(Buffer1));
	memset(Buffer2, '\0', sizeof(Buffer2));
	fread(Buffer1, sizeof(Buffer1), 1, fp_read);
}

// Advances the forward position by one inside the two-half input buffer and
// returns the character stored there.
// Leaving the first half zero-fills and reloads the second half from the input
// stream; leaving the second half reloads the first half and wraps the position
// to 0. Returns '\0' when the stream is already at end-of-file at such a reload
// point; the zero fill also makes '\0' the value found after the last data read.
// NOTE(review): a reload happens on every crossing of a half boundary, including
// one that follows an UntRead() back across that boundary -- that looks like it
// would skip a block of input; confirm against the callers.
char Scanner::NextChar() {
	if (point_f == HalfSize - 1) {
		// About to enter the second half: refresh it first.
		memset(Buffer2, '\0', sizeof(Buffer2));
		if (feof(fp_read))
			return '\0';  // a char function must return a character, not the pointer constant NULL
		fread(Buffer2, sizeof(Buffer2), 1, fp_read);
		point_f++;
	}
	else if (point_f >= 2 * HalfSize - 1) {
		// About to wrap around into the first half: refresh it first.
		memset(Buffer1, '\0', sizeof(Buffer1));
		if (feof(fp_read))
			return '\0';
		fread(Buffer1, sizeof(Buffer1), 1, fp_read);
		point_f = 0;
	}
	else {
		point_f++;
	}
	return (point_f < HalfSize) ? Buffer1[point_f] : Buffer2[point_f - HalfSize];
}

// Moves the forward position back by one (wrapping from the start of the first
// half to the last slot of the second half) and stores the character found at
// the new position in char1.
void Scanner::UntRead(char& char1) {
	point_f = (point_f > 0) ? point_f - 1 : 2 * HalfSize - 1;
	char1 = (point_f >= HalfSize) ? Buffer2[point_f - HalfSize] : Buffer1[point_f];
}

// Copies the current lexeme -- buffer positions point_b through point_f inclusive,
// with line feeds left out -- into WordCache as a NUL-terminated string, then
// moves point_b just past point_f.
// Positions are taken modulo the combined size of the two buffer halves, so a
// lexeme that wraps from the end of the second half into the first half is read
// from the right half. The copy stops once WordCache is full; a longer lexeme is
// truncated instead of being written past the end of the cache.
void Scanner::GetStr() {
	memset(WordCache, '\0', sizeof(WordCache));

	const int ring = 2 * HalfSize;
	const int capacity = static_cast<int>(sizeof(WordCache)) - 1;  // keep room for the terminator

	int last = point_f;
	if (point_b > point_f)
		last += ring;  // the lexeme wraps around the end of the second half

	int out = 0;
	for (int i = point_b; i <= last && out < capacity; i++) {
		const int pos = i % ring;
		const char c = (pos < HalfSize) ? Buffer1[pos] : Buffer2[pos - HalfSize];
		if (c != '\n')
			WordCache[out++] = c;
	}
	point_b = point_f + 1;
}


void Scanner::CheckKeyWord() {
    char *KeyWord[] = {"abstract","boolean","break","byte","case","catch","char",
					    "class","const","continue","default","do","double","else","extends",
						"final","finally","float","for","goto","if","implements","import",
						"instanceof","int","interface","long","native","new","null","package",
						"private","protected","public","return","short","static","super","switch",
                        "synchronized","this","throw","throws","transient","try","void","volatile","while"};

	int i,token,flag = 0;
	GetStr();
	for (i = 0 ; i < 48; i++) {
		if (strcmp(WordCache,KeyWord[i]) == 0) {
			token = 0x103;
			flag = 1;
			break;
		}
	}
	if (((strcmp(WordCache,"true") == 0) || (strcmp(WordCache,"false") == 0)) && flag == 0)
		token =  0x105;
	else if (flag ==0) token =  0x104;
	OutToFile(token);
	word_line++;
}

// Writes one "<0xTOKEN,WORD> " entry to the output file, WORD being the current
// content of WordCache.
//   t: token code, printed in hexadecimal.
// The output file scanner_output.txt is opened for writing on first use. If it
// cannot be opened, a message is printed and the program terminates with a
// failure status.
void Scanner::OutToFile(int t) {
	if (fp_write == NULL) {
		fp_write = fopen("scanner_output.txt", "w");
		if (fp_write == NULL) {
			printf("cannot write to the file!\n");
			exit(EXIT_FAILURE);  // a failed open must not be reported as success (status 0)
		}
	}
	fprintf(fp_write, "<0x%x,%s> ", t, WordCache);
}

// Wrap-up after a lexeme: GetStr() copies it into WordCache. When option is 1 the
// word is written out with the given token, counted for the current line, and
// WordCache is cleared. In every case the next character is then read into char1.
void Scanner::Ending(int token, char &char1, int option) {
	GetStr();
	if (option == 1) {
		OutToFile(token);
		++word_line;
		memset(WordCache, '\0', sizeof(WordCache));
	}
	char1 = NextChar();
}

// Writes the summary "[Line:N, Total Word:M]" for the current line to the output
// file, adds the line's word count to the file-wide total and resets the per-line
// counter.
// The output file scanner_output.txt is opened for writing on first use. If it
// cannot be opened, a message is printed and the program terminates with a
// failure status.
void Scanner::DisplayLineInfo() {
	if (fp_write == NULL) {
		fp_write = fopen("scanner_output.txt", "w");
		if (fp_write == NULL) {
			printf("cannot write to the file!\n");
			exit(EXIT_FAILURE);  // a failed open must not be reported as success (status 0)
		}
	}
	fprintf(fp_write, "[Line:%d, Total Word:%d]\n\n", line, word_line);
	word_sum += word_line;
	word_line = 0;
}

void Scanner::Analysis() {
	char char1;
	int state = 0, token, ct;
	char1 = NextChar();
	while (char1 != NULL)
	{
		switch(state) {
	    case 0:
		    if (isalpha(char1) || char1=='$' || char1=='_') {
			    state = 1;
		    	break;
			}  // 标志符,关键字
		    else if (char1 == '*') {
			    state = 3;
				break;
			}  // *,*=
			else if (char1 == '>') {
				state = 6;
				break;
			}  // >,>=,>>,>>=,>>>,>>>=
			else if (char1 == '%') {
				state = 15;
				break;
			}  // %,%=
			else if (char1 == '<') {
				state = 18;
				break;
			}  // <,<=,<<,<<=
			else if (char1 == '~') {
				state = 24;
				break;
			}  // ~
			else if (char1 == '|') {
				state = 25;
				break;
			}  // |,||,|=
			else if (char1 == '&') {
				state = 29;
				break;
			}  // &,&&,&=
			else if (char1 == '^') {
				state = 33;
				break;
			}  // ^,^=
			else if (char1 == '=') {
				state = 36;
				break;
			}  // =,==
			else if (char1 == '!') {
				state = 39;
				break;
			}  // !,!=
			else if (char1 == '?' || char1 == ':') {
				state = 41;
				break;
			}  // ?:
			else if (char1 == '/') {
				state = 43;
				break;
			}  // /,/=,//注释,/*注释
			else if (char1 == ',') {
				state = 51;
				break;
			}  // ,
			else if (char1 == ';') {
				state = 52;
				break;
			}  // ;
			else if (char1 == '{' || char1 == '}') {
				state = 53;
				break;
			}  // {,}
			else if (char1 == '[' || char1 == ']' || char1 == '(' || char1 == ')') {
				state = 54;
				break;
			}  // [,],(,)
			else if (char1 == '"') {
				state = 55;
				break;
			}  // 字符串
			else if (char1 == '\'') {
				state = 57;
				break;
			}  //字符
			else if (char1 == '+') {
				state = 60;
				break;
			}  //+,++,+=
			else if (char1 == '-') {
				state = 64;
				break;
			}  //-,--,-=
			else if (char1 == '0') {
				state = 68;
				break;
			}  // 整数,浮点数
			else if (char1 >= '1' && char1 <= '9') {
				state = 75;
				break;
			}  // 整数,浮点数
			else if (char1 == '.') {
				state = 83;
				break;
			}  // .,整数,浮点数
			else if (char1 == '\n') {
				if (word_line != 0)
				    DisplayLineInfo();
				line++;
				Ending(0,char1,0);
				break;
			}  // 换行符
			else if (char1 == ' ' || char1 == '\t') {
				state = 0;
				Ending(0,char1,0);
				break;
			}  // 空格/\t
			else {
				state = 100;
				break;
			}  // 错误
		case 1:
			while (isalnum(char1) || char1 == '_' || char1 == '$')
				char1 = NextChar();
			state = 2;
			UntRead(char1);
			break;
		case 2:      // 0x
			state = 0;
			CheckKeyWord();
			char1 = NextChar();
			break;  // 关键字,标志符
		case 3:
			char1 = NextChar();
			if (char1 == '=')
				state = 5;
			else  {
				state = 4;
				UntRead(char1);
			}
			break;
        case 4: case 16: case 44:                  //0x11b
			state = 0;
			token = 0x11b;
			Ending(token,char1,1);
			break; // *,%,/
		case 5: case 11: case 14: case 17: case 23: case 28: case 32: case 35: case 37: case 45: case 63: case 67:   //0x110
			state = 0;
			token = 0x110;
			Ending(token,char1,1);
			break; // *=,>>=,>>>=,%=,<<=,|=,&=,^=,=
		case 6:
			char1 = NextChar();
			if (char1 == '=')
				state = 8;
			else if (char1 == '>')
				state = 9;
            else {
				state = 7;
				UntRead(char1);
			}
			break;
		case 7: case 8: case 19:case 20:            //0x118
			state = 0;
			token = 0x118;
			Ending(token,char1,1);
			break; // >,>=,<,<=
		case 9:
			char1 = NextChar();
            if (char1 == '=') 
				state = 11;
			else if (char1 == '>')
				state = 12;
			else {
				state = 10;
				UntRead(char1);
			}
			break;
		case 10: case 13: case 22:                //0x119
			state = 0;
			token = 0x119;
			Ending(token,char1,1);
			break; // >>,>>>,<<
		case 12:
			char1 = NextChar();
			if (char1 == '=')
				state = 14;
			else {
				state = 13;
				UntRead(char1);
			}
			break;
		case 15:
			char1 = NextChar();
			if (char1 == '=')
				state = 17;
			else {
				state = 16;
				UntRead(char1);
			}
			break;
		case 18:
			char1 = NextChar();
			if (char1 == '=')
				state = 20;
			else if (char1 == '<')
				state = 21;
			else {
				state = 19;
				UntRead(char1);
			}
			break;
		case 21:
			char1 = NextChar();
			if (char1 == '=')
				state = 23;
			else {
				state = 22;
				UntRead(char1);
			}
			break;
		case 24: case 62: case 66: case 92:               //0x11c

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -