⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lexical.h

📁 一个简单的词法分析器
💻 H
字号:
#ifndef LEXICAL_H
#define LEXICAL_H

#include <limits.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* number of entries in keywordtable */
#define NUM_KEYWORD 25
/* size in bytes of one token buffer, terminating '\0' included */
#define MAX_TOKENLENGTH 120

/* defined further down in this file */
char *readfile(char *path);

/*
reserved words, searched by Reserve();
the position of a word in this table is the index Reserve() returns
*/
char *keywordtable[NUM_KEYWORD] = {
	"PROGRAM", "ID",      "BEGIN",  "END",   "CONST",
	"TRUE",    "FALSE",   "INTEGER","REAL",  "BOOLEAN",
	"ARRAY",   "IF",      "THEN",   "ELSE",  "WHILE",
	"DO",      "REPEAT",  "UNTIL",  "FOR",   "TO",
	"READ",    "WRITE",   "AND",    "OR",    "NOT"
};

/*
integer constants stored by InsertInt():
the table is allocated on first use, count_for_const entries are in use,
MAX_INTNUM is its current capacity
*/
int *consttable = NULL;
int count_for_const = 0;
int MAX_INTNUM = 120;

/*
real constants stored by InsertReal():
the table is allocated on first use, count_for_real entries are in use,
MAX_REALNUM is its current capacity
*/
float *realtable = NULL;
int count_for_real = 0;
int MAX_REALNUM = 120;

/*
identifiers stored by InsertID():
the table is allocated on first use, count_for_mark entries are in use,
MAX_MARKNUM is its current capacity
*/
char **marktable;
int count_for_mark = 0;
int MAX_MARKNUM = 120;

/*
read infile

Reads the whole text file at `path` into a newly allocated, NUL-terminated
buffer and returns it. The caller owns the buffer and releases it with free().

If the file cannot be opened or read, or memory runs out, a message is
printed and the program exits with EXIT_FAILURE.
*/
char *readfile(char *path)
{
	FILE *fp;
	size_t used = 0;			/* bytes stored so far */
	size_t capacity = 500;		/* bytes currently allocated */
	char *ch;
	int temp;					/* int, not char: EOF must stay distinct from every byte value */

	if((fp = fopen(path,"r")) == NULL)
	{
		printf("open file %s error.\n",path);
		exit(EXIT_FAILURE);
	}

	ch = (char*)malloc(capacity);
	if(ch == NULL)
	{
		fclose(fp);
		fprintf(stderr, "readfile: out of memory.\n");
		exit(EXIT_FAILURE);
	}

	while((temp = fgetc(fp)) != EOF){
		/* always keep one spare byte so the terminator fits after the loop */
		if(used + 1 >= capacity){
			char *grown = (char*)realloc(ch, capacity * 2);
			if(grown == NULL)
			{
				free(ch);
				fclose(fp);
				fprintf(stderr, "readfile: out of memory.\n");
				exit(EXIT_FAILURE);
			}
			ch = grown;
			capacity *= 2;
		}
		ch[used++] = (char)temp;
	}

	/* EOF is also returned on a read error; do not pass off a partial file as complete */
	if(ferror(fp))
	{
		free(ch);
		fclose(fp);
		fprintf(stderr, "readfile: error while reading %s.\n", path);
		exit(EXIT_FAILURE);
	}
	fclose(fp);

	ch[used] = '\0';
	return ch;
}

/*
tell whether i may appear in a name:
true for 'a'..'z', 'A'..'Z' and '_', false for anything else
*/
bool IsLetter(char i){
	const bool lowercase = ('a' <= i && i <= 'z');
	const bool uppercase = ('A' <= i && i <= 'Z');

	return lowercase || uppercase || i == '_';
}

/*
tell whether i is one of the decimal digits '0'..'9'
*/
bool IsDigit(char i){
	return !(i < '0' || i > '9');
}

/*
look strToken up in the keyword table
a reserved word yields its index in keywordtable,
any other string yields -1
*/
int Reserve(char *strToken){
	int index = 0;

	while(index < NUM_KEYWORD){
		if(!strcmp(strToken, keywordtable[index]))
			return index;
		++index;
	}
	return -1;
}

/*
firstly search in the marktable for strToken,
if found, return offset
else insert a copy of strToken into marktable and return the new offset

marktable is created on first use with MAX_MARKNUM slots, each pointing at a
buffer of at least MAX_TOKENLENGTH bytes. As soon as the last free slot has
been filled the table is doubled (MAX_MARKNUM is updated), so the next call
always finds a free slot.

The program exits with EXIT_FAILURE when memory cannot be allocated.
*/
int InsertID(char* strToken){
	int i;
	size_t length;

	/*
	initial
	*/
	if(marktable == NULL){
		marktable = (char**)malloc(MAX_MARKNUM * sizeof(char*));
		if(marktable == NULL)
			goto out_of_memory;
		for(i = 0; i < MAX_MARKNUM; ++i){
			marktable[i] = (char*)malloc(MAX_TOKENLENGTH);
			if(marktable[i] == NULL)
				goto out_of_memory;
		}
	}

	for(i = 0; i < count_for_mark; ++i){
		if(strcmp(strToken, marktable[i]) == 0)
			return i;
	}

	length = strlen(strToken);
	if(length >= MAX_TOKENLENGTH){
		/* the preallocated slot is too small for this token: enlarge just this slot */
		char *bigger = (char*)realloc(marktable[count_for_mark], length + 1);
		if(bigger == NULL)
			goto out_of_memory;
		marktable[count_for_mark] = bigger;
	}
	memcpy(marktable[count_for_mark], strToken, length + 1);	/* + 1 copies the terminator */

	++count_for_mark;

	if(count_for_mark == MAX_MARKNUM){			//do the extend of marktable
		int doubled = MAX_MARKNUM * 2;
		/* realloc keeps the existing slot pointers, so only the new half needs buffers */
		char **grown = (char**)realloc(marktable, doubled * sizeof(char*));
		if(grown == NULL)
			goto out_of_memory;
		marktable = grown;

		for(i = MAX_MARKNUM; i < doubled; ++i){
			marktable[i] = (char*)malloc(MAX_TOKENLENGTH);
			if(marktable[i] == NULL)
				goto out_of_memory;
		}
		MAX_MARKNUM = doubled;
	}

	return count_for_mark - 1;

out_of_memory:
	fprintf(stderr, "InsertID: out of memory.\n");
	exit(EXIT_FAILURE);
}

/*
insert int into consttable,return offset

strToken holds the decimal digits of the constant; conversion stops at the
first character that is not a digit. A value too large for int is stored as
INT_MAX and a warning is written to stderr.

consttable is created on first use with MAX_INTNUM entries and doubled
(MAX_INTNUM is updated) as soon as it becomes full, so the next call always
has a free entry. The program exits with EXIT_FAILURE when memory cannot be
allocated.
*/
int InsertInt(char* strToken){
	int sum = 0;
	int i;

	/*
	accumulate left to right in integer arithmetic; pow() works in floating
	point and an (int) cast truncates, so an inexact power of ten would
	corrupt the value
	*/
	for(i = 0; strToken[i] >= '0' && strToken[i] <= '9'; ++i){
		int digit = strToken[i] - '0';
		if(sum > (INT_MAX - digit) / 10){		/* sum * 10 + digit would overflow */
			fprintf(stderr, "integer constant %s is too large, using %d\n", strToken, INT_MAX);
			sum = INT_MAX;
			break;
		}
		sum = sum * 10 + digit;
	}

	/*
	initial
	*/
	if(consttable == NULL){
		consttable = (int *)malloc(sizeof(int) * MAX_INTNUM);
		if(consttable == NULL)
			goto out_of_memory;
	}
	consttable[count_for_const] = sum;
	++count_for_const;

	/*
	do the extention
	*/
	if(count_for_const == MAX_INTNUM) {
		int *grown = (int *)realloc(consttable, sizeof(int) * MAX_INTNUM * 2);
		if(grown == NULL)
			goto out_of_memory;
		consttable = grown;
		MAX_INTNUM *= 2;
	}
	return count_for_const - 1;

out_of_memory:
	fprintf(stderr, "InsertInt: out of memory.\n");
	exit(EXIT_FAILURE);
}

/*
insert real into realtable,return offset

strToken has the form digits '.' digits (either digit run may be empty).
Conversion stops at the end of the string or at the first character that
does not fit that form, so a token without '.' is read as a whole number
instead of running past the terminator.

realtable is created on first use with MAX_REALNUM entries and doubled
(MAX_REALNUM is updated) as soon as it becomes full, so the next call always
has a free entry. The program exits with EXIT_FAILURE when memory cannot be
allocated.
*/
int InsertReal(char* strToken){
	double whole = 0.0;			/* value of the digits before the '.' */
	double fraction = 0.0;		/* digits after the '.', read as an integer */
	double scale = 1.0;			/* 10 to the number of fraction digits used */
	int i = 0;

	while(strToken[i] >= '0' && strToken[i] <= '9'){
		whole = whole * 10.0 + (strToken[i] - '0');
		++i;
	}
	if(strToken[i] == '.'){
		++i;
		while(strToken[i] >= '0' && strToken[i] <= '9'){
			/* digits beyond double precision cannot change the float result;
			   skipping them keeps fraction and scale finite */
			if(scale < 1e17){
				fraction = fraction * 10.0 + (strToken[i] - '0');
				scale *= 10.0;
			}
			++i;
		}
	}

	/*
	initial
	*/
	if(realtable == NULL){
		realtable = (float *)malloc(sizeof(float) * MAX_REALNUM);
		if(realtable == NULL)
			goto out_of_memory;
	}
	realtable[count_for_real] = (float)(whole + fraction / scale);
	++count_for_real;

	/*
	do the extention
	*/
	if(count_for_real == MAX_REALNUM) {
		float *grown = (float *)realloc(realtable, sizeof(float) * MAX_REALNUM * 2);
		if(grown == NULL)
			goto out_of_memory;
		realtable = grown;
		MAX_REALNUM *= 2;
	}
	return count_for_real - 1;

out_of_memory:
	fprintf(stderr, "InsertReal: out of memory.\n");
	exit(EXIT_FAILURE);
}

/*
do Lexical Analysis

Scans the NUL-terminated text in `string` and prints one line per token on
stdout:
	identifier		< $ID name, offset >		offset returned by InsertID
	keyword			< $(index)KEYWORD, - >		index returned by Reserve
	integer			< $INT, offset >			offset returned by InsertInt
	real			< $REAL, offset >			offset returned by InsertReal
	operator etc.	< $NAME, - >
Spaces, tabs and carriage returns only separate tokens; every '\n' advances
the line number used in error reports. Any other unrecognised character is
reported through ProcError and skipped.

A malformed number (a second '.', or a letter glued to it as in '2R') is
reported and produces no token. Identifiers longer than MAX_TOKENLENGTH - 1
characters are truncated to that length; a numeric literal that long is
reported as an error. A NULL `string` is ignored.
*/
void doLexAnalysis(char* string){
	void ProcError(int linenum, char ch);

	char strToken[MAX_TOKENLENGTH];		/* one buffer reused for every lexeme */
	int linenum = 1;
	int i = 0;							/* index of the next unread character */

	if(string == NULL)
		return;

	while(string[i] != '\0'){
		char ch = string[i];
		int count = 0;

		if(ch == ' ' || ch == '\t' || ch == '\r'){
			++i;
			continue;
		}

		if(IsLetter(ch)){									//keyword or mark
			int reserve;

			/* the terminating '\0' is neither letter nor digit, so this stops at the end */
			while(IsLetter(string[i]) || IsDigit(string[i])){
				if(count < MAX_TOKENLENGTH - 1)
					strToken[count++] = string[i];
				++i;
			}
			strToken[count] = '\0';

			reserve = Reserve(strToken);
			if(reserve == -1){
				int value = InsertID(strToken);
				printf("< $ID %s, %d >\n", strToken, value);
			}
			else {
				printf("< $(%d)%s, - >\n", reserve, keywordtable[reserve]);
			}
			continue;
		}

		if(IsDigit(ch)){									//int or float
			int is_real = 0;		/* a '.' has been seen */
			int has_error = 0;		/* the literal is malformed, emit no token */
			int value;

			while(IsDigit(string[i]) || string[i] == '.'){
				if(string[i] == '.'){
					if(is_real){						//only the second and later dots are errors
						ProcError(linenum, string[i]);
						has_error = 1;
					}
					is_real = 1;
				}
				if(count < MAX_TOKENLENGTH - 1){
					strToken[count++] = string[i];
				}
				else if(!has_error){					//literal does not fit the token buffer
					ProcError(linenum, string[i]);
					has_error = 1;
				}
				++i;
				if(IsLetter(string[i])){				//handling the error like '2R'
					ProcError(linenum, string[i]);
					++i;
					has_error = 1;
				}
			}
			strToken[count] = '\0';

			if(has_error)
				continue;

			if(is_real){
				value = InsertReal(strToken);
				printf("< $REAL, %d >\n", value);
			}
			else {
				value = InsertInt(strToken);
				printf("< $INT, %d >\n", value);
			}
			continue;
		}

		/*
		single character or two character symbol: consume ch, then peek at
		string[i]; the peek is safe because ch was not the terminator
		*/
		++i;
		switch(ch){
		case '=':
			printf("< $EQUAL, - >\n");
			break;
		case '+':
			if(string[i] == '+'){
				++i;
				printf("< $INC, - >\n");
			}
			else
				printf("< $PLUS, - >\n");
			break;
		case '-':
			if(string[i] == '-'){
				++i;
				printf("< $DEC, - >\n");
			}
			else
				printf("< $MINUS, - >\n");
			break;
		case '*':
			printf("< $MULTI, - >\n");
			break;
		case '/':
			printf("< $DIV, - >\n");
			break;
		case ';':
			printf("< $SEMICOLON, - >\n");
			break;
		case '(':
			printf("< $LPAR, - >\n");
			break;
		case ')':
			printf("< $RPAR, - >\n");
			break;
		case ':':
			if(string[i] == '='){
				++i;
				printf("< $ASSIGN, - >\n");
			}
			else
				printf("< $COLON, - >\n");
			break;
		case ',':
			printf("< $COMMA, - >\n");
			break;
		case '[':
			printf("< $LBRACE, - >\n");
			break;
		case ']':
			printf("< $RBRACE, - >\n");
			break;
		case '\n':
			++linenum;
			break;
		case '<':
			if(string[i] == '='){
				++i;
				printf("< $NOTMORE, - >\n");
			}
			else if(string[i] == '>'){
				++i;
				printf("< $NOTEQUAL, - >\n");
			}
			else
				printf("< $LESS, - >\n");
			break;
		case '>':
			if(string[i] == '='){
				++i;
				printf("< $NOTLESS, - >\n");
			}
			else
				printf("< $MORE, - >\n");
			break;
		default:
			ProcError(linenum, ch);
			break;
		}
	}
}

/*
print a two line error report on stdout:
first the line number, then the character the error occurs at
*/
void ProcError(int linenum, char ch){
	printf("There is error(s) in LINE %d\n"
	       "The error occurs at char '%c'\n",
	       linenum, ch);
}

#endif


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -