⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scanner.cpp

📁 一个子集的词法分析器。输入程序
💻 CPP
字号:
//Header files
#include "stdio.h"
#include "math.h"
#include "string.h"
#include "stdlib.h"
#include "ctype.h"

// Symbol-table entry kinds: the values stored in SYMBOL::KIND by the LookUp overloads.
#define  INTEGER		1
#define  REAL			2
#define  IDENTIFIER		3
#define  RELOP			4
#define  ADDOP			5
#define	 MULOP			6
#define	 ASSIGNOP		7
// Size in bytes of the global `string` buffer that holds symbol spellings.
#define	 SSIZE		 1000
// Number of entries in the CODE table.
#define  CODELEN	   31

//Tables
// Token-code table: maps a lexeme class or literal spelling to the numeric
// code written into TOKEN::CODE. KindNum() searches it linearly by lexeme and
// returns the first match; PrintToken() indexes it directly by code.
struct
{
	char lexeme[10];	// spelling; the longest entry ("procedure") is 9 chars + NUL
	int  code;			// numeric token code; equals the entry's index in this table
} CODE[CODELEN]=
{
	//Key words
	"array",	 0,
	"begin",	 1,
	"do",		 2,
	"else",		 3,
	"end",		 4,
	"function",  5,
	"if",		 6,
	"integer",	 7,
	"of",		 8,
	"procedure", 9,
	"program",	10,
	"real",		11,
	"then",		12,
	"var",		13,
	"while",	14,
	//Operator
	"addop",	15,
	"assignop", 16,
	"mulop",	17,
	"not",		18,//Also keyword
	"relop",	19,
	//Identifier
	"id",		20,
	"num",		21,
	//Sign
	":",		22,
	"(",		23,
	")",		24,
	"[",		25,
	"]",		26,
	",",		27,
	";",		28,
	// NOTE(review): this second "," duplicates code 27 above; KindNum returns
	// the first match, so code 29 is never produced. Possibly a different
	// symbol was intended here - confirm against the grammar.
	",",		29,
	".",		30
};

// Reserved words recognised by IsKeyWord(). Every spelling here also appears
// in CODE, so KindNum() can translate a matched keyword into its token code.
// NOTE(review): "of" is listed as a keyword in CODE (code 8) but is absent
// from this table, so RECOGID classifies "of" as an identifier. Adding it
// here also requires IsKeyWord to scan one more entry - confirm and fix both.
char KEYWORD[][10] =
{
	"array",
	"begin",
	"do",
	"else",
	"end",
	"function",
	"if",
	"integer",
	"not",
	"procedure",
	"program",
	"real",
	"then",
	"var",
	"while"
};
//Data structures
// One symbol-table entry. CONTENT is a union: which view is valid depends on
// KIND (INTEGER and REAL entries use VAL; the LookUp(char*, int) overload
// fills NAME for every other kind).
struct SYMBOL
{
	union
	{
		struct
		{
			int HEAD;	// offset of the spelling inside the global `string` buffer
			int LENG;	// length of the spelling in characters (no terminator)
		} NAME;
		union
		{
			int intVAL;		// value of an INTEGER constant
			double dblVAL;	// value of a REAL constant
		} VAL;
	} CONTENT;
	int KIND;	// one of INTEGER, REAL, IDENTIFIER, RELOP, ADDOP, MULOP, ASSIGNOP
	int ADDR;	// never assigned or read in this file
} symbol[500];	// the symbol table itself; `length` counts the used entries

// One scanned token, written to TOKEN.TXT as a raw binary record by SORT().
struct TOKEN
{
	int CODE;	// token code from the CODE table; -1 means "emit nothing"
	int ENTRY;	// index into symbol[] for ids/numbers/operators, -1 otherwise
};

// Streams shared by the whole scanner:
//   fpSource - program text being scanned
//   fpToken  - binary TOKEN records
//   fpSymb   - binary SYMBOL records
//   fpSet    - text dump of the symbol spelling buffer
// The original declaration named the third stream fpSymbol and did not declare
// fpSet at all, while every use in the file refers to fpSymb and fpSet.
FILE *fpSource, *fpToken, *fpSymb, *fpSet;
// line:   current source line number (1-based), used in diagnostics
// length: number of used entries in symbol[]
// end:    number of used bytes in `string` (offset of its terminating NUL)
int line, length, end;
// Concatenated spellings of all named symbols; entries address it via HEAD/LENG.
char string[SSIZE]={""};

// Dispatcher and per-class recognisers. Each recogniser receives the first
// character of the lexeme and fills in the TOKEN passed by reference.
void SORT(char);
void RECOGID(char, TOKEN &);
void HANDLECOM(char, TOKEN &);
void RECOGDIG(char, TOKEN &);
void RECOGDEL(char, TOKEN &);
// Returns false for the characters listed in its switch and true for anything else.
bool Islegal(char);

// Table helpers.
bool IsKeyWord(char *);
int  KindNum(char *);
// LookUp finds or inserts a symbol-table entry and returns its index;
// overloaded for named symbols, integer constants and real constants.
int  LookUp(char *, int);
int  LookUp(int, int);
int  LookUp(double, int);
void PrintToken();

/*
 * Entry point. Scans "test.txt" character by character, dispatching every
 * character that is neither a space nor a newline to SORT(), which writes
 * the resulting tokens to TOKEN.TXT and symbols to SYMB.TXT. Afterwards the
 * symbol spelling buffer is dumped to SET.TXT and the results are printed
 * with PrintToken().
 *
 * Returns 0 on success, 1 if any of the files cannot be opened.
 */
int main(int argc, char *argv[])
{
	int ch;	// int rather than char so EOF stays distinct from every valid byte

	// The source file name is currently fixed; the arguments are unused.
	(void)argc;
	(void)argv;

	fpSource = fopen("test.txt", "r");
	if(fpSource == NULL)
	{
		printf("ERROR::Cannot open source file test.txt\n");
		return 1;
	}

	fpToken = fopen("TOKEN.TXT","w");
	fpSymb = fopen("SYMB.TXT","w");
	fpSet = fopen("SET.TXT","w");
	if(fpToken == NULL || fpSymb == NULL || fpSet == NULL)
	{
		printf("ERROR::Cannot create output files\n");
		if(fpToken != NULL)
			fclose(fpToken);
		if(fpSymb != NULL)
			fclose(fpSymb);
		if(fpSet != NULL)
			fclose(fpSet);
		fclose(fpSource);
		return 1;
	}

	line = 1;
	length = 0;
	end = 0;

	// Reading until getc reports EOF avoids the extra iteration that a
	// while(!feof(...)) loop performs after the last character.
	while((ch = getc(fpSource)) != EOF)
	{
		if(ch == ' ')
			continue;
		if(ch == '\n')
			line++;
		else
			SORT((char)ch);
	}

	fprintf(fpSet, "%s", string);

	fclose(fpSource);
	fclose(fpToken);
	fclose(fpSymb);
	fclose(fpSet);

	// PrintToken reopens TOKEN.TXT itself, so it must run after the close above.
	PrintToken();
	getchar();	// keep the console window open until a key is pressed
	return 0;
}

/*
 * Classifies the first character of a lexeme and hands it to the matching
 * recogniser, then appends the resulting token to TOKEN.TXT.
 *
 * ch: first character of the lexeme (already consumed from fpSource).
 *
 * A token whose CODE is -1 is not written. The token starts out as {-1, -1}
 * so that a recogniser which reports an error without touching it cannot
 * cause an uninitialised record to be written.
 */
void SORT(char ch)
{
	TOKEN token = { -1, -1 };
	// <ctype.h> functions require a value representable as unsigned char.
	unsigned char uch = (unsigned char)ch;

	if(isalpha(uch))
		RECOGID(ch, token);
	else if(ch == '{')
		HANDLECOM(ch, token);
	else if(isdigit(uch))
		RECOGDIG(ch, token);
	else
		RECOGDEL(ch, token);

	if(token.CODE != -1)
		fwrite(&token, sizeof(TOKEN), 1, fpToken);
}

void RECOGID(char ch, TOKEN &token)
{
	char WORD[10];
	int index;
	bool legal;

	legal = false;
	index = 0;
	WORD[index++] = ch;

	while(index < 10)
	{
		ch = getc(fpSource);
		if(isalnum(ch))
			WORD[index++] = ch;
		else if(ch == EOF)
			break;
		else if(Islegal(ch))
		{
			printf("ERROR::legal symbol %c in Line %d\n", ch, line);
			token.CODE = -1;
			return;
		}
		
		else 
		{
			fseek(fpSource, -1L, SEEK_CUR);
			break;
		}
	}

	if(index == 10)
	{
		while(true)
		{
			ch = getc(fpSource);
			if(!Islegal(ch))
			{
				fseek(fpSource, -1L, SEEK_CUR);
				break;
			}
		}

		printf("WARNING::Identifier in Line %d is too long\n", line);
		token.CODE = -1;
		return;
	}
	WORD[index] = '\0';

	if(IsKeyWord(WORD))
	{
		token.CODE = KindNum(WORD);
		token.ENTRY = -1;
	}
	else
	{
		token.CODE = KindNum("id");
		token.ENTRY = LookUp(WORD, IDENTIFIER);
	}
}

/*
 * Skips a comment, i.e. everything up to and including the closing '}'.
 *
 * ch:    the opening '{' (already consumed; not otherwise used).
 * token: CODE is set to -1 because a comment produces no token.
 *
 * Newlines inside the comment advance the global line counter. If the file
 * ends before '}' is found, an error naming the line where the comment
 * started is printed once and the function returns instead of looping.
 */
void HANDLECOM(char ch, TOKEN &token)
{
	int startLine = line;
	int next;	// int so that EOF is distinguishable from every character

	(void)ch;
	token.CODE = -1;

	while((next = getc(fpSource)) != '}')
	{
		if(next == EOF)
		{
			printf("ERROR::Comments in Line %d is not complete\n", startLine);
			return;
		}
		if(next == '\n')
			line++;
	}
}

/*
 * Recognises an unsigned integer or real constant of the form
 * digits [ '.' digits ].
 *
 * ch:    first digit, already consumed from fpSource.
 * token: receives the "num" code and the symbol-table index of the constant
 *        (kind INTEGER or REAL). CODE is set to -1 when a '.' is not followed
 *        by at least one digit.
 *
 * The character that ends the number is pushed back so the main loop scans
 * it next. A second '.' ends the number instead of restarting the fraction.
 * NOTE: the integer part is accumulated in an int without an overflow check.
 */
void RECOGDIG(char ch, TOKEN &token)
{
	int intVal = ch-'0';
	double dblVal = 0.0;
	bool real = false;
	int exp = 0;	// 0 before '.', then the (negative) power of ten of the next fraction digit
	int next;		// int so that EOF is distinguishable from every character

	for(;;)
	{
		next = getc(fpSource);

		if(next != EOF && isdigit(next))
		{
			if(!real)
				intVal = intVal*10 + (next-'0');
			else
				dblVal += (double)(next-'0') * pow(10.0, exp--);
			continue;
		}

		if(next == '.' && !real)
		{
			real = true;
			dblVal = (double)intVal;
			exp = -1;
			continue;
		}

		// Anything else (including EOF and a second '.') terminates the number.
		if(next != EOF)
			ungetc(next, fpSource);
		break;
	}

	// exp is still -1 only when '.' was seen but no fraction digit followed.
	if(exp == -1)
	{
		printf("ERROR::Real number is not complete in Line %d\n", line);
		token.CODE = -1;
		return;
	}

	token.CODE = KindNum("num");
	if(real)
		token.ENTRY = LookUp(dblVal, REAL);
	else
		token.ENTRY = LookUp(intVal, INTEGER);
}

void RECOGDEL(char ch, TOKEN &token)
{
	switch(ch)
	{
	case '=':
		token.CODE = KindNum("relop");
		token.ENTRY = LookUp("=", RELOP);
		break;
	case '<':
		ch = getc(fpSource);
		if(ch == '=')
		{
			token.ENTRY = LookUp("<=", RELOP);
		}
		else if(ch == '>')
			token.ENTRY = LookUp("<>", RELOP);
		else
		{
			token.ENTRY = LookUp("<", RELOP);
			fseek(fpSource, -1L, SEEK_CUR);
		}
		token.CODE = KindNum("relop");
		break;
	case '>':
		ch = getc(fpSource);
		if(ch == '=')
			token.ENTRY = LookUp(">=", RELOP);
		else
		{
			token.ENTRY = LookUp(">", RELOP);
			fseek(fpSource, -1L, SEEK_CUR);
		}
		token.CODE = KindNum("relop");
		break;
	case '+':
		token.CODE = KindNum("addop");
		token.ENTRY = LookUp("+", ADDOP);
		break;
	case '-':
		token.CODE = KindNum("addop");
		token.ENTRY = LookUp("-", ADDOP);
		break;
	case '|':
		token.CODE = KindNum("addop");
		token.ENTRY = LookUp("|", ADDOP);
		break;
	case '*':
		token.CODE = KindNum("mulop");
		token.ENTRY = LookUp("*", MULOP);
		break;
	case '/':
		token.CODE = KindNum("mulop");
		token.ENTRY = LookUp("/", MULOP);;
		break;
	case '%':
		token.CODE = KindNum("mulop");
		token.ENTRY = LookUp("%", MULOP);;
		break;
	case '&':
		token.CODE = KindNum("addop");
		token.ENTRY = LookUp("&", MULOP);;
		break;
	case ':':
		ch = getc(fpSource);
		if(ch == '=')
		{
			token.CODE = KindNum("assignop");
			token.ENTRY = LookUp(":=", ASSIGNOP);
		}
		else
		{
			token.CODE = KindNum(":");
			token.ENTRY = -1;
			fseek(fpSource, -1L, SEEK_CUR);
		}
		break;
	case '(':
		token.CODE = KindNum("(");
		token.ENTRY = -1;
		break;
	case ')':
		token.CODE = KindNum(")");
		token.ENTRY = -1;
		break;
	case '[':
		token.CODE = KindNum("[");
		token.ENTRY = -1;
		break;
	case ']':
		token.CODE = KindNum("]");
		token.ENTRY = -1;
		break;
	case ',':
		token.CODE = KindNum(",");
		token.ENTRY = -1;
		break;
	case ';':
		token.CODE = KindNum(";");
		token.ENTRY = -1;
		break;
	case '.':
		token.CODE = KindNum(".");
		token.ENTRY = -1;
		break;
	default:
		printf("ERROR::legal symbol \'%c\' in Line %d\n", ch, line);
		break;
	}

	
}

int LookUp(char *WORD, int KIND)
{
	int i, j, t, h;
	char str[20];	
	for(i=0; i<length; i++)
	{
		t = symbol[i].CONTENT.NAME.LENG;
		h = symbol[i].CONTENT.NAME.HEAD;
		if(t == (int)strlen(WORD))
		{
			t = symbol[i].CONTENT.NAME.LENG;
			h = symbol[i].CONTENT.NAME.HEAD;		
			for(int j=0; j<t; j++)
				str[j] = string[h+j];
			str[t]='\0';
			if(!strcmp(WORD, str))
				return i;
		}
	}

	strcat(&string[end], WORD);
	for(j=0; j<symbol[i].CONTENT.NAME.LENG; j++)
		string[end+j] = WORD[j];
	symbol[length].CONTENT.NAME.HEAD = end;
	symbol[length].CONTENT.NAME.LENG = strlen(WORD);
	symbol[length].KIND = KIND;
	length++;
	end += strlen(WORD);
	fwrite(&symbol[i], sizeof(SYMBOL), 1,fpSymb);
	return i;	
}

int LookUp(int Val, int KIND)
{
	int i;
	for(i=0; i<length; i++)
		if(symbol[i].KIND == INTEGER)
			if(Val == symbol[i].CONTENT.VAL.intVAL)
				return i;
	symbol[length].CONTENT.VAL.intVAL = Val;
	symbol[length].KIND = KIND;
	length++;
	fwrite(&symbol[i], sizeof(SYMBOL), 1,fpSymb);
	return i;	
}

int LookUp(double Val, int KIND)
{
	int i;
	for(i=0; i<length; i++)
		if(symbol[i].KIND == REAL)
			if(Val-symbol[i].CONTENT.VAL.dblVAL < 0.0000000001 && Val-symbol[i].CONTENT.VAL.dblVAL > -0.0000000001)
				return i;
	symbol[length].CONTENT.VAL.dblVAL = Val;
	symbol[length].KIND = KIND;
	length++;
	fwrite(&symbol[i], sizeof(SYMBOL), 1,fpSymb);
	return i;	
}

/*
 * Translates a lexeme (or lexeme-class name such as "id" or "relop") into
 * its token code by scanning the CODE table from the start.
 *
 * str: NUL-terminated lexeme to look for.
 *
 * Returns the code of the first entry whose lexeme equals str, or -1 when
 * no entry matches.
 */
int KindNum(char *str)
{
	int idx = 0;

	while(idx < CODELEN)
	{
		if(strcmp(str, CODE[idx].lexeme) == 0)
			return CODE[idx].code;
		++idx;
	}

	return -1;
}

/*
 * Tells whether WORD is one of the reserved words in the KEYWORD table.
 *
 * WORD: NUL-terminated, case-sensitive spelling.
 *
 * Returns true on an exact match, false otherwise. The number of entries is
 * derived from the table itself, so the loop always covers the whole table.
 */
bool IsKeyWord(char *WORD)
{
	const int count = (int)(sizeof(KEYWORD) / sizeof(KEYWORD[0]));
	int i;

	for(i=0; i<count; i++)
	{
		if(strcmp(WORD, KEYWORD[i]) == 0)
			return true;
	}
	return false;
}

/*
 * Tells whether ch is NOT one of the delimiter characters that may directly
 * follow an identifier.
 *
 * Despite the name, the function returns false for the recognised delimiters
 * (punctuation, operators, '{', space, newline) and true for every other
 * character; RECOGID treats a true result as an error.
 *
 * '[' and ']' are delimiters too: RECOGDEL turns them into tokens, so an
 * identifier followed by a subscript such as a[1] must not be rejected.
 */
bool Islegal(char ch)
{
	switch(ch)
	{
	case '(':
	case ')':
	case '[':
	case ']':
	case '{':
	case ':':
	case ';':
	case ',':
	case '.':
	case '=':
	case '<':
	case '>':
	case '+':
	case '-':
	case '*':
	case '%':
	case '/':
	case ' ':
	case '|':
	case '&':
	case '\n':
		return false;
	default:
		return true;
	}
}

void PrintToken()
{
	int t,h,i;
	char str[20];

	fpToken = fopen("TOKEN.TXT","r");
	
    TOKEN token;

	printf("TOKEN 文件:\n");
	for(;!feof(fpToken);)
	{
		fread(&token, sizeof(TOKEN), 1, fpToken);
		if(feof(fpToken))
			break;
		printf("CODE: %2d  \tENTRY: %2d \t %s \n", token.CODE, token.ENTRY, CODE[token.CODE].lexeme);
	}
	printf("\n");

	printf("符号表:\n");
	for(i=0; i<length; i++)
	{
		switch(symbol[i].KIND)
		{
		case INTEGER:
			printf("Value: %-10d\tTYPE: INTEGER\n", symbol[i].CONTENT.VAL.intVAL);
			break;
		case  REAL:
			printf("Value: %-10g\tTYPE: REAL\n", symbol[i].CONTENT.VAL.dblVAL);
			break;
		default:
			t = symbol[i].CONTENT.NAME.LENG;
			h = symbol[i].CONTENT.NAME.HEAD;		
			for(int j=0; j<t; j++)
				str[j] = string[h+j];
			str[t]='\0';
			
			printf("Name : %-10s\tKIND: ", str);
			switch(symbol[i].KIND)
			{
			case IDENTIFIER: printf("IDENTIFIER\n");break;
			case RELOP: printf("RELATION OPERATOR\n");break;
			case ADDOP:	printf("ADDITION OPERATOR\n");break;
			case MULOP: printf("MULTIPLICATION OPERATOR\n");break;
			case ASSIGNOP: printf("ASSIGN OPERATOR\n");break;
			}
			break;
		}
	}
	printf("\n");
	
	rewind(fpSet);
	printf("符号集:\n");
	printf("%s\n",string);
	printf("\n");
	fclose(fpToken);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -