⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 word_analyze.cpp

📁 一个自己写的编译原理的词法分析程序
💻 CPP
字号:
#include <stdio.h> 
#include <iostream>
#include <string> 
#include <ctype.h> 
#include <malloc.h> 
#include <conio.h> 
#include <stdlib.h>

using namespace std;
using std::string;

/*
 * Returns 1 if ch is an alphabetic character, 0 otherwise.
 *
 * The value is converted to unsigned char before it is handed to
 * isalpha(): passing a negative char (any byte >= 0x80 where char is
 * signed) to the <ctype.h> classifiers is undefined behaviour.
 */
int IsLetter(char ch)
{
	return isalpha(static_cast<unsigned char>(ch)) ? 1 : 0;
}

/*
 * Returns 1 if ch is a decimal digit ('0'..'9'), 0 otherwise.
 *
 * The previous implementation called isalnum(), which also accepts
 * letters, so a digit run was allowed to absorb following letters.
 * The value is converted to unsigned char because the <ctype.h>
 * classifiers are undefined for negative char values.
 */
int IsDigit(char ch)
{
	return isdigit(static_cast<unsigned char>(ch)) ? 1 : 0;
}
 
/*
 * Returns 1 if ch is any whitespace character recognised by isspace()
 * (space, tab, newline, ...), 0 otherwise.
 *
 * The value is converted to unsigned char because the <ctype.h>
 * classifiers are undefined for negative char values.
 */
int IsSpace(char ch)
{
	return isspace(static_cast<unsigned char>(ch)) ? 1 : 0;
}

/*
 * Reads the next byte from fp and stores it through ch.
 * The int returned by fgetc() is narrowed to char, so end-of-file is
 * stored as the char value of EOF.
 */
void get_the_char(FILE *fp,char *ch)
{
	const int next = fgetc(fp);
	*ch = static_cast<char>(next);
}

/*
 * Reads characters from fp until one that is not whitespace, or the
 * end-of-file marker, has been read; that character is left in *ch.
 */
void getnext(FILE *fp,char *ch)
{
	do {
		get_the_char(fp,ch);
	} while((*ch != EOF) && IsSpace(*ch));
}

/*
 * Pushes the look-ahead character back: moves the read position of fp
 * one byte backwards and resets *ch to a blank.
 *
 * When *ch holds the end-of-file marker, the read that produced it
 * consumed nothing, so the position is left alone. Seeking back
 * regardless would make the last byte of the file be read again.
 */
void Retract(FILE *fp,char *ch)
{
	if(*ch != static_cast<char>(EOF))
		fseek(fp,-1L,SEEK_CUR);
	*ch = ' ';
}

 
/*
 * Maps a lexeme to its type code, returned as a NUL-terminated string.
 *
 *   101-109  keywords
 *   201-214  operators
 *   301-310  punctuation / delimiters
 *   "0"      anything not in the table (treated as an identifier)
 *
 * strToken must be a NUL-terminated string; it is only read.
 * The returned pointer refers to static storage and must not be freed.
 *
 * Fixes relative to the earlier if-chain: the single quote now maps to
 * 303 (the double quote was tested twice, making 303 unreachable) and
 * the comment terminator maps to 305 (an empty string was tested
 * instead). An empty lexeme now falls through to "0".
 */
char* return_code(char *strToken)
{
	struct CodeEntry {
		const char *lexeme;
		char code[4];	/* writable storage, so a plain char* can be returned */
	};

	static CodeEntry table[] = {
		/* keywords */
		{"int",    "101"},
		{"if",     "102"},
		{"else",   "103"},
		{"while",  "104"},
		{"for",    "105"},
		{"true",   "106"},
		{"false",  "107"},
		{"break",  "108"},
		{"return", "109"},

		/* operators */
		{"&&", "201"},
		{"||", "202"},
		{"+",  "203"},
		{"-",  "204"},
		{"*",  "205"},
		{"/",  "206"},
		{"%",  "207"},
		{"=",  "208"},
		{"==", "209"},
		{"<",  "210"},
		{">",  "211"},
		{"<=", "212"},
		{">=", "213"},
		{"!=", "214"},

		/* punctuation */
		{";",  "301"},
		{"\"", "302"},
		{"'",  "303"},
		{"/*", "304"},
		{"*/", "305"},
		{":",  "306"},
		{"(",  "307"},
		{")",  "308"},
		{"{",  "309"},
		{"}",  "310"}
	};
	static char identifierCode[] = "0";

	const unsigned int count = sizeof(table) / sizeof(table[0]);
	for(unsigned int i = 0; i < count; i++)
	{
		if(strcmp(strToken, table[i].lexeme) == 0)
			return table[i].code;
	}

	/* id--0  (constants are tagged "1" by the caller, not here) */
	return identifierCode;
}



/*
 * Appends the character *ch to the NUL-terminated string in strToken.
 *
 * strToken is expected to be the 80-byte, zero-filled token buffer used
 * by the lexer. Only the first 79 slots may receive characters, so the
 * last byte always remains a NUL terminator; characters beyond that
 * limit are silently dropped instead of producing an unterminated
 * string.
 */
void Concat(char *strToken, char *ch)
{
	int i;
	for(i=0;i<79;i++)
	{
		/* first free slot = first NUL byte */
		if(strToken[i] == '\0')
		{
			strToken[i] = *ch;
			break;
		}
	}
}

/* Writes one "(code,lexeme)" record to standard output and to out. */
static void emitToken(FILE *out, const char *code, const char *lexeme)
{
	std::cout<<"("<<code<<","<<lexeme<<")"<<std::endl;
	fputs("(",out);
	fputs(code,out);
	fputs(",",out);
	fputs(lexeme,out);
	fputs(")\n",out);
}

/*
 * Lexical analysis: splits the text read from fp1 into tokens and
 * writes one "(code,lexeme)" line per token to standard output and fp2.
 *
 *   - a letter starts an identifier/keyword (letters and digits follow);
 *     its code comes from return_code(), "0" meaning identifier
 *   - a digit starts a numeric constant, reported with code "1"
 *   - any other non-blank character starts a symbol run, which extends
 *     up to the next blank, letter or digit and is looked up with
 *     return_code()
 *
 * Always returns 0, once the end of the input has been reached.
 *
 * End of input is detected by comparing the char look-ahead with EOF,
 * as elsewhere in this file, so a 0xFF byte in the input is also taken
 * as end of input on platforms where char is signed.
 */
int lexSubFunc(FILE *fp1,FILE *fp2)
{
	while(1)
	{
		char ch;
		getnext(fp1,&ch);
		if(ch == EOF) return 0; /* end of input file */

		char strToken[80] = {0}; /* fresh, zero-filled buffer per token */
		const char *code;

		if(IsLetter(ch))
		{
			/* identifier or keyword */
			while(ch != EOF && (IsLetter(ch) || IsDigit(ch)))
			{
				Concat(strToken,&ch);
				get_the_char(fp1,&ch);
			}
			code = return_code(strToken);
		}
		else if(IsDigit(ch))
		{
			/* numeric constant */
			while(ch != EOF && IsDigit(ch))
			{
				Concat(strToken,&ch);
				get_the_char(fp1,&ch);
			}
			code = "1";
		}
		else
		{
			/* operator / punctuation run; must also stop at end of
			   input, otherwise this loop never terminates */
			do
			{
				Concat(strToken,&ch);
				get_the_char(fp1,&ch);
			} while(ch != EOF && !IsSpace(ch) && !IsDigit(ch) && !IsLetter(ch));
			code = return_code(strToken);
		}

		/* The look-ahead belongs to the next token and is pushed back,
		   unless it is the end-of-file marker: nothing was consumed
		   then, and seeking back would re-read the last byte. */
		const bool reachedEnd = (ch == EOF);
		if(!reachedEnd)
			Retract(fp1,&ch);

		emitToken(fp2,code,strToken);

		if(reachedEnd) return 0;
	}
}

/*
 * Program entry point.
 *
 * Opens the fixed input file d:\cpp\word\in.txt and the fixed output
 * file d:\cpp\word\out.txt, prints the table of token type codes, then
 * runs the lexer over the input.
 *
 * Returns 1 if either file cannot be opened, otherwise the value
 * returned by lexSubFunc().
 */
int main()
{
	FILE *fp1,*fp2;

	if((fp1=fopen("d:\\cpp\\word\\in.txt","rb"))==NULL)
	{
		std::cout<<"Can't open in.txt! Press any key to exit."<<std::endl;
		getch();
		return 1;
	}

	if((fp2=fopen("d:\\cpp\\word\\out.txt","wt+"))==NULL)
	{
		std::cout<<"Cannot create out.txt! Press any key to exit."<<std::endl;
		getch();
		fclose(fp1); /* do not leave the input file open on failure */
		return 1;
	}

	/* legend of token type codes, printed once before the token list */
	std::cout<<"Notes:\n"
	<<"\ttype\t\t word\t\t typecode\n"
	<<"\tid\t\t  \t\t 0\n"
	<<"\tdigit\t\t  \t\t 1\n"
	<<"\tkeyword\t\t int \t\t 101\n"
	<<"\tkeyword\t\t if \t\t 102\n"
	<<"\tkeyword\t\t else \t\t 103\n"
	<<"\tkeyword\t\t while \t\t 104\n"
	<<"\tkeyword\t\t for \t\t 105\n"
	<<"\tkeyword\t\t true \t\t 106\n"
	<<"\tkeyword\t\t false \t\t 107\n"
	<<"\tkeyword\t\t break \t\t 108\n"
	<<"\tkeyword\t\t return \t\t109\n\n"

	<<"\toperator\t\t && \t\t 201\n"
	<<"\toperator\t\t || \t\t 202\n"
	<<"\toperator\t\t + \t\t 203\n"
	<<"\toperator\t\t - \t\t 204\n"
	<<"\toperator\t\t * \t\t 205\n"
	<<"\toperator\t\t / \t\t 206\n"
	<<"\toperator\t\t % \t\t 207\n"
	<<"\toperator\t\t = \t\t 208\n"
	<<"\toperator\t\t == \t\t 209\n"
	<<"\toperator\t\t < \t\t 210\n"
	<<"\toperator\t\t > \t\t 211\n"
	<<"\toperator\t\t <= \t\t 212\n"
	<<"\toperator\t\t >= \t\t 213\n"
	<<"\toperator\t\t != \t\t 214\n\n"

	<<"\t符号\t\t ; \t\t 301\n"
	<<"\t符号\t\t \" \t\t 302\n"
	<<"\t符号\t\t \' \t\t 303\n"
	<<"\t符号\t\t /* \t\t 304\n"
	<<"\t符号\t\t */ \t\t 305\n"
	<<"\t符号\t\t : \t\t 306\n"
	<<"\t符号\t\t ( \t\t 307\n"
	<<"\t符号\t\t ) \t\t 308\n"
	<<"\t符号\t\t { \t\t 309\n"
	<<"\t符号\t\t } \t\t 310\n"<<std::endl;

	const int status = lexSubFunc(fp1,fp2);
	fclose(fp1);
	fclose(fp2);
	return status;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -