📄 kongge.txt

📁 编译原理课程实验
💻 TXT
字号:
#include<stdio.h>
#include<string.h>
#include<ctype.h>
#include<stdlib.h>
/* Size of the scanner's input buffer, in bytes. */
#define BUFFERLEN 2048

/*
 * Indices into buffer[]: rear is the position of the next character
 * token_getch() reads; front is set to rear by get_token() after a token.
 */
int front = 0;
int rear = 0;

/* Input buffer, zero-initialised. */
char buffer[BUFFERLEN] = { 0 };

/* Current line number in the input, starting at 1. */
int line = 1;

/* Stream of the source file being tokenized. */
FILE *fin;

/* Scratch array holding the text of the lexeme being collected. */
char id[100];

/* Next free index in id[]. */
int dex = 0;

/* Per-line word counter. */
int count = 0;

/* Scratch character. */
char achar;

/* Set to 1 by load() once the end of the input file has been reached. */
int reach_end = 0;

/* Accumulator for numeric values being scanned. */
int num_value = 0;
/*
 * One scanned token. Tokens are chained through next_token into a singly
 * linked list. The trailing declarator also defines a global object named
 * `token` of this type.
 */
struct token {
	int lineno;               /* line number the token was found on */
	int position;             /* position of the token within that line */
	int wordkey;              /* token class code (written in hex in this file) */
	char *value;              /* text of the token */
	struct token *next_token; /* next node in the list, or NULL */
} token;

/* Head of the token list. */
struct token *token_list_head = NULL;
// Table of reserved words recognised by the lexer, in alphabetical order.
// Besides the Java keywords it also lists the literals "false", "null" and
// "true". The table has 50 entries; lookup_keyword() searches it linearly.
char* keyword[]={
	"abstract",
	"boolean",
	"break",
	"byte",
	"case",
	"catch",
	"char",
	"class",
	"const",
	"continue",
	"default",
	"do",
	"double",
	"else",
	"extends",
	"false",
	"final",
	"finally",
	"float",
	"for",
	"goto",
	"if",
	"implements",
	"import",
	"instanceof",
	"int",
	"interface",
	"long",
	"native",
	"new",
	"null",
	"package",
	"private",
	"protected",
	"public",
	"return",
	"short",
	"static",
	"super",
	"switch",
	"synchronized",
	"this",
	"throw",
	"throws",
	"transient",
	"true",
	"try",
	"void",
	"volatile",
	"while"
};
// Refill one half of the double buffer from fin.
//
// buffer[] is used as two 1024-byte halves. Each half holds up to 1023 data
// bytes; its last slot (index 1023 and index 2047) is a boundary slot that
// never carries input. The half to refill is chosen from rear: rear==1024
// (the scanner has just stepped past the first half) selects the second
// half, any other value selects the first half. On return rear is the index
// of the first byte of the refilled half.
//
// When fread() delivers fewer than 1023 bytes the stream is closed, fin is
// set to NULL and reach_end is set to 1. The unused tail of the half is
// zero-filled, so the scanner reads '\0' after the last input byte rather
// than stale data. Calls made after that point (or with fin==NULL) read
// nothing and hand out a zero-filled half.
void load(){
	const size_t half_data=1023; // data bytes per half
	char *half;
	size_t num=0;                // bytes actually read

	if(rear==1024){
		half=buffer+1024;
		rear=1024;
	}
	else{
		half=buffer;
		rear=0;
	}
	if(fin!=NULL && !reach_end){
		// fread() advances the file position by itself; no seek is needed
		num=fread(half,sizeof(char),half_data,fin);
		if(num<half_data){
			fclose(fin);
			fin=NULL;
			reach_end=1;
		}
	}
	memset(half+num,0,half_data-num);
}
// Return the next input character and advance rear.
//
// The boundary slots 1023 and 2047 are recognised by position, not by their
// content, so input bytes that happen to compare equal to EOF are passed
// through unchanged. When rear reaches a boundary slot the slot is stepped
// over and the following half is refilled before reading on.
// Once the input is exhausted every call returns '\0'.
char token_getch(){
	if(rear==1023 || rear==2047){
		rear++;
		load();
	}
	return buffer[rear++];
}

// Search the keyword table for word.
//   word : NUL-terminated text to look up
// Returns 0 when word IS listed in keyword[], 1 when it is not.
// (get_token() relies on exactly this convention: a non-zero result is
// treated as "not a reserved word".)
int lookup_keyword(char* word){
	size_t total=sizeof(keyword)/sizeof(keyword[0]); // every entry, including the last
	size_t i;

	for(i=0;i<total;i++){
		if(strcmp(word,keyword[i])==0)
			return 0;
	}
	return 1;
}
//向变量表中插入一个token结构，并装入各字段内容
void insert(int value_size,int attri,const char* value){
	
	struct token* token_pointer;
    token_pointer=(struct token*)malloc(sizeof(token));
	
	token_pointer->lineno=line;
	token_pointer->wordkey=attri;
    token_pointer->value=(char*)malloc(value_size*sizeof(char));
	strcpy(token_pointer->value,value);
	token_pointer->next_token=token_list_head;
	token_list_head=token_pointer;
}

//获取一个符号
void get_token(){
	int i=0;
    char ch;
	
	ch=token_getch();
	if(ch=='\n'){
		front=rear;
		line++;
	}
	//如果是空格，只是将其跳过，并将所有空格用一个属性字表示
    else if(isspace(ch)){
	    do{
			if(ch=='\n'){
        		line++;
			}
			ch=token_getch();

		} while(isspace(ch));

		rear--;
		front=rear;
    	insert(2,0x102," ");
    }
	//判断关键字和标识符及布尔值
	else if(isalpha(ch)||ch=='_'||ch=='$'){
	    do{
			id[dex++]=ch;
			ch=token_getch();


		}while(isalnum(ch)||ch=='_'||ch=='$');
	
		//在关键字表中查找
		if(lookup_keyword(id)){
			insert(dex,0x104,id);
		}
		else{
			if(strcmp(id,"true")==0 || strcmp(id,"false")==0){
				insert(dex,0x105,id);
			}
			else{
				insert(dex,0x103,id);
		    }
		}

		rear--;
		front=rear;
		//将临时变量存储数组索引置零
		dex=0;
		//将临时变量存储数组清零
		memset(id,0,100);
	}
	//注释处理
	else if(ch=='/'){
		ch=token_getch();
	    if(ch=='/'){
			do{ 
		        ch=token_getch();
				id[dex++]=ch;
			}while(ch!='\n');
			line++;
	
		}
		else if(ch=='*'){
			do{ 
		        ch=token_getch();
				id[dex++]=ch;
			}while( !(ch=='*'&& (ch=token_getch())=='/'));
		}
		else if(ch=='='){
			insert(3,0x110,"/=");
		}
		else{
			insert(2,0x11b,"/");
		}
		front=rear;
     
		insert(dex,0x101,id);
		dex=0;
		memset(id,0,100);
	}
	//字符串处理
	else if(ch=='"'){
		while((ch=token_getch())!='"'){
			if(ch=='\\'){
				ch=token_getch();
				if(ch=='\'')
                    ch='\'';
				else if(ch=='r')
				    ch='\r';
			    else if(ch=='f')	
				    ch='\f';
		        else if(ch=='t')
				    ch='\t';
		        else if(ch=='b')
			        ch='\b';
			    else if(ch=='n')
				    ch='\n';
			    else if(ch=='t')
				    ch='\t';
			    else if(ch='\\')
				    ch='\\';
			    else if(ch=='"')
				    ch='"';
			    else if(ch=='\'')
				    ch='\'';
			    else if((ch-'0')>=0 && (ch-'7')<=0){
				    while((ch-'0')>=0 && (ch-'7')<=0){
					    num_value=num_value*8;
						num_value+=(ch-'0');
						ch=token_getch();
					}
					if(num_value>128){
					    printf("%d: too big for character\n",line);
					}
					else{
						ch=num_value;
					}
					num_value=0;
				}
			    else if(ch='u'){
					ch=token_getch();
				    while(ch=='0'||ch=='1'||ch=='2'||ch=='3'||ch=='4'||ch=='5'||ch=='6'||ch=='7'||ch=='8'||ch=='9'||ch=='A'||ch=='a'||ch=='B'||ch=='b'||ch=='C'||ch=='c'||ch=='D'||ch=='d'||ch=='E'||ch=='e'||ch=='F'||ch=='f'){
					    num_value=num_value*16;
						if(ch=='0'||ch=='1'||ch=='2'||ch=='3'||ch=='4'||ch=='5'||ch=='6'||ch=='7'||ch=='8'||ch=='9'){
							num_value+=(id[i]-'0');
						}
						else if(ch=='A'||ch=='B'||ch=='C'||ch=='D'||ch=='E'||ch=='F'){
							num_value+=(id[i]-55);
						}
						else if(ch=='a'||ch=='b'||ch=='c'||ch=='d'||ch=='e'||ch=='f'){
							num_value+=(id[i]-55);
						}
						ch=token_getch();
					}
					if(num_value>128){
					    printf("%d: too big for character\n",line);
					}
					else{
						ch=num_value;
					}
					num_value=0;
				}
			}
			id[dex++]=ch;
			if(dex==100){
				printf("%d: string too long\n",line);
			}
		}
		front=rear;
		
		insert(dex,0x106,id);
		//将临时变量存储数组索引置零
		dex=0;
		//将临时变量存储数组清零
		memset(id,0,100);
	}
	//处理单个字符 
	else if(ch=='\''){
		ch=token_getch();
		if(ch=='\\'){
			ch=token_getch();
			if(ch=='\'')
                ch='\'';
			else if(ch=='r')
				ch='\r';
			else if(ch=='f')	
				ch='\f';
		    else if(ch=='t')
				ch='\t';
		    else if(ch=='b')
			    ch='\b';
			else if(ch=='n')
				ch='\n';
			else if(ch=='t')
				ch='\t';
			else if(ch='\\')
				ch='\\';
			else if(ch=='"')
				ch='"';
			else if(ch=='\'')
				ch='\'';
			else if((ch-'0')>=0 && (ch-'7')<=0){
				while((ch-'0')>=0 && (ch-'7')<=0){
					num_value=num_value*8;
					num_value+=(ch-'0');
					ch=token_getch();
				}
				if(num_value>128){
				    printf("%d: too big for character\n",line);
				}
				else{
					ch=num_value;
				}
				num_value=0;
			}
		    else if(ch='u'){
				ch=token_getch();
		        while(ch=='0'||ch=='1'||ch=='2'||ch=='3'||ch=='4'||ch=='5'||ch=='6'||ch=='7'||ch=='8'||ch=='9'||ch=='A'||ch=='a'||ch=='B'||ch=='b'||ch=='C'||ch=='c'||ch=='D'||ch=='d'||ch=='E'||ch=='e'||ch=='F'||ch=='f'){
			        num_value=num_value*16;
					if(ch=='0'||ch=='1'||ch=='2'||ch=='3'||ch=='4'||ch=='5'||ch=='6'||ch=='7'||ch=='8'||ch=='9'){
						num_value+=(id[i]-'0');
					}
					else if(ch=='A'||ch=='B'||ch=='C'||ch=='D'||ch=='E'||ch=='F'){
						num_value+=(id[i]-55);
					}
					else if(ch=='a'||ch=='b'||ch=='c'||ch=='d'||ch=='e'||ch=='f'){
						num_value+=(id[i]-55);
					}
					ch=token_getch();
				}
				if(num_value>128){
				    printf("%d: too big for character\n",line);
				}
				else{
					ch=num_value;
				}
				num_value=0;
			}
		}
	    if((ch=token_getch())!='\'')
			printf("%d: ''' expected\n");
		front=rear;
		insert(2,0x106,&ch);
	}
	//处理分界符及部分运算符 
	else if(ch==','){
        front=rear;
		insert(2,0x120,",");
	}
	else if(ch=='{'){
		front=rear;
		insert(2,0x121,"{");
	}
	else if(ch=='}'){
		front=rear;
		insert(2,0x121,"}");
	}
	else if(ch==';'){
		front=rear;
		insert(2,0x122,";");
	}
	else if(ch=='['){
		front=rear;
		insert(2,0x11d,"[");
	}
	else if(ch==']'){
		front=rear;
		insert(2,0x11d,"]");
	}
	else if(ch=='('){
		front=rear;
		insert(2,0x11d,"(");
	}
	else if(ch==')'){
		front=rear;
		insert(2,0x11d,")");
	}
	else if(ch=='.'){
		ch=token_getch();
		if(!isdigit(ch)){
			rear--;
            front=rear;
		    insert(2,0x11d,".");
		}
		else{
			do{
				id[dex++]=ch;
				ch=token_getch();
			}while(isdigit(ch));
			insert(dex,0x108,id);
			rear--;
			front=rear;
			dex=0;
			memset(id,0,100);
		}
	}
	else if(ch=='='){
		ch=token_getch();
		if(ch=='='){
			insert(3,0x117,"==");
		}
		else{
			rear--;
			insert(2,0x110,"=");
		}
		front=rear;
	}
	else if(ch=='+'){
		ch=token_getch();
		if(ch=='+'){
			insert(3,0x11c,"++");
		}
		else if(ch=='='){
			insert(3,0x110,"+=");
		}
		else{ 
			rear--;
			while(buffer[front--]!=' ');
			front++;
			if( isalpha(buffer[front])||isdigit(buffer[front]) ){
				insert(3,0x11a,"-");
			}
			else{
				insert(3,0x11a,"-");
			}
		}
		front=rear;
	}
	else if(ch=='-'){
		ch=token_getch();
		if(ch=='-'){
			insert(3,0x11c,"--");
		}
		else if(ch=='='){
			insert(3,0x110,"-=");
		}
		else{ 
			rear--;
			while(buffer[front--]!=' ');
			front++;
			if( isalpha(buffer[front])||isdigit(buffer[front]) ){
				insert(3,0x11a,"-");
			}
			else{
				insert(3,0x11a,"-");
			}
		}
		front=rear;
	}
	else if(ch=='*'){
		ch=token_getch();
		if(ch=='='){

			insert(3,0x110,"*=");
		}
		else{
			rear--;
			insert(2,0x11b,"*");
		}
		front=rear;
	}
	else if(ch=='%'){
		ch=token_getch();
		if(ch=='='){
			insert(3,0x110,"%=");
		}
		else{
			rear--;
			insert(2,0x11b,"%");
		}
		front=rear;
	}
	else if(ch=='&'){
		ch=token_getch();
		if(ch=='&'){
			insert(3,0x113,"&&");
		}
		else if(ch=='='){
			insert(3,0x110,"&=");
		}
		else{
			rear--;
			insert(1,0x116,"&");
		}
		front=rear;
	}
	else if(ch=='^'){
		ch=token_getch();
		if(ch=='='){
			insert(3,0x110,"^=");
		}
		else{
			rear--;
			insert(2,0x115,"^");
		}
		front=rear;
	}
	else if(ch=='|'){
		ch=token_getch();
		if(ch=='|'){
			insert(3,0x112,"||");
		}
		else if(ch=='='){
			insert(3,0x110,"|=");
		}
		else{
			rear--;
			insert(2,0x114,"|");
		}
		front=rear;
	}
	else if(ch=='!'){
		ch=token_getch();
		if(ch=='='){
			insert(3,0x117,"!=");
		}
		else{
			rear--;
			insert(2,0x11c,"!");
		}
		front=rear;
	}
	else if(ch=='>'){
		ch=token_getch();
		if(ch=='>'){
			ch=token_getch();
			if(ch=='>'){
				ch=token_getch();
				if(ch=='='){
					insert(5,0x110,">>>=");
				}
				else{
					rear--;
					insert(4,0x119,">>>");
				}
			}
			else{
				rear--;
				insert(3,0x119,">>");
			}
		}
		else if(ch=='='){
			insert(3,0x118,">=");
		}
		else{
			rear--;
			insert(2,0x118,">");
		}
		front=rear;
	}
	else if(ch=='<'){
		ch=token_getch();
		if(ch=='<'){
			ch=token_getch();
			if(ch=='='){
				insert(4,0x110,"<<=");
			}
			else{
				rear--;
				insert(3,0x118,"<<");
			}
		}
		else if(ch=='='){
			insert(3,0x118,"<=");
		}
		else{
			rear--;
			insert(2,0x118,"<");
		}
		front=rear;
	}
	else if(ch=='~'){
		insert(2,0x11c,"~");
	}
	else if(ch=='?'){
		insert(3,0x111,"?:");
	}
	else if(ch==':'){
		insert(3,0x111,"?:");
	}
	else if(isdigit(ch)){
		id[dex++]=ch;
		ch=token_getch();
		if(ch=='X' || ch=='x'){
			do{
				id[dex++]=ch;
				ch=token_getch();
			}while(isdigit(ch)||ch=='A'||ch=='a'||ch=='B'||ch=='b'||ch=='C'||ch=='c'||ch=='D'||ch=='d'||ch=='E'||ch=='e'||ch=='F'||ch=='f');
			insert(dex,0x107,id);
			dex=0;
			memset(id,0,100);
		}
		else{
			do{
				id[dex++]=ch;
				ch=token_getch();
			}while(isdigit(ch));
			if(ch=='L'||ch=='l'){
				id[dex++]=ch;
				ch=token_getch();
				insert(dex,0x107,id);
			    dex=0;
			    memset(id,0,100);
			}
			else if(ch=='.'||ch=='e'||ch=='E'){
				do{
					id[dex++]=ch;
					ch=token_getch();
				}while(isdigit(ch));
				if(ch=='F'|| ch=='f'){
					id[dex++]=ch;
					ch=token_getch();
				}
				insert(dex,0x108,id);
			    dex=0;
			    memset(id,0,100);
			}
		}
	}
}	
int main(int argc,char * argv[]){
    
	char filename[100];
	int  i=0;

    //设立缓冲区标志位
	buffer[1023]=EOF;
    buffer[2047]=EOF;

	printf("Please enter file name:\n");
    scanf("%s",filename);

    //打开文件,并将1023个字符读入前一半缓冲区
	fin=fopen(filename,"r");
    if(fin==NULL){
		printf("FILE_CANNOT_OPEN\n");
	}
	else{
		load(fin,0);

	}
	for(i=0;i<1000;i++){
		get_token();
	}
   

}
💿 文件大小 35 K
👤 上传用户 superdavid
📂 所属分类 Java编程
🏷️ 相关标签

#编译原理 #实验
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -