📄 javascanner.c

📁 编译原理课程实验
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*----------------------------------------- JAVA 词法分析器---------------------------------------*/

/*    (C) 2007-2009 lujia(20040094) (851010@bit.edu.cn) 
*     说明：
*         本词法分析器为编译原理实验作业，能够按照JAVA的语法规则进行分析。分析结果保留在
*     用户指定的某个文件中，结果以四元式(行，行内位置，十六进制编码，字面值)的形式表示，
*     并统计出了单词总数及每行单词个数。统计的结果中没有注释及空格，因为它们在后续分析中
*     无实际意义
*/ 

#include<stdio.h>
#include<string.h>
#include<ctype.h>
#include<stdlib.h>
// Total size of the input buffer. The buffer is used as two halves of
// 1024 bytes each that are refilled alternately (see load()).
#define  BUFFERLEN  2048
// Buffer indices. rear is the index of the next character token_getch()
// will read; get_token() moves front up to rear once a lexeme is consumed.
int  front=0,rear=0;
// The input buffer itself, zero-initialized.
char buffer[BUFFERLEN]={0};
// Current line number in the source file (starts at 1).
int  line=1;
// The source file being scanned.
FILE * fin;
// Scratch array holding the text of the lexeme being assembled
// (identifiers, keywords, strings, and integer/real literals).
char id[100];
// Write index into id[].
int  dex=0;
// Per-line word counter (not referenced in the visible part of this file).
int  count=0;
// Accumulator used while converting an octal or hexadecimal escape
// sequence into a single character value.
int  num_value=0;
// Ordinal position of the current word within its line.
int  position=0;
// Number of reads performed so far; load() multiplies it by the read size
// to compute the file offset for the next read.
int  cnt=0;
// One scanned word (token), stored as a node of a singly linked list.
// Note that the declaration below also defines a file-scope object named
// `token` of this type; insert() uses sizeof(token) to size new nodes.
struct token{
	int   lineno;  // line number the word was found on
	int   position;// position of the word within that line
	int   wordkey; // token type code (written in hexadecimal at call sites)
    char* value;   // heap-allocated copy of the word's text
	struct token* next_token;// next word in the list, NULL for the last one
}token;

// Head and tail of the token list. insert() appends after token_list_tail.
struct token* token_list_head=NULL;
struct token* token_list_tail=NULL;
// Scratch pointer; insert() leaves it pointing at the node it just created.
struct token* token_pointer;

// Table of Java reserved words, in alphabetical order (50 entries).
// Besides true keywords it also lists the literals "false", "null" and
// "true"; get_token() re-checks for "true"/"false" after a table hit so
// that boolean literals get their own token code.
// The entry "protected" was previously misspelled as "protect", which made
// the scanner classify the real keyword as a plain identifier.
char* keyword[]={
	"abstract",
	"boolean",
	"break",
	"byte",
	"case",
	"catch",
	"char",
	"class",
	"const",
	"continue",
	"default",
	"do",
	"double",
	"else",
	"extends",
	"false",
	"final",
	"finally",
	"float",
	"for",
	"goto",
	"if",
	"implements",
	"import",
	"instanceof",
	"int",
	"interface",
	"long",
	"native",
	"new",
	"null",
	"package",
	"private",
	"protected",
	"public",
	"return",
	"short",
	"static",
	"super",
	"switch",
	"synchronized",
	"this",
	"throw",
	"throws",
	"transient",
	"true",
	"try",
	"void",
	"volatile",
	"while"
};

// Refill one half of the input buffer from the source file.
//
// Which half is filled depends on rear: when rear is 1023 (the sentinel
// slot at the end of the first half) the second half, starting at index
// 1024, is filled; in every other case the first half, starting at index
// 0, is filled. On return rear is the index of the first freshly read
// character and cnt has been incremented.
//
// Each read transfers at most 1023 bytes, leaving the last slot of the
// half free. The slot directly after the bytes read is always set to EOF,
// so a full read re-arms the end-of-half sentinel that token_getch() tests
// for, and a short read marks the end of the input. After a short read
// (or a failed seek) the file is closed.
// NOTE(review): fin is not reset after fclose(); load() must not be called
// again once the end of the input has been reached.
void load(){
	enum { HALF_SPAN = 1024, HALF_DATA = HALF_SPAN - 1 };
	size_t got = 0;   // bytes actually read
	int    start;     // index at which the half being refilled begins

	start = (rear == HALF_DATA) ? HALF_SPAN : 0;

	// The file offset is derived from the number of reads done so far.
	// A failed seek is treated like a zero-byte read (end of input).
	if (fseek(fin, (long)cnt * HALF_DATA, SEEK_SET) == 0) {
		got = fread(&buffer[start], sizeof(char), HALF_DATA, fin);
	}
	cnt++;
	rear = start;

	// got <= HALF_DATA, so this index never leaves the current half.
	buffer[start + got] = (char)EOF;

	if (got < HALF_DATA) {
		fclose(fin);
	}
}
// Return the next character from the input buffer and advance rear.
//
// If the character at rear is the EOF sentinel and rear is the last slot
// of either buffer half (index 1023 or 2047), load() is called to refill
// the other half and the first character of that half is returned
// instead. An EOF sentinel found anywhere else marks the end of the input
// and is returned as is.
//
// The sentinel is stored in a char, so it is compared against (char)EOF:
// comparing a plain char with the int EOF is never true on platforms
// where char is unsigned.
char token_getch(){
	enum { FIRST_HALF_END = 1023, SECOND_HALF_END = 2047 };
	const char sentinel = (char)EOF;
	char ch = buffer[rear];

	if (ch == sentinel && (rear == FIRST_HALF_END || rear == SECOND_HALF_END)) {
		load();             // also repositions rear
		ch = buffer[rear];
	}
	rear++;
	return ch;
}

// Search the keyword table for `word`.
//
// Returns 0 when `word` matches a table entry and 1 when it does not.
// (The return value is the opposite of a "found" flag; get_token() relies
// on this, treating a non-zero result as "not a keyword".)
//
// The number of entries is taken from the table itself. The previous
// hard-coded bound of 49 skipped the last entry, so "while" was never
// recognized.
int lookup_keyword(char* word){
	size_t total = sizeof keyword / sizeof keyword[0];
	size_t k;

	for (k = 0; k < total; k++) {
		if (strcmp(word, keyword[k]) == 0) {
			return 0;
		}
	}
	return 1;
}

// Append a new token node to the end of the token list.
//
//   pos        - position of the word within the current line
//   value_size - caller's size hint, in bytes, for the text buffer
//   attri      - token type code, stored in wordkey
//   value      - NUL-terminated text of the word; it is copied
//
// The node's line number is taken from the global `line`. On return both
// token_list_tail and token_pointer point at the new node.
//
// Precondition: token_list_tail already points at a node; it is
// dereferenced unconditionally.
//
// The text buffer is sized to hold the whole string including its
// terminator even when value_size is smaller. Callers pass the character
// count without the terminator for identifiers and strings, which used to
// make the copy overrun the allocation by one byte.
// If memory cannot be allocated, a message is printed to stderr and the
// program exits.
void insert(int pos,int value_size,int attri,const char* value){
	size_t text_len = strlen(value) + 1;   // includes the terminator
	size_t room = text_len;

	if (value_size > 0 && (size_t)value_size > room) {
		room = (size_t)value_size;
	}

	token_pointer = malloc(sizeof *token_pointer);
	if (token_pointer == NULL) {
		fprintf(stderr, "%d: out of memory\n", line);
		exit(EXIT_FAILURE);
	}

	token_pointer->value = malloc(room);
	if (token_pointer->value == NULL) {
		fprintf(stderr, "%d: out of memory\n", line);
		exit(EXIT_FAILURE);
	}
	memcpy(token_pointer->value, value, text_len);

	token_pointer->position = pos;
	token_pointer->lineno = line;
	token_pointer->wordkey = attri;
	token_pointer->next_token = NULL;

	// Link the node in behind the current tail and make it the new tail.
	token_list_tail->next_token = token_pointer;
	token_list_tail = token_pointer;
}

//分析器主体
void get_token(){
	int i=0,tri_op=0;
    char ch;
    while(1){
		//到达文件末尾，跳出循环
		if(ch==EOF){
			break;
		}
	    //获取一个字符
		ch=token_getch();
		//如果是回车，将行数加1，置position为0
 		if(ch=='\n'){
		    front=rear;
			position=0;
		    line++;
		}
	    //如果是空格，只是将其跳过
        else if(isspace(ch)){
		    if(ch=='\n'){
				position=0;
        	    line++;
			}
			front=rear;
    	//  insert(++position,2,0x102," ");
		}
/*---------------------------判断关键字和标识符及布尔值----------------------------------*/
	    else if(isalpha(ch)||ch=='_'||ch=='$'){
	        do{
			    id[dex++]=ch;
			    ch=token_getch();
			}while(isalnum(ch)||ch=='_'||ch=='$');
	
			//在关键字表中查找，根据查找结果填入单词表
		    if(lookup_keyword(id)){
			    insert(++position,dex,0x104,id);
			}
		    else{
			    if(strcmp(id,"true")==0 || strcmp(id,"false")==0){
				    insert(++position,dex,0x105,id);
				}
			    else{
				    insert(++position,dex,0x103,id);
				}
			}
			//回退一个字符
			rear--;
		    front=rear;
		    //将临时变量存储数组索引置零
		    dex=0;
		    //将临时变量存储数组清零
		    memset(id,0,100);
		}
/*-----------------------------注释或运算符处理-----------------------------------*/
	    else if(ch=='/'){
		    ch=token_getch();
			// "//"类型的注释
	        if(ch=='/'){
			    do{ 
		            ch=token_getch();
					if(ch!='\n'&&ch!=EOF){
						id[dex++]=ch;
					}
				}while(ch!='\\\n'&&ch!=EOF);
			//	insert(++position,dex,0x101,id);
				line++;
			    position=0;
				dex=0;
		        memset(id,0,100);
			}
			// "/*"类型的注释
		    else if(ch=='*'){
			    do{ 
		            ch=token_getch();
					//遇到回车，行数加1
					if(ch==13){
						line++;
						position=0;
					}
					if( !(ch=='*'&& (buffer[rear])=='/')){
				        id[dex++]=ch;
					}
				}while( !(ch=='*'&& (buffer[rear])=='/'));
				ch=token_getch();
			//	insert(++position,dex,0x101,id);
				dex=0;
		        memset(id,0,100);
			}
			// "/="运算符
		    else if(ch=='='){
			    insert(++position,3,0x110,"/=");
			}
			// "/"运算符
		    else{
				rear--;
			    insert(++position,2,0x11b,"/");
			}
		    front=rear;
   		}
/*--------------------------字符串处理，将所有字符填入id 串--------------------------*/
	    else if(ch=='"'){
		    while((ch=token_getch())!='"'){
				//各类转移字符处理
			    if(ch=='\\'){
				    ch=token_getch();
				    if(ch=='\'')
                        ch='\'';
				    else if(ch=='r')
				        ch='\r';
			        else if(ch=='f')	
				        ch='\f';
		            else if(ch=='t')
				        ch='\t';
		            else if(ch=='b')
			            ch='\b';
			        else if(ch=='n')
				        ch='\n';
			        else if(ch=='t')
				        ch='\t';
			        else if(ch=='\\')
				        ch='\\';
			        else if(ch=='"')
				        ch='"';
			        else if(ch=='\'')
				        ch='\'';
			        else if((ch-'0')>=0 && (ch-'7')<=0){
				        while((ch-'0')>=0 && (ch-'7')<=0){
					        num_value=num_value*8;
						    num_value+=(ch-'0');
						    ch=token_getch();
						}
					    if(num_value>128){
					        printf("%d: too big for character\n",line);
						}
					    else{
						    ch=num_value;
						}
					    num_value=0;
					}
			        else if(ch=='u'){
					    ch=token_getch();
				        while(ch=='0'||ch=='1'||ch=='2'||ch=='3'||ch=='4'||ch=='5'||ch=='6'||ch=='7'||ch=='8'||ch=='9'||ch=='A'||ch=='a'||ch=='B'||ch=='b'||ch=='C'||ch=='c'||ch=='D'||ch=='d'||ch=='E'||ch=='e'||ch=='F'||ch=='f'){
					        num_value=num_value*16;
						    if(ch=='0'||ch=='1'||ch=='2'||ch=='3'||ch=='4'||ch=='5'||ch=='6'||ch=='7'||ch=='8'||ch=='9'){
							    num_value+=(id[i]-'0');
							}
						    else if(ch=='A'||ch=='B'||ch=='C'||ch=='D'||ch=='E'||ch=='F'){
							    num_value+=(id[i]-55);
							}
						    else if(ch=='a'||ch=='b'||ch=='c'||ch=='d'||ch=='e'||ch=='f'){
							    num_value+=(id[i]-55);
							}
						    ch=token_getch();
						}
					    if(num_value>128){
					        printf("%d: too big for character\n",line);
						}
					    else{
						    ch=num_value;
						}
					    num_value=0;
					}
				}
			    id[dex++]=ch;
			    if(dex==100){
				    printf("%d: string too long\n",line);
				}
			}
		    front=rear;
		
			insert(++position,dex,0x106,id);
		    //将临时变量存储数组索引置零
		    dex=0;
		    //将临时变量存储数组清零
		    memset(id,0,100);
		}
/*-------------------------------处理单个字符---------------------------------*/ 
	    else if(ch=='\''){
		    ch=token_getch();
		    if(ch=='\\'){
			    ch=token_getch();
			    if(ch=='\'')
                    ch='\'';
			    else if(ch=='r')
				    ch='\r';
			    else if(ch=='f')	
				    ch='\f';
		        else if(ch=='t')
				    ch='\t';
		        else if(ch=='b')
			        ch='\b';
			    else if(ch=='n')
				    ch='\n';
			    else if(ch=='t')
				    ch='\t';
			    else if(ch=='\\')
				    ch='\\';
			    else if(ch=='"')
				    ch='"';
			    else if(ch=='\'')
				    ch='\'';
			    else if((ch-'0')>=0 && (ch-'7')<=0){
				    while((ch-'0')>=0 && (ch-'7')<=0){
					    num_value=num_value*8;
					    num_value+=(ch-'0');
					    ch=token_getch();
					}
				    if(num_value>128){
				        printf("%d: too big for character\n",line);
					}
				    else{
					    ch=num_value;
					}
				    num_value=0;
				}
		        else if(ch=='u'){
				    ch=token_getch();
		            while(ch=='0'||ch=='1'||ch=='2'||ch=='3'||ch=='4'||ch=='5'||ch=='6'||ch=='7'||ch=='8'||ch=='9'||ch=='A'||ch=='a'||ch=='B'||ch=='b'||ch=='C'||ch=='c'||ch=='D'||ch=='d'||ch=='E'||ch=='e'||ch=='F'||ch=='f'){
			            num_value=num_value*16;
					    if(ch=='0'||ch=='1'||ch=='2'||ch=='3'||ch=='4'||ch=='5'||ch=='6'||ch=='7'||ch=='8'||ch=='9'){
						    num_value+=(id[i]-'0');
						}
					    else if(ch=='A'||ch=='B'||ch=='C'||ch=='D'||ch=='E'||ch=='F'){
						    num_value+=(id[i]-55);
						}
					    else if(ch=='a'||ch=='b'||ch=='c'||ch=='d'||ch=='e'||ch=='f'){
						    num_value+=(id[i]-55);
						}
					    ch=token_getch();
					}
				    if(num_value>128){
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -