📄 javascanner.c
字号:
/*----------------------------------------- JAVA 词法分析器---------------------------------------*/
/* (C) 2007-2009 lujia(20040094) (851010@bit.edu.cn)
* 说明:
* 本词法分析器为编译原理实验作业,能够按照JAVA的语法规则进行分析。分析结果保留在
* 用户指定的某个文件中,结果以四元式(行,行内位置,十六进制编码,字面值)的形式表示,
* 并统计出了单词总数及每行单词个数。统计的结果中没有注释及空格,因为它们在后续分析中
* 无实际意义
*/
#include<stdio.h>
#include<string.h>
#include<ctype.h>
#include<stdlib.h>
/* Total size of the input buffer in bytes. Per the original author's note the
 * buffer is used as two complementary halves of 1024 bytes each. */
#define BUFFERLEN 2048

/* Indices into the input buffer. */
int front = 0;
int rear = 0;

/* The input buffer itself, zero-initialised. */
char buffer[BUFFERLEN] = {0};

/* Current line number of the source being scanned (starts at 1). */
int line = 1;

/* Stream of the source file being analysed. */
FILE *fin;

/* Scratch array holding the text of identifiers, keywords, strings and
 * integer/real literals while they are being collected. */
char id[100];

/* Index of the next free slot in id. */
int dex = 0;

/* Per-line word counter. */
int count = 0;

/* Accumulator used when converting octal or hexadecimal escape sequences
 * into an ordinary character. */
int num_value = 0;

/* Position of the current word within its line. */
int position = 0;

/* Number of file reads performed so far; used to compute the file offset. */
int cnt = 0;
/* One node of the singly linked token list; each node records one word. */
struct token {
    int lineno;               /* line number of the word */
    int position;             /* position of the word within that line */
    int wordkey;              /* token type code (written in hexadecimal) */
    char *value;              /* literal text of the word */
    struct token *next_token; /* next node in the list */
};

/* Global object of the node type. */
struct token token;

/* Head and tail of the token list; both start out empty. */
struct token *token_list_head = NULL;
struct token *token_list_tail = NULL;

/* Working pointer to a token node. */
struct token *token_pointer;
/*
 * Keyword table: the reserved words recognised by the scanner, in
 * alphabetical order. The literals "true", "false" and "null" are listed
 * here as well.
 *
 * The table holds 50 entries. Code that walks it should derive the entry
 * count from sizeof(keyword)/sizeof(keyword[0]) rather than a fixed number.
 *
 * Fix: the entry previously spelled "protect" is now "protected", which is
 * the actual Java access modifier.
 */
char* keyword[]={
    "abstract",
    "boolean",
    "break",
    "byte",
    "case",
    "catch",
    "char",
    "class",
    "const",
    "continue",
    "default",
    "do",
    "double",
    "else",
    "extends",
    "false",
    "final",
    "finally",
    "float",
    "for",
    "goto",
    "if",
    "implements",
    "import",
    "instanceof",
    "int",
    "interface",
    "long",
    "native",
    "new",
    "null",
    "package",
    "private",
    "protected",
    "public",
    "return",
    "short",
    "static",
    "super",
    "switch",
    "synchronized",
    "this",
    "throw",
    "throws",
    "transient",
    "true",
    "try",
    "void",
    "volatile",
    "while"
};
/*
 * Refill one half of the input buffer from the source file.
 *
 * The file position is set to cnt*1023 bytes from the start, then up to 1023
 * bytes are read. When rear is 1023 the data goes into the upper half
 * (starting at index 1024); in every other case it goes into the lower half
 * (starting at index 0). rear is left pointing at the first byte of the half
 * that was just filled, and cnt is incremented.
 *
 * A short read (fewer than 1023 bytes; end of file and read errors are not
 * distinguished) closes fin and stores an EOF marker right after the last
 * byte that was read.
 */
void load(){
    int half_start = (rear == 1023) ? 1024 : 0; /* first index of the half to fill */
    int got;                                    /* number of bytes actually read */

    fseek(fin, 1023L * cnt, 0);                 /* whence 0: offset from file start */
    got = fread(&buffer[half_start], sizeof(char), 1023, fin);
    cnt++;
    rear = half_start;

    if(got < 1023){
        fclose(fin);
        buffer[rear + got] = EOF;
    }
}
/*
 * Fetch the next character from the input buffer and advance rear by one.
 *
 * If rear sits on the last index of either half (1023 or 2047) and the
 * character stored there equals EOF, load() is called first and the
 * character is taken from the position rear has after that call.
 */
char token_getch(){
    int on_half_end = (rear == 1023 || rear == 2047);

    if(on_half_end && buffer[rear] == EOF){
        load();
    }
    return buffer[rear++];
}
/*
 * Search the keyword table for word.
 *
 * word - NUL-terminated text to look up.
 *
 * Returns 0 when word matches a table entry and 1 when it does not.
 * Mind the polarity: get_token treats a non-zero result as "not in the
 * table", so the return values must stay this way round.
 *
 * Fix: the entry count is derived from the table itself. The previous
 * hard-coded bound of 49 never examined the last entry of the table.
 */
int lookup_keyword(char* word){
    size_t total = sizeof keyword / sizeof keyword[0];
    size_t k;

    for(k = 0; k < total; k++){
        if(strcmp(word, keyword[k]) == 0){
            return 0;
        }
    }
    return 1;
}
//向单词表中插入一个token结构,并装入各字段内容
void insert(int pos,int value_size,int attri,const char* value){
token_pointer=(struct token*)malloc(sizeof(token));
token_pointer->position=pos;
token_pointer->lineno=line;
token_pointer->wordkey=attri;
token_pointer->value=(char*)malloc(value_size*sizeof(char));
strcpy(token_pointer->value,value);
token_list_tail->next_token=token_pointer;
token_pointer->next_token=NULL;
token_list_tail=token_pointer;
}
//分析器主体
void get_token(){
int i=0,tri_op=0;
char ch;
while(1){
//到达文件末尾,跳出循环
if(ch==EOF){
break;
}
//获取一个字符
ch=token_getch();
//如果是回车,将行数加1,置position为0
if(ch=='\n'){
front=rear;
position=0;
line++;
}
//如果是空格,只是将其跳过
else if(isspace(ch)){
if(ch=='\n'){
position=0;
line++;
}
front=rear;
// insert(++position,2,0x102," ");
}
/*---------------------------判断关键字和标识符及布尔值----------------------------------*/
else if(isalpha(ch)||ch=='_'||ch=='$'){
do{
id[dex++]=ch;
ch=token_getch();
}while(isalnum(ch)||ch=='_'||ch=='$');
//在关键字表中查找,根据查找结果填入单词表
if(lookup_keyword(id)){
insert(++position,dex,0x104,id);
}
else{
if(strcmp(id,"true")==0 || strcmp(id,"false")==0){
insert(++position,dex,0x105,id);
}
else{
insert(++position,dex,0x103,id);
}
}
//回退一个字符
rear--;
front=rear;
//将临时变量存储数组索引置零
dex=0;
//将临时变量存储数组清零
memset(id,0,100);
}
/*-----------------------------注释或运算符处理-----------------------------------*/
else if(ch=='/'){
ch=token_getch();
// "//"类型的注释
if(ch=='/'){
do{
ch=token_getch();
if(ch!='\n'&&ch!=EOF){
id[dex++]=ch;
}
}while(ch!='\\\n'&&ch!=EOF);
// insert(++position,dex,0x101,id);
line++;
position=0;
dex=0;
memset(id,0,100);
}
// "/*"类型的注释
else if(ch=='*'){
do{
ch=token_getch();
//遇到回车,行数加1
if(ch==13){
line++;
position=0;
}
if( !(ch=='*'&& (buffer[rear])=='/')){
id[dex++]=ch;
}
}while( !(ch=='*'&& (buffer[rear])=='/'));
ch=token_getch();
// insert(++position,dex,0x101,id);
dex=0;
memset(id,0,100);
}
// "/="运算符
else if(ch=='='){
insert(++position,3,0x110,"/=");
}
// "/"运算符
else{
rear--;
insert(++position,2,0x11b,"/");
}
front=rear;
}
/*--------------------------字符串处理,将所有字符填入id 串--------------------------*/
else if(ch=='"'){
while((ch=token_getch())!='"'){
//各类转移字符处理
if(ch=='\\'){
ch=token_getch();
if(ch=='\'')
ch='\'';
else if(ch=='r')
ch='\r';
else if(ch=='f')
ch='\f';
else if(ch=='t')
ch='\t';
else if(ch=='b')
ch='\b';
else if(ch=='n')
ch='\n';
else if(ch=='t')
ch='\t';
else if(ch=='\\')
ch='\\';
else if(ch=='"')
ch='"';
else if(ch=='\'')
ch='\'';
else if((ch-'0')>=0 && (ch-'7')<=0){
while((ch-'0')>=0 && (ch-'7')<=0){
num_value=num_value*8;
num_value+=(ch-'0');
ch=token_getch();
}
if(num_value>128){
printf("%d: too big for character\n",line);
}
else{
ch=num_value;
}
num_value=0;
}
else if(ch=='u'){
ch=token_getch();
while(ch=='0'||ch=='1'||ch=='2'||ch=='3'||ch=='4'||ch=='5'||ch=='6'||ch=='7'||ch=='8'||ch=='9'||ch=='A'||ch=='a'||ch=='B'||ch=='b'||ch=='C'||ch=='c'||ch=='D'||ch=='d'||ch=='E'||ch=='e'||ch=='F'||ch=='f'){
num_value=num_value*16;
if(ch=='0'||ch=='1'||ch=='2'||ch=='3'||ch=='4'||ch=='5'||ch=='6'||ch=='7'||ch=='8'||ch=='9'){
num_value+=(id[i]-'0');
}
else if(ch=='A'||ch=='B'||ch=='C'||ch=='D'||ch=='E'||ch=='F'){
num_value+=(id[i]-55);
}
else if(ch=='a'||ch=='b'||ch=='c'||ch=='d'||ch=='e'||ch=='f'){
num_value+=(id[i]-55);
}
ch=token_getch();
}
if(num_value>128){
printf("%d: too big for character\n",line);
}
else{
ch=num_value;
}
num_value=0;
}
}
id[dex++]=ch;
if(dex==100){
printf("%d: string too long\n",line);
}
}
front=rear;
insert(++position,dex,0x106,id);
//将临时变量存储数组索引置零
dex=0;
//将临时变量存储数组清零
memset(id,0,100);
}
/*-------------------------------处理单个字符---------------------------------*/
else if(ch=='\''){
ch=token_getch();
if(ch=='\\'){
ch=token_getch();
if(ch=='\'')
ch='\'';
else if(ch=='r')
ch='\r';
else if(ch=='f')
ch='\f';
else if(ch=='t')
ch='\t';
else if(ch=='b')
ch='\b';
else if(ch=='n')
ch='\n';
else if(ch=='t')
ch='\t';
else if(ch=='\\')
ch='\\';
else if(ch=='"')
ch='"';
else if(ch=='\'')
ch='\'';
else if((ch-'0')>=0 && (ch-'7')<=0){
while((ch-'0')>=0 && (ch-'7')<=0){
num_value=num_value*8;
num_value+=(ch-'0');
ch=token_getch();
}
if(num_value>128){
printf("%d: too big for character\n",line);
}
else{
ch=num_value;
}
num_value=0;
}
else if(ch=='u'){
ch=token_getch();
while(ch=='0'||ch=='1'||ch=='2'||ch=='3'||ch=='4'||ch=='5'||ch=='6'||ch=='7'||ch=='8'||ch=='9'||ch=='A'||ch=='a'||ch=='B'||ch=='b'||ch=='C'||ch=='c'||ch=='D'||ch=='d'||ch=='E'||ch=='e'||ch=='F'||ch=='f'){
num_value=num_value*16;
if(ch=='0'||ch=='1'||ch=='2'||ch=='3'||ch=='4'||ch=='5'||ch=='6'||ch=='7'||ch=='8'||ch=='9'){
num_value+=(id[i]-'0');
}
else if(ch=='A'||ch=='B'||ch=='C'||ch=='D'||ch=='E'||ch=='F'){
num_value+=(id[i]-55);
}
else if(ch=='a'||ch=='b'||ch=='c'||ch=='d'||ch=='e'||ch=='f'){
num_value+=(id[i]-55);
}
ch=token_getch();
}
if(num_value>128){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -