/*
 * 词法分析器.cpp
 *
 * 来自「Java语言词法分析器的设计与实现 其中具体要求: 1.使用DFA实现词法分」· C++ 代码 · 共 696 行
 *
 * CPP
 * 696
 * 字号
 */
#include"stdio.h"
#include"string.h"
#include   <conio.h>   
#include   <stdlib.h>   
  
/* Chunk most recently read from the input file: 128 data bytes + 1 spare. */
char buffer[129];

/* Scanning window, used as two 128-byte halves that are refilled in turn. */
char strscan[256];

/* Characters collected for the token currently being recognised. */
char str[129];

/* Reserved words compared against the collected text by funcha(). */
char keywords[50][13] = {
    "abstract", "boolean", "break", "byte", "case",
    "catch", "char", "class", "const", "continue",
    "default", "do", "double", "else", "extends",
    "false", "final", "finally", "float", "for",
    "goto", "if", "implements", "import", "instanceof",
    "int", "interface", "long", "native", "new",
    "null", "package", "private", "protected", "public",
    "return", "short", "static", "super", "switch",
    "synchronized", "this", "throw", "throws", "transient",
    "true", "try", "void", "volatile", "while"
};

/* Spellings of the two boolean literals. */
char bult[5] = "true";
char bulf[6] = "false";

/*
 * W - number of characters currently stored in str
 * B - index in strscan recorded when the previous token ended
 * F - index in strscan of the character being examined
 * S - byte count returned by the latest fread()
 */
int W, B, F, S;

/*
 * word   - running total of tokens (sum of the per-line counts)
 * sword  - tokens counted on the current line
 * row    - number of lines counted so far
 * signal - set by main() once the newline padding after a short read is hit
 */
int word = 0, sword = 0, row = 0, signal = 0;

/* fp1: input stream being scanned; fp2: output stream for token codes. */
FILE *fp1, *fp2;
/*
 * Report whether the character under the forward index (strscan[F]) is an
 * ASCII letter.
 *
 * Returns 1 for 'A'..'Z' or 'a'..'z', 0 otherwise.
 */
int isletter(void)
{
    const char c = strscan[F];

    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
/*
 * Report whether the character under the forward index (strscan[F]) is a
 * decimal digit.
 *
 * Returns 1 for '0'..'9', 0 otherwise.
 */
int isnumber(void)
{
    const char c = strscan[F];

    return c >= '0' && c <= '9';
}

/*
 * Pad the chunk buffer after a (possibly short) read.
 *
 * m is the number of bytes fread() stored in buffer. Every position from m
 * up to the end of the 128-byte chunk is filled with '\n', and the spare
 * byte after the chunk is set to '\0' so that string functions applied to
 * buffer stop at the chunk boundary.
 *
 * The previous loop ran up to index 129, one past the end of the 129-byte
 * array, and never stored a terminator.
 *
 * Always returns 1.
 */
int funcm(int m)
{
    const int chunk = (int)sizeof buffer - 1;   /* number of data bytes */
    int i;

    for (i = (m < 0) ? 0 : m; i < chunk; i++)
        buffer[i] = '\n';
    buffer[chunk] = '\0';
    return 1;
}
/*
 * Advance the forward index F by one character without storing anything in
 * str, then record the new position in B.
 *
 * When F sits on the last byte of a half (127 or 255) the other half of
 * strscan is refilled from fp1 first. If nothing more can be read the total
 * token count is printed and the program exits.
 *
 * The refill copies exactly 128 bytes with memcpy(); the previous strcpy()
 * depended on a terminator that buffer does not reliably contain and could
 * write past the end of strscan.
 *
 * Always returns 1.
 */
int FM(void)
{
    if (F == 127 || F == 255) {
        const int next = (F == 127) ? 128 : 0;   /* start of the other half */

        S = (int)fread(buffer, 1, 128, fp1);
        if (S == 0) {
            printf("%d\n", word);
            exit(0);
        }
        funcm(S);
        memcpy(&strscan[next], buffer, 128);
        F = next;
    } else {
        F++;
    }
    B = F;
    return 1;
}

/*
 * Step the forward index F back by one character (un-read one character).
 *
 * If that step crosses back over a half boundary relative to B (F is on the
 * first byte of a half while B is still in the other half), the file
 * position is moved back as well so the half is read again on the next
 * advance.
 *
 * NOTE(review): the rewind distance is 131 bytes although chunks are 128
 * bytes long; this looks tuned to a particular input - confirm.
 *
 * Always returns 1.
 */
int FG(void)
{
    if (F == 128 && B <= 127) {
        fseek(fp1, -131, SEEK_CUR);
        F = 127;
    } else if (F == 0 && B >= 128) {
        fseek(fp1, -131, SEEK_CUR);
        F = 255;
    } else {
        F--;
    }
    return 1;
}


/*
 * Consume k characters: append each character at strscan[F] to str and
 * advance F, refilling the other half of strscan from fp1 whenever F leaves
 * the last byte of a half (127 or 255). The program exits when a refill
 * reads nothing.
 *
 * Two defects of the earlier version are addressed:
 *  - the refill used strcpy() on a buffer without a reliable terminator and
 *    could write beyond strscan; exactly 128 bytes are copied now;
 *  - writes to str were unbounded. When str is full the oldest character is
 *    dropped, so str[W-1] and str[W-2] are still the most recent characters
 *    and one byte stays free for a terminator.
 *
 * Always returns 1.
 */
int funcom(int k)
{
    const int cap = (int)sizeof str - 1;   /* keep room for a '\0' */
    int i;

    for (i = 0; i < k; i++) {
        const int at_half_end = (F == 127 || F == 255);

        if (W >= cap) {
            memmove(str, str + 1, (size_t)(cap - 1));
            W = cap - 1;
        }
        str[W++] = strscan[F];

        if (!at_half_end) {
            F++;
            continue;
        }

        /* Move into the other half and load fresh data into it. */
        F = (F == 127) ? 128 : 0;
        S = (int)fread(buffer, 1, 128, fp1);
        if (S == 0)
            exit(0);
        funcm(S);
        memcpy(&strscan[F], buffer, 128);
    }
    return 1;
}


funcha()
{int i,j,k;
while(isletter())
   {if(F==127)
   {str[W++]=strscan[F];
	F++;
	if(S=fread(buffer,1,128,fp1))
    {funcm(S);strcpy(&strscan[128],buffer);}
	else exit(0);
   }
   else if(F==255)
   {str[W++]=strscan[F];
   F=0;
  if(S=fread(buffer,1,128,fp1))
  {funcm(S);strcpy(strscan,buffer);}
	else exit(0);
   }
   else {str[W++]=strscan[F];F++;}
   }

for(k=0;k<50;k++)
{for(j=0,i=0;keywords[k][j]!='\0';j++,i++)
  {if(keywords[k][j]!=str[i])
   break;
  }
  if(keywords[k][j]=='\0'&&i==W)
  {fwrite("0x103",1,5,fp2);
sword++;
  W=0;
  B=F;
  return 1;}
}

for(j=0,i=0;bult[j]!='\0';j++,i++)
  {if(bult[j]!=str[i])
   break;
  }
  if(bult[j]=='\0'&&i==W)
  {fwrite("0x105",1,5,fp2);B=F;W=0;sword++;
  return 1;}


for(j=0,i=0;bulf[j]!='\0';j++,i++)
  {if(bulf[j]!=str[i])
   break;
  }
  if(bulf[j]=='\0'&&i==W)
  { fwrite("0x105",1,5,fp2);B=F;W=0;sword++;
  return 1;}

  W=0;
  if(F<=127&&B>=128||F>=128&&B<=127)
  {fseek(fp1,-131,1);}
   F=B;
return 0;
}


/*
 * Recognise an identifier starting at F.
 *
 * Letters, digits, '$' and '_' are consumed into str. If at least one
 * character was collected the code 0x104 is written, the per-line token
 * count is incremented and 1 is returned. Otherwise the scan position is
 * restored to B and 0 is returned.
 *
 * The character-consuming step is delegated to funcom(1), which performs
 * the same store/advance/refill sequence that used to be repeated here.
 */
int funsym(void)
{
    while (isletter() || isnumber() ||
           strscan[F] == '$' || strscan[F] == '_')
        funcom(1);

    if (W > 0) {
        fwrite("0x104", 1, 5, fp2);
        sword++;
        B = F;
        W = 0;
        return 1;
    }

    if ((F <= 127 && B >= 128) || (F >= 128 && B <= 127))
        fseek(fp1, -131, SEEK_CUR);
    F = B;
    W = 0;
    return 0;
}

/*
 * Report an unrecognised token.
 *
 * Characters are appended to whatever str already holds until a space or a
 * newline is reached; the collected text is then printed together with the
 * current row. If the terminator is a newline, the row counter is advanced
 * and the per-line statistics are printed and folded into the total first.
 *
 * The collected text is truncated to the capacity of str so that the
 * terminating '\0' is always stored inside the array.
 *
 * NOTE(review): the newline itself is not consumed here, and main() also
 * counts a row when it sees a newline - confirm whether counting the same
 * line twice is intended.
 *
 * Always returns 1.
 */
int funerror(void)
{
    const int cap = (int)sizeof str - 1;

    while (strscan[F] != ' ' && strscan[F] != '\n') {
        if (W >= cap)
            W = cap - 1;        /* overwrite the last slot when full */
        funcom(1);
    }
    if (W > cap)
        W = cap;
    str[W] = '\0';

    if (strscan[F] == '\n') {
        row++;
        printf("%d,%d \n", row, sword);
        word += sword;
        sword = 0;
    }
    printf("%d %s is error\n", row, str);
    sword++;
    W = 0;
    B = F;
    return 1;
}

/*
 * Handle a token that starts with '=', '!', '*', '^' or '%'.
 *
 * Two characters are consumed. If the second is '=', a two-character
 * operator is emitted: 0x117 for "==" and "!=", 0x110 for "*=", "^=" and
 * "%=". Otherwise a one-character operator is emitted (0x110 for '=',
 * 0x11c for '!', 0x11b for '*' and '%', 0x115 for '^') and the second
 * character is un-read with FG().
 *
 * Always returns 1.
 */
int emark(void)
{
    const char *code = NULL;
    char first;
    int compound;

    funcom(2);
    first = str[W - 2];
    compound = (str[W - 1] == '=');

    switch (first) {
    case '=':
        code = compound ? "0x117" : "0x110";
        break;
    case '!':
        code = compound ? "0x117" : "0x11c";
        break;
    case '*':
    case '%':
        code = compound ? "0x110" : "0x11b";
        break;
    case '^':
        code = compound ? "0x110" : "0x115";
        break;
    default:
        break;
    }

    if (code != NULL) {
        fwrite(code, 1, 5, fp2);
        if (!compound)
            FG();               /* second character belongs to the next token */
        sword++;
    }
    B = F;
    W = 0;
    return 1;
}

/*
 * Finish an octal escape sequence: consume up to two further octal digits
 * at F, write the code 0x106 and count the token.
 *
 * Changes from the earlier version: the digit range is '0'..'7' ('8' is not
 * an octal digit), and W is reset so the collected characters do not leak
 * into the next token.
 *
 * Always returns 1.
 */
int enumdeal(void)
{
    int taken;

    for (taken = 0;
         taken < 2 && strscan[F] >= '0' && strscan[F] <= '7';
         taken++)
        funcom(1);

    fwrite("0x106", 1, 5, fp2);
    sword++;
    W = 0;
    B = F;
    return 1;
}

/*
 * Finish a hexadecimal (\u) escape sequence: consume the hexadecimal digits
 * at F, write the code 0x106 and count the token.
 *
 * Changes from the earlier version: up to four digits are consumed (a Java
 * Unicode escape has four; only three were read), lower-case a-f are
 * accepted as well, and W is reset so the collected characters do not leak
 * into the next token.
 *
 * Always returns 1.
 */
int sixnumdeal(void)
{
    int taken;

    for (taken = 0; taken < 4; taken++) {
        const char c = strscan[F];
        const int is_hex = (c >= '0' && c <= '9') ||
                           (c >= 'A' && c <= 'F') ||
                           (c >= 'a' && c <= 'f');

        if (!is_hex)
            break;
        funcom(1);
    }

    B = F;
    W = 0;
    fwrite("0x106", 1, 5, fp2);
    sword++;
    return 1;
}


/*
 * Handle a token that starts with a backslash.
 *
 * The backslash and the following character are consumed. For the simple
 * escapes (b, t, f, n, r, backslash, single quote, double quote) the code
 * 0x106 is written and one more character is skipped with FM(). 'u' is
 * handed to sixnumdeal(), an octal digit to enumdeal(), and anything else
 * to funerror().
 *
 * Changes from the earlier version: the octal test uses '0'..'7', the
 * double-quote escape is recognised, and W is cleared on every path.
 *
 * NOTE(review): FM() skips the character after the escape; presumably that
 * is a closing quote - confirm.
 *
 * Always returns 1.
 */
int backslash(void)
{
    char kind;

    funcom(2);
    kind = str[W - 1];

    switch (kind) {
    case 'b':
    case 't':
    case 'f':
    case 'n':
    case 'r':
    case '\\':
    case '\'':
    case '"':
        fwrite("0x106", 1, 5, fp2);
        FM();
        sword++;
        break;
    case 'u':
        sixnumdeal();
        break;
    default:
        if (kind >= '0' && kind <= '7')
            enumdeal();
        else
            funerror();
        break;
    }
    W = 0;
    return 1;
}


addmark()
{funcom(2);
 switch(str[W-1])
 {case '=':fwrite("0x110",1,5,fp2);sword++;break;
case '+':fwrite("0x11c",1,5,fp2);sword++;break;
default:fwrite("0x11a",1,5,fp2);sword++;break;
}
 return 1;
}
/*
 * Handle a token that starts with '-'.
 *
 * Two characters are consumed: "-=" writes 0x110, "--" writes 0x11c, and a
 * lone '-' writes 0x11a and un-reads the second character with FG().
 * W and B are reset and the per-line token count is incremented.
 *
 * Always returns 1.
 */
int submark(void)
{
    funcom(2);
    switch (str[W - 1]) {
    case '=':
        fwrite("0x110", 1, 5, fp2);
        break;
    case '-':
        fwrite("0x11c", 1, 5, fp2);
        break;
    default:
        fwrite("0x11a", 1, 5, fp2);
        FG();
        break;
    }
    W = 0;
    B = F;
    sword++;
    return 1;
}

/*
 * Handle a token that starts with '?'.
 *
 * Two characters are consumed. "?:" writes 0x110. Anything else is reported
 * through funerror() after the second character has been un-read.
 *
 * Change from the earlier version: when the second character is un-read it
 * is also removed from str, so it no longer appears twice in the error
 * message.
 *
 * Always returns 1.
 */
int askmark(void)
{
    funcom(2);
    if (str[W - 1] == ':') {
        fwrite("0x110", 1, 5, fp2);
        W = 0;
        B = F;
        sword++;
    } else {
        FG();
        W--;                    /* funerror() will read this character again */
        funerror();
    }
    return 1;
}

/*
 * Handle a token that starts with '&'.
 *
 * Two characters are consumed: "&=" writes 0x110, "&&" writes 0x113, and a
 * lone '&' writes 0x116 and un-reads the second character with FG().
 *
 * Change from the earlier version: the lone-'&' case incremented the grand
 * total (word) instead of the per-line count (sword) used by every other
 * case; it now increments sword as well.
 *
 * Always returns 1.
 */
int andmark(void)
{
    funcom(2);
    switch (str[W - 1]) {
    case '=':
        fwrite("0x110", 1, 5, fp2);
        break;
    case '&':
        fwrite("0x113", 1, 5, fp2);
        break;
    default:
        fwrite("0x116", 1, 5, fp2);
        FG();
        break;
    }
    W = 0;
    B = F;
    sword++;
    return 1;
}
chamark()
{
	funcom(3);
 switch(str[W-1])
 {case '\'':fwrite("0x106",1,5,fp2);sword++;break;
default:funerror();break;
}
 return 1;
}

/*
 * Handle a token that starts with '|'.
 *
 * Two characters are consumed: "||" writes 0x112, "|=" writes 0x110, and a
 * lone '|' writes 0x114 and un-reads the second character with FG().
 * W and B are reset and the per-line token count is incremented.
 *
 * Always returns 1.
 */
int ormark(void)
{
    funcom(2);
    switch (str[W - 1]) {
    case '|':
        fwrite("0x112", 1, 5, fp2);
        break;
    case '=':
        fwrite("0x110", 1, 5, fp2);
        break;
    default:
        fwrite("0x114", 1, 5, fp2);
        FG();
        break;
    }
    W = 0;
    B = F;
    sword++;
    return 1;
}

/*
 * Handle a string literal starting at a double quote.
 *
 * The opening quote and everything up to the closing quote are consumed,
 * the code 0x114 is written, and the closing quote is skipped with FM().
 *
 * Changes from the earlier version:
 *  - a backslash makes the following character part of the string, so an
 *    escaped quote (\") no longer ends the literal;
 *  - the text of the literal is not used afterwards, so W is reset before
 *    every character instead of letting long strings run past str.
 *
 * Always returns 1.
 */
int chasmark(void)
{
    W = 0;
    funcom(1);                  /* opening quote */

    while (strscan[F] != '"') {
        if (strscan[F] == '\\') {
            W = 0;
            funcom(1);          /* backslash; escaped character follows */
        }
        W = 0;
        funcom(1);
    }

    fwrite("0x114", 1, 5, fp2);
    sword++;
    W = 0;
    FM();                       /* closing quote */
    return 1;
}


/*
 * Third step of '>' handling, entered from greatmarks() after ">>>".
 *
 * One more character is consumed: '=' writes 0x110; anything else writes
 * 0x119 and is un-read with FG(). W and B are reset and the per-line token
 * count is incremented.
 *
 * Always returns 1.
 */
int greatmarksm(void)
{
    funcom(1);
    if (str[W - 1] == '=') {
        fwrite("0x110", 1, 5, fp2);
    } else {
        fwrite("0x119", 1, 5, fp2);
        FG();
    }
    W = 0;
    B = F;
    sword++;
    return 1;
}

/*
 * Second step of '>' handling, entered from greatmark() after ">>".
 *
 * One more character is consumed: '>' continues in greatmarksm(); '='
 * writes 0x110; anything else writes 0x119 and is un-read with FG().
 *
 * Always returns 1.
 */
int greatmarks(void)
{
    funcom(1);
    if (str[W - 1] == '>') {
        greatmarksm();
        return 1;
    }

    if (str[W - 1] == '=') {
        fwrite("0x110", 1, 5, fp2);
    } else {
        fwrite("0x119", 1, 5, fp2);
        FG();
    }
    W = 0;
    B = F;
    sword++;
    return 1;
}

/*
 * Handle a token that starts with '>'.
 *
 * Two characters are consumed: a second '>' continues in greatmarks();
 * ">=" writes 0x118; a lone '>' also writes 0x118 and un-reads the second
 * character with FG().
 *
 * Always returns 1.
 */
int greatmark(void)
{
    funcom(2);
    if (str[W - 1] == '>') {
        greatmarks();
        return 1;
    }

    fwrite("0x118", 1, 5, fp2);
    if (str[W - 1] != '=')
        FG();
    W = 0;
    B = F;
    sword++;
    return 1;
}

/*
 * Second step of '<' handling, entered from smallmark() after "<<".
 *
 * One more character is consumed: '=' writes 0x110; anything else writes
 * 0x119 and is un-read with FG().
 *
 * Change from the earlier version: only one character is consumed here, as
 * in greatmarks(). Two were consumed before, so the character directly
 * after "<<" was never examined and was lost.
 *
 * Always returns 1.
 */
int smallmarks(void)
{
    funcom(1);
    if (str[W - 1] == '=') {
        fwrite("0x110", 1, 5, fp2);
    } else {
        fwrite("0x119", 1, 5, fp2);
        FG();
    }
    W = 0;
    B = F;
    sword++;
    return 1;
}

/*
 * Handle a token that starts with '<'.
 *
 * Two characters are consumed: a second '<' continues in smallmarks();
 * "<=" writes 0x118; a lone '<' also writes 0x118 and un-reads the second
 * character with FG().
 *
 * Always returns 1.
 */
int smallmark(void)
{
    funcom(2);
    if (str[W - 1] == '<') {
        smallmarks();
        return 1;
    }

    fwrite("0x118", 1, 5, fp2);
    if (str[W - 1] != '=')
        FG();
    W = 0;
    B = F;
    sword++;
    return 1;
}
/*
 * Finish a line comment; entered from barmark() after the two slashes.
 *
 * Everything up to the end of the line is consumed, the code 0x101 is
 * written, the row counter is advanced, the per-line statistics are printed
 * and folded into the total, and the newline is skipped with FM().
 *
 * Changes from the earlier version:
 *  - the end-of-line test is made before consuming, so an empty comment no
 *    longer swallows its own newline and the whole following line;
 *  - the comment text is not used afterwards, so W is reset before every
 *    character instead of letting long comments run past str.
 *
 * Always returns 1.
 */
int bargmarks(void)
{
    while (strscan[F] != '\n') {
        W = 0;
        funcom(1);
    }

    fwrite("0x101", 1, 5, fp2);
    sword++;
    row++;
    printf("%d,%d\n", row, sword);
    word += sword;
    sword = 0;

    FM();                       /* step over the newline */
    W = 0;
    return 1;
}

/*
 * Finish a block comment; entered from barmark() after the opening
 * slash-star pair.
 *
 * Characters are skipped (nothing is stored in str) until the closing
 * star-slash pair is found; the code 0x101 is then written, the per-line
 * token count is incremented and the final '/' is skipped with FM(). The
 * program exits if the input ends first.
 *
 * Changes from the earlier version:
 *  - the comment ends at a '/' that directly follows a '*'; previously the
 *    first '/' of any kind ended it;
 *  - the refill copies exactly 128 bytes instead of using strcpy() on a
 *    buffer without a reliable terminator.
 *
 * NOTE(review): newlines inside the comment are not added to row - confirm
 * that this is intended.
 *
 * Always returns 1.
 */
int barxmarks(void)
{
    char prev = '\0';

    for (;;) {
        const char cur = strscan[F];

        if (prev == '*' && cur == '/')
            break;
        prev = cur;

        if (F == 127 || F == 255) {
            F = (F == 127) ? 128 : 0;
            S = (int)fread(buffer, 1, 128, fp1);
            if (S == 0)
                exit(0);
            funcm(S);
            memcpy(&strscan[F], buffer, 128);
        } else {
            F++;
        }
    }

    fwrite("0x101", 1, 5, fp2);
    sword++;
    W = 0;
    FM();                       /* step over the closing '/' */
    B = F;
    return 1;
}


/*
 * Handle a token that starts with '/'.
 *
 * Two characters are consumed: a second '/' continues in bargmarks(), '*'
 * continues in barxmarks(), "/=" writes 0x110, and a lone '/' writes 0x11b.
 *
 * Changes from the earlier version:
 *  - for a lone '/', only the second character is un-read (FG()). F used to
 *    be reset to B, i.e. back onto the '/' itself, so the same '/' was
 *    scanned again without end;
 *  - W and B are reset after "/=" and after a lone '/'.
 *
 * Always returns 1.
 */
int barmark(void)
{
    funcom(2);
    switch (str[W - 1]) {
    case '/':
        bargmarks();
        break;
    case '*':
        barxmarks();
        break;
    case '=':
        fwrite("0x110", 1, 5, fp2);
        W = 0;
        B = F;
        sword++;
        break;
    default:
        fwrite("0x11b", 1, 5, fp2);
        FG();
        W = 0;
        B = F;
        sword++;
        break;
    }
    return 1;
}


/*
 * Finish a number with an exponent; entered with F on the 'e' or 'E'.
 *
 * The exponent marker, an optional sign and the following digits are
 * consumed, the code 0x108 is written and the per-line token count is
 * incremented.
 *
 * Changes from the earlier version: a '+' or '-' directly after the marker
 * is accepted as part of the exponent, and W is reset so the collected
 * characters do not leak into the next token.
 *
 * Always returns 1.
 */
int funfenum(void)
{
    funcom(1);                  /* exponent marker */
    if (strscan[F] == '+' || strscan[F] == '-')
        funcom(1);
    while (isnumber())
        funcom(1);

    fwrite("0x108", 1, 5, fp2);
    sword++;
    W = 0;
    B = F;
    return 1;
}


/*
 * Finish a number with a fractional part; entered with F on the '.'.
 *
 * The '.' and the following digits are consumed. A trailing type suffix is
 * skipped with FM(); an exponent is handed to funfenum(). In every case the
 * code 0x108 is written once and the per-line token count is incremented.
 *
 * Changes from the earlier version: besides 'F', the suffixes 'f', 'd' and
 * 'D' are recognised, and an exponent after the fraction is scanned as part
 * of the same number.
 *
 * Always returns 1.
 */
int funfnum(void)
{
    funcom(1);                  /* the '.' */
    while (isnumber())
        funcom(1);

    switch (strscan[F]) {
    case 'e':
    case 'E':
        funfenum();             /* writes the code and counts the token */
        W = 0;
        break;
    case 'F':
    case 'f':
    case 'D':
    case 'd':
        fwrite("0x108", 1, 5, fp2);
        FM();
        W = 0;
        B = F;
        sword++;
        break;
    default:
        fwrite("0x108", 1, 5, fp2);
        W = 0;
        B = F;
        sword++;
        break;
    }
    return 1;
}

/*
 * Finish a hexadecimal integer; entered with F on the 'x'.
 *
 * The 'x' and the following hexadecimal digits are consumed, the code 0x107
 * is written and the per-line token count is incremented.
 *
 * Change from the earlier version: the digits a-f / A-F are consumed too;
 * only decimal digits were, so a literal such as 0x1F was split.
 *
 * Always returns 1.
 */
int funmm(void)
{
    funcom(1);                  /* the 'x' */
    for (;;) {
        const char c = strscan[F];
        const int is_hex = isnumber() ||
                           (c >= 'a' && c <= 'f') ||
                           (c >= 'A' && c <= 'F');

        if (!is_hex)
            break;
        funcom(1);
    }

    fwrite("0x107", 1, 5, fp2);
    W = 0;
    B = F;
    sword++;
    return 1;
}
/*
 * Recognise a number starting with a decimal digit.
 *
 * The leading digits are consumed, then the next character decides:
 *   '.'        -> funfnum() (fractional part)
 *   'x' / 'X'  -> funmm()   (hexadecimal)
 *   'L' / 'l'  -> 0x107 is written and the suffix is skipped with FM()
 *   'E' / 'e'  -> funfenum() (exponent)
 *   otherwise  -> 0x107 is written
 *
 * Changes from the earlier version: 'X' and 'l' are recognised like their
 * other-case forms, and W is cleared after the helpers return so collected
 * characters do not leak into the next token.
 *
 * Always returns 1.
 */
int funnum(void)
{
    do {
        funcom(1);
    } while (isnumber());

    switch (strscan[F]) {
    case '.':
        funfnum();
        W = 0;
        break;
    case 'x':
    case 'X':
        funmm();
        W = 0;
        break;
    case 'L':
    case 'l':
        fwrite("0x107", 1, 5, fp2);
        FM();
        W = 0;
        B = F;
        sword++;
        break;
    case 'E':
    case 'e':
        funfenum();
        W = 0;
        break;
    default:
        fwrite("0x107", 1, 5, fp2);
        W = 0;
        B = F;
        sword++;
        break;
    }
    return 1;
}

/*
 * Handle a token that starts with '.'.
 *
 * The '.' and any digits that follow are consumed. If only the '.' was
 * collected the code 0x11d is written; otherwise (a '.' followed by digits)
 * the code 0x108 is written.
 *
 * Change from the earlier version: W is cleared on entry. The "only a dot"
 * test compares W with 1, which gave the wrong answer whenever an earlier
 * token had left characters behind in str.
 *
 * Always returns 1.
 */
int funnod(void)
{
    W = 0;
    do {
        funcom(1);
    } while (isnumber());

    if (W == 1)
        fwrite("0x11d", 1, 5, fp2);
    else
        fwrite("0x108", 1, 5, fp2);

    B = F;
    W = 0;
    sword++;
    return 1;
}


/* Emit the 5-character code for a one-character token and step past it. */
static void single_char_token(const char *code)
{
    fwrite(code, 1, 5, fp2);
    FM();
    sword++;
}

/*
 * Entry point: scan the file "Test-Lexcial" and write one 5-character code
 * per token to "scanner_output".
 *
 * The first 128-byte chunk is loaded into the first half of strscan, then
 * the character at F selects a handler in an endless loop. The loop never
 * finishes by itself: the program ends through exit(0) in the routines that
 * refill the window once the input is exhausted.
 *
 * For every newline the row counter is advanced and "row,count" is printed,
 * until the newline padding that follows a short read is detected (signal).
 *
 * Changes from the earlier version:
 *  - the initial chunk is copied with memcpy() of exactly 128 bytes instead
 *    of strcpy() on a buffer without a reliable terminator;
 *  - the look-ahead strscan[F + 1] is skipped when F is the last index of
 *    the window, where it read past the array;
 *  - the input stream is closed when the output file cannot be opened.
 *
 * Returns 0.
 */
int main(void)
{
    fp1 = fopen("Test-Lexcial", "r");
    if (fp1 == NULL) {
        printf("Cannot open this file1\n");
        return 0;
    }
    fp2 = fopen("scanner_output", "w");
    if (fp2 == NULL) {
        printf("Cannot open this file2\n");
        fclose(fp1);
        return 0;
    }

    W = 0;
    B = 0;
    F = 0;

    S = (int)fread(buffer, 1, 128, fp1);
    if (S == 0)
        exit(0);
    funcm(S);
    memcpy(strscan, buffer, 128);

    while (1) {
        const char c = strscan[F];

        if (c == '\t' || c == '\n') {
            if (c == '\n' && signal == 0) {
                row++;
                printf("%d,%d\n", row, sword);
                /* Two newlines in a row after a short read: padding reached. */
                if (F < 255 && strscan[F + 1] == '\n' && S < 128)
                    signal = 1;
                word += sword;
                sword = 0;
            }
            FM();
        } else if (c == ' ') {
            single_char_token("0x102");
        } else if (isletter()) {
            /* keyword/boolean first, then identifier, else an error */
            if (!funcha() && !funsym())
                funerror();
        } else if (isnumber()) {
            funnum();
        } else {
            switch (c) {
            case '+':  addmark();   break;
            case '\\': backslash(); break;
            case '\'': chamark();   break;
            case '"':  chasmark();  break;
            case '{':
            case '}':
                single_char_token("0x121");
                break;
            case '[':
            case ']':
            case '(':
            case ')':
                single_char_token("0x11d");
                break;
            case ':':  single_char_token("0x123"); break;
            case '.':  funnod();    break;
            case ',':  single_char_token("0x120"); break;
            case ';':  single_char_token("0x122"); break;
            case '&':  andmark();   break;
            case '~':  single_char_token("0x11c"); break;
            case '|':  ormark();    break;
            case '?':  askmark();   break;
            case '-':  submark();   break;
            case '/':  barmark();   break;
            case '>':  greatmark(); break;
            case '<':  smallmark(); break;
            case '=':
            case '^':
            case '%':
            case '*':
            case '!':
                emark();
                break;
            case '_':
            case '$':
                funsym();
                break;
            default:
                funerror();
                break;
            }
        }
    }

    /* Not reached: the loop above only ends through exit(). */
    fclose(fp1);
    fclose(fp2);

    return 0;
}

/*
 * ⌨️ 快捷键说明
 *
 * 复制代码Ctrl + C
 * 搜索代码Ctrl + F
 * 全屏模式F11
 * 增大字号Ctrl + =
 * 减小字号Ctrl + -
 * 显示快捷键?
 */