⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 compiler.c

📁 编译原理--词法分析器
💻 C
字号:
#include<ctype.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>


#define SYMTABLE_LENGTH 500

/***************************************************************************************************** 
 *ID 标识符,ADD '+' ,SUB '-',MUL '*',DIV '/',AFF ';',SADD '++' ,LB ')' ,FLOAT 实数 ,INT 整数 ,
 *END 程序结束 ,LMB '[' ,RMB ']' ,SSUB '--' ,POINT 指针 ,RB ')' ,SPT '.' ,ADRESS '&' ,NT '!' ,
 *LMB '{' ,PER '%' ,'RMB '}' ,LE '<=' ,NE '<' ,GT '>=' ,NT '>' ,EQ '=' ,EV 赋值 ,COA ',' ,STR 字符串 ,
 *J '#' ,AND '&&' ,OR '|' ****************************************************************************/

#define END              -1
#define ID                0
#define ADD               1
#define SUB               2
#define MUL               3
#define DIV               4
#define AFF               5
#define SADD              6
#define LB                7
#define FLOAT             8
#define INT               9
#define LMB              10
#define RMB              11
#define SSUB             12
#define POINT            13
#define RB               14
#define SPT              15
#define ADRSS            16
#define NOT              17
#define INV              19
#define LXB              20
#define PER              21
#define RXB              22
#define LE               23
#define NE               24
#define LT               25
#define NT               26
#define EQ               27
#define EV               28
#define COA              29
#define STR              30
#define J                31
#define AND              32
#define OR               33
#define RSR         256


/* linenum: not referenced by the functions visible in this file; analyse()
   declares its own static linenum, which shadows this one.
   errno: number of lexical errors; incremented by analyse(), reset and
   reported by main().
   NOTE(review): errno is also the name of the standard library's error
   indicator (<errno.h>); defining an object with that name is not portable. */
int linenum,errno;
/* Character pool that holds the NUL-terminated spellings stored by insert(). */
char lexemes[1000];

/* One symbol-table record. */
struct entry
{
    char *lexptr;   /* points at the spelling inside lexemes[] */
    int token;      /* token code supplied to insert() */
};

/* One reserved word as loaded by init(). */
struct keywords
{
    char word[15];  /* spelling, NUL-terminated */
    int tokval;     /* token code; init() assigns 34 + table index */
};

/* Symbol table. insert() pre-increments lastentry, so slot 0 is never
   filled and keeps its zero initialisation. */
struct entry symtable[SYMTABLE_LENGTH];
int lastchar=0;     /* index in lexemes[] of the last byte used by insert() */
int lastentry=0;    /* index of the most recently filled symtable[] slot */

/* Reserved-word table filled by init() and searched by isReserver(). */
struct keywords keyword[32];

/****************************************************
    Initialisation: load the reserved words listed in
    keyword.txt into the keyword[] table.

    Words are whitespace-separated.  The k-th word
    stored gets token value 34+k.  Reading stops at end
    of file or when keyword[] is full.  A word too long
    for keyword[].word is skipped rather than truncated,
    so it cannot be split into two bogus entries.

    Prints a message and terminates the program if
    keyword.txt cannot be opened.
 ****************************************************/
void init(void)
{
    FILE *fp;
    char tmpStr[256];   /* real storage; %255s below keeps fscanf inside it */
    int i=0;
    const int capacity=(int)(sizeof keyword/sizeof keyword[0]);

    if((fp=fopen("keyword.txt","r"))==NULL)
    {
     printf("File can't be opened!\n");
     exit(0);
    }

    /* ==1 (not !=EOF) so a failed conversion also ends the loop. */
    while(i<capacity&&fscanf(fp,"%255s",tmpStr)==1)
    {
       if(strlen(tmpStr)>=sizeof keyword[i].word)
          continue;   /* would overflow word[]; cannot be stored */

       strcpy(keyword[i].word,tmpStr);
       keyword[i].tokval=34+i;
       i++;
    }
    fclose(fp);

}

/****************************
     Search the symbol table for the identifier s.

     Scans from the newest entry (lastentry) down to
     slot 1.  Returns the index of the matching entry,
     or 0 when s is not in the table.
 ****************************/
int lookup(char s[])
{
    int idx=lastentry;

    while(idx>0)
    {
        if(!strcmp(symtable[idx].lexptr,s))
        {
            return idx;
        }
        idx--;
    }

    return 0;
}

/******************************
    Insert a new identifier into the symbol table.

    s   : NUL-terminated spelling; copied into lexemes[]
    tok : token code recorded for the entry

    Returns the index of the new entry (always >= 1).
    Returns 0, after printing a message and storing
    nothing, when either symtable[] or lexemes[] has no
    room left.  Slot 0 is never filled by this function,
    so symtable[0].token is still the zero it was
    initialised with.
 ******************************/
int insert(char s[],int tok)
{
    size_t len=strlen(s);
    size_t start=(size_t)lastchar+1;   /* first byte of the new spelling */

    if((lastentry+1)>=SYMTABLE_LENGTH)
    {
       printf("symtable is full\n");
       return 0;
    }

    /* strcpy writes len+1 bytes starting at lexemes[start]. */
    if(start+len+1>sizeof lexemes)
    {
       printf("lexemes buffer is full\n");
       return 0;
    }

    lastentry=lastentry+1;
    symtable[lastentry].token=tok;
    strcpy(&lexemes[start],s);
    symtable[lastentry].lexptr=&lexemes[start];

    lastchar=lastchar+(int)len+1;

    return lastentry;

}


/********************************
    Test whether str is a reserved word.

    Compares str with each of the 32 keyword[] entries
    in order.  Returns the tokval of the first entry
    whose word equals str, or 1 when none matches.
 ********************************/
int isReserver(char str[])
{
   int k;

   for(k=0;k<32;k++)
   {
     if(!strcmp(str,keyword[k].word))
     {
       return keyword[k].tokval;
     }
   }

   return 1;
}

/**********************************
    Core lexical analysis routine.

    Reads characters from `in` until one token has been
    recognised, writes a "(category, lexeme)" line for it
    to `out`, and returns its token code.

    in  : source stream being scanned
    out : report stream

    Returns:
      - the token code stored in the symbol table entry
        for an identifier (the entry is created with ID
        on first sight),
      - the keyword's tokval for a reserved word,
      - one of the token-code macros for every other
        token,
      - END once the input is exhausted.

    Malformed numbers and a string with no closing quote
    are reported with an "ERROR line N" line, counted in
    errno, and still returned as FLOAT / STR.  A block
    comment with no terminator is reported the same way
    and ends the scan with END.

    Block comments, whitespace and unrecognised
    characters are skipped.  The line counter is a static
    local, so it keeps counting across calls and is not
    reset when a new input stream is passed in.
 **********************************/

/* Size of the buffer holding the spelling of the token being scanned. */
enum { TOKEN_BUF_SIZE = 1024 };

/* Append ch to token[] at *len; once the buffer (less the terminating NUL)
   is full, further characters are dropped instead of overrunning it. */
static void tok_append(char *token,int *len,int ch)
{
    if(*len<TOKEN_BUF_SIZE-1)
        token[(*len)++]=(char)ch;
}

int analyse(FILE *in,FILE *out)
{
    char token[TOKEN_BUF_SIZE];  /* writable storage, not a string literal */
    int ch,next;                 /* int so that EOF is distinct from every byte */
    int i=0,flag=0;
    int p=0;
    static int linenum=1;

    ch=fgetc(in);

    while(ch!=EOF)
    {
        i=0;

        /* Skip blanks, tabs and newlines, counting lines. */
        while(ch==' '||ch=='\n'||ch=='\t')
        {
            if(ch=='\n')
                linenum++;
            ch=fgetc(in);
        }

        if(ch==EOF)
            return END;

        /* Token starting with a letter: identifier or reserved word. */
        if(isalpha(ch))
        {
            while(isalpha(ch)||isdigit(ch))
            {
                tok_append(token,&i,ch);
                ch=fgetc(in);
            }
            token[i]='\0';

            flag=isReserver(token);

            if(flag==1)
            {
                fprintf(out,"(标识符\t\t,    %-15s)\n",token);
                p=lookup(token);
                if(p==0)
                    p=insert(token,ID);
                ungetc(ch,in);
                return symtable[p].token;
            }
            else if(flag>33)
            {
                fprintf(out,"(保留字\t\t,    %-15s)\n",token);
                ungetc(ch,in);
                return flag;
            }

            continue;
        }

        /* Token starting with a digit: integer, real, or malformed number. */
        if(isdigit(ch))
        {
            while(isdigit(ch))
            {
                tok_append(token,&i,ch);
                ch=fgetc(in);
            }

            if(ch=='.')
            {
                tok_append(token,&i,ch);
                ch=fgetc(in);

                if(isdigit(ch))
                {
                    while(isdigit(ch))
                    {
                        tok_append(token,&i,ch);
                        ch=fgetc(in);
                    }

                    if(ch=='e'||ch=='E')
                    {
                        tok_append(token,&i,ch);
                        ch=fgetc(in);

                        if(ch=='+'||ch=='-')
                        {
                            tok_append(token,&i,ch);
                            ch=fgetc(in);
                            while(isdigit(ch))
                            {
                                tok_append(token,&i,ch);
                                ch=fgetc(in);
                            }
                        }
                        else
                        {
                            /* Exponent marker with no sign: rejected. */
                            while(isdigit(ch)||isalpha(ch)||ch=='.')
                            {
                                tok_append(token,&i,ch);
                                ch=fgetc(in);
                            }
                            token[i]='\0';
                            fprintf(out,"(无法识别单词\t,    %-15s)\n",token);
                            fprintf(out,"\nERROR line %d\n",linenum);
                            errno++;
                            ungetc(ch,in);
                            return FLOAT;
                        }
                    }

                    token[i]='\0';
                    fprintf(out,"(实数\t\t,    %-15s)\n",token);
                    ungetc(ch,in);
                    return FLOAT;
                }

                /* NOTE(review): digits followed by '.' with no fraction
                   digit produce no token and no error; scanning simply
                   resumes at the character after the '.'.  Kept as found,
                   since the intended treatment is not clear from the code. */
                continue;
            }
            else if(isalpha(ch))
            {
                /* A letter directly after the digits: malformed word. */
                while(isdigit(ch)||isalpha(ch)||ch=='.')
                {
                    tok_append(token,&i,ch);
                    ch=fgetc(in);
                }
                token[i]='\0';
                fprintf(out,"(无法识别单词\t,     %-15s)\n",token);
                fprintf(out,"\nERROR line %d\n",linenum);
                errno++;
                ungetc(ch,in);
                return FLOAT;
            }
            else
            {
                token[i]='\0';
                fprintf(out,"(整数\t\t,    %-15s)\n",token);
                ungetc(ch,in);
                return INT;
            }
        }

        /* Everything else is punctuation; the lexeme starts with ch. */
        tok_append(token,&i,ch);

        switch(ch)
        {
        case '#':
            /* The rest of the line belongs to the '#' token. */
            ch=fgetc(in);
            while(ch!='\n'&&ch!=EOF)
            {
                tok_append(token,&i,ch);
                ch=fgetc(in);
            }
            if(ch=='\n')
                linenum++;   /* the newline is consumed here, so count it here */
            token[i]='\0';
            fprintf(out,"(引用文件\t,    %-15s)\n",token);
            return J;

        case '(':
            token[i]='\0';
            fprintf(out,"(左括号   \t,    %-15s)\n",token);
            return LB;

        case ')':
            token[i]='\0';
            fprintf(out,"(右括号   \t,    %-15s)\n",token);
            return RB;

        case ';':
            token[i]='\0';
            fprintf(out,"(分号   \t,    %-15s)\n",token);
            return AFF;

        case '{':
            token[i]='\0';
            fprintf(out,"(左大括号\t,    %-15s)\n",token);
            return LXB;

        case '}':
            token[i]='\0';
            fprintf(out,"(右大括号\t,    %-15s)\n",token);
            return RXB;

        case '[':
            token[i]='\0';
            fprintf(out,"(左中括号\t,    %-11s    )\n",token);
            return LMB;

        case ']':
            token[i]='\0';
            fprintf(out,"(右中括号\t,    %-11s    )\n",token);
            return RMB;

        case '.':
            token[i]='\0';
            fprintf(out,"(点运算符\t,    %-15s)\n",token);
            return SPT;

        case '*':
            token[i]='\0';
            fprintf(out,"(乘号   \t,    %-15s)\n",token);
            return MUL;

        case '%':
            token[i]='\0';
            fprintf(out,"(求余   \t,    %-15s)\n",token);
            return PER;

        case ',':
            token[i]='\0';
            fprintf(out,"(逗号   \t,    %-15s)\n",token);
            return COA;

        case '+':
            next=fgetc(in);
            if(next=='+')
            {
                tok_append(token,&i,next);
                token[i]='\0';
                fprintf(out,"(自加运算符\t,    %-15s)\n",token);
                return SADD;
            }
            token[i]='\0';
            fprintf(out,"(加号\t\t,    %-15s)\n",token);
            ungetc(next,in);
            return ADD;

        case '-':
            next=fgetc(in);
            if(next=='-')
            {
                tok_append(token,&i,next);
                token[i]='\0';
                fprintf(out,"(自减运算符\t,    %-15s)\n",token);
                return SSUB;
            }
            if(next=='>')
            {
                tok_append(token,&i,next);
                token[i]='\0';
                fprintf(out,"(指针  \t,    %-15s)\n",token);
                return POINT;
            }
            token[i]='\0';
            fprintf(out,"(负(减)号\t,    %-15s)\n",token);
            ungetc(next,in);
            return SUB;

        case '&':
            next=fgetc(in);
            if(next=='&')
            {
                /* Keep both characters so the report shows "&&". */
                tok_append(token,&i,next);
                token[i]='\0';
                fprintf(out,"(逻辑与  \t,    %-15s)\n",token);
                return AND;
            }
            token[i]='\0';
            fprintf(out,"(地址运算符\t,    %-15s)\n",token);
            ungetc(next,in);
            return ADRSS;

        case '|':
            next=fgetc(in);
            if(next=='|')
            {
                /* Keep both characters so the report shows "||". */
                tok_append(token,&i,next);
                token[i]='\0';
                fprintf(out,"(逻辑或  \t,    %-15s)\n",token);
                return OR;
            }
            /* A lone '|' yields no token; scanning resumes at `next`. */
            ch=next;
            continue;

        case '!':
            next=fgetc(in);
            if(next=='=')
            {
                tok_append(token,&i,next);
                token[i]='\0';
                fprintf(out,"(不等于  \t,    %-15s)\n",token);
                return NOT;
            }
            token[i]='\0';
            fprintf(out,"(取反运算符\t,    %-15s)\n",token);
            ungetc(next,in);
            return INV;

        case '/':
            next=fgetc(in);
            if(next!='*')
            {
                token[i]='\0';
                fprintf(out,"(除号   \t,    %-15s)\n",token);
                ungetc(next,in);
                return DIV;
            }

            /* Block comment: discard everything up to the terminator. */
            ch=fgetc(in);
            for(;;)
            {
                if(ch==EOF)
                {
                    /* Comment never closed: report it instead of looping forever. */
                    fprintf(out,"\nERROR line %d\n",linenum);
                    errno++;
                    return END;
                }
                if(ch=='*')
                {
                    /* Re-examine the following character on the next pass,
                       so a run of several '*' before '/' still terminates. */
                    ch=fgetc(in);
                    if(ch=='/')
                        break;
                }
                else
                {
                    if(ch=='\n')
                        linenum++;
                    ch=fgetc(in);
                }
            }
            ch=fgetc(in);
            continue;

        case '<':
            next=fgetc(in);
            if(next=='=')
            {
                tok_append(token,&i,next);
                token[i]='\0';
                fprintf(out,"(小于等于\t,    %-15s)\n",token);
                return LE;
            }
            token[i]='\0';
            fprintf(out,"(小于   \t,    %-15s)\n",token);
            ungetc(next,in);
            return NE;

        case '>':
            next=fgetc(in);
            if(next=='=')
            {
                tok_append(token,&i,next);
                token[i]='\0';
                fprintf(out,"(大于等于\t,    %-15s)\n",token);
                return LT;
            }
            token[i]='\0';
            fprintf(out,"(大于   \t,    %-15s)\n",token);
            ungetc(next,in);
            return NT;

        case '=':
            next=fgetc(in);
            if(next=='=')
            {
                tok_append(token,&i,next);
                token[i]='\0';
                fprintf(out,"(等于   \t,    %-15s)\n",token);
                return EQ;
            }
            token[i]='\0';
            fprintf(out,"(赋值   \t,    %-15s)\n",token);
            ungetc(next,in);
            return EV;

        case '"':
            /* String literal: everything up to the closing quote. */
            ch=fgetc(in);
            while(ch!='"'&&ch!=EOF)
            {
                tok_append(token,&i,ch);
                ch=fgetc(in);
            }
            if(ch=='"')
            {
                tok_append(token,&i,ch);
                token[i]='\0';
                fprintf(out,"(字符串\t\t,    %-15s)\n",token);
                return STR;
            }
            /* Input ended before the closing quote. */
            token[i]='\0';
            fprintf(out,"(缺少\"\t,    %-15s)\n",token);
            fprintf(out,"\nERROR line %d\n",linenum);
            errno++;
            return STR;

        default:
            /* Unrecognised character: skip it. */
            ch=fgetc(in);
            continue;
        }
    }

    /* Input exhausted without another token. */
    return END;
}

/*
 * Interactive driver.
 *
 * Loads the reserved words, then repeatedly asks for a source file name,
 * runs analyse() over that file until it returns -1, and writes the token
 * report followed by a success/error summary to the next result file
 * (r1.txt ... r5.txt).  The loop ends when the user answers anything other
 * than y/Y, when standard input is exhausted, or when all result files
 * have been used.
 *
 * The continue prompt is read with scanf, so the answer must be followed
 * by Enter.
 *
 * Terminates the program with a message if a file cannot be opened.
 */
int main(void)
{
 FILE *in,*out;
 char fname[20];
 char result[5][10]={"r1.txt","r2.txt","r3.txt","r4.txt","r5.txt"};
 const int result_count=(int)(sizeof result/sizeof result[0]);
 char yn;
 int i,j=0;

 init();

 do
 {
  i=0;
  errno=0;
  yn='n';
  printf("\nPlease enter the file name you want to compile: ");
  /* %19s keeps the name inside fname[20]; stop if no name can be read. */
  if(scanf("%19s",fname)!=1)
     break;

  if((in=fopen(fname,"rt"))==NULL)
     {
       printf("File can't be opened!\n");
       exit(0);
     }

  if((out=fopen(result[j],"wt"))==NULL)
     {
      printf("File can't be opened!\n");
      fclose(in);
      exit(0);
     }

  fprintf(out,"\n%s%s\n","    This is compiler for: ",fname);
  fprintf(out,"*****************************************\n\n\n");
  fprintf(out,"%8s\t %-15s)\n","(  属性  ","    值\t\t");

  /* -1 is the end-of-input code returned by analyse(). */
  while(i!=-1)
  {

    i=analyse(in,out);

  }
  fprintf(out,"\n---------------------------------------");
  if(errno==0)
    fprintf(out,"\nSuccess!");
  else
   fprintf(out,"\nThere are %d errors!!",errno);
  fprintf(out,"\n---------------------------------------");

  /* Close both streams so the report is flushed before the next file. */
  fclose(in);
  fclose(out);

  printf("The result is in the %s!",result[j]);
  j++;

  /* Never index past the last result file name. */
  if(j>=result_count)
  {
    printf("\nAll %d result files have been used.\n",result_count);
    break;
  }

  printf("\nDo you want to continue?(y/n) ");
  /* The leading blank skips the newline left behind by the previous read. */
  if(scanf(" %c",&yn)!=1)
     yn='n';
 }while(yn=='y'||yn=='Y');

 return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -