⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tocken.cpp

📁 C语言实现的SIMPLE语言词法分析器
💻 CPP
字号:
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include <ctype.h>

#include "tocken.h"


/** Streams opened by openFile(): input.txt (read), tocken.txt and tocken.xml (write). */
FILE *fpin,*fpout,*fpxml;

/** symboltable is the head node of the symbol table; symboltail is its tail node. */
struct symbol *symboltable=NULL,*symboltail=NULL;
/** Number of the line currently being processed (set by analysisLine, printed by error). */
int executerow=0;

/**
 * Program entry point.
 *
 * Reads the input file into a list of lines, tokenizes every line, chains the
 * per-line token lists into one singly linked list, removes the non-token
 * nodes, writes the results and closes the files.
 *
 * Returns 0 on normal completion (lexical errors terminate inside error()).
 */
int main()
{
    struct text *line=readFile();
    struct tocken *first=NULL,*end=NULL;

    /* Organize the tokens of the whole file into a single linked list. */
    for(;line!=NULL;line=line->next)
    {
        /* Analyze one line; the result is the list of tokens found on it. */
        struct tocken *chain=analysisLine(line->line,line->rownum);

        if(end!=NULL)
            end->next=chain;
        if(chain==NULL)
            continue;

        if(first==NULL)
            first=chain;
        /* Advance to the last node of this line so the next line can be appended. */
        while(chain->next!=NULL)
            chain=chain->next;
        end=chain;
    }

    first=deletenode(first);
    outResult(first);
    closeFile();
    return 0;
}

/**按行数来进行分析,返回这一行的tocken字组成的链表*/
struct tocken *analysisLine(char *line,int rownum)
{
    int col=0;
    char first=line[col];
    int len=strlen(line);
    tocken *pre=NULL,*now=NULL;
    tocken *firstnode=NULL;
    while(col<len)
    {
       tocken *now=(tocken*)malloc(sizeof(tocken));
    //   now->p=(symbol*)malloc(sizeof(symbol));
       now->row=rownum+1;
       executerow=rownum+1;
       now->col=col+1;

       if(isalpha(first))                           col=recogid(line,now,col);
       else if(isdigit(first))                      col=recogdig(line,now,col);
       else if(first==' '||first==','||first==';')  col++;
       else if(first==39)                           col=recogstr(line,now,col);
       else if(first=='/')                          col=handlecom(line,now,col);
       else                                         col=recogdel(line,now,col);

       now->next=NULL;
       if(pre!=NULL)     pre->next=now;
       else              firstnode=now;
       pre=now;

       first=line[col];
    }
    return firstnode;
}

/**识别标识符*/
int recogid(char *line,struct tocken *node,int col)
{

    char word[IDENTIFIERMAXLENGTH];
    int i=0;
    word[i++]=line[col++];
    for(;isdigit(line[col])||isalpha(line[col]);i++,col++)
    {
       word[i]=line[col];
    }
    if(isIlegal(line[col]))
	   col--;
    else
    {    word[i]='\0';
         error(errorinfo[0],col)  ;            //不合法字符   undo
    }

    word[i]='\0';
    int n=isKey(word);
    if(n!=0)   node->type=n;
    else
    {
	   node->type=34;
	   node->p=lookup(node,4,word);          //4 represents id
    }
    return col+1;
}

/**识别注释*/
int handlecom(char *line,struct tocken *node,int col)
{
    col++;
    char nowchar=line[col];
    if(nowchar!='*')
    { //不是注释
	   node->type=48;
	   return col;
    }
    int flag=-1;
    char prechar=line[col++];
    for(int n=strlen(line);col<n;col++)
    {
        nowchar=line[col];
    	if(prechar=='*'&&nowchar=='/')
    	{
            flag=0; //注释结束
            break;
        }
        prechar=nowchar;
    }
    if(flag==-1)  error(errorinfo[1],col);                //error(注释未结束)  undo
    return col+1;
}

/**识别常数字符串*/
int recogstr(char *line,struct tocken *node,int col)
{
    char word[IDENTIFIERMAXLENGTH];
    int i=0;
    col++;
    for(;isIlegal(line[col])&&line[col]!=39;i++,col++)
          word[i]=line[col];
    word[i]='\0';

    if(line[col]==39)
    { //constant char
        node->type=37;
        node->p=lookup(node,4,word);            //4 represents constant chars
    }
    else       error(errorinfo[0],col);          //error("不合法字符");   undo

    return col+1;
}
/**
 * Recognizes a numeric constant, either an integer or a real.
 *
 * line - NUL-terminated text of the current line.
 * node - token node; type becomes 35 for an integer or 36 for a real, and
 *        lookup() attaches the symbol-table entry for the digit string.
 * col  - index of the first digit.
 *
 * Returns the index of the first character after the number (col itself when
 * line[col] is not a digit).
 *
 * A '.' is taken as a decimal point only when a digit follows it. This keeps
 * the two-character '..' token intact in input such as 1..5 and leaves a
 * trailing '.' to be recognized as its own token.
 * At most IDENTIFIERMAXLENGTH-1 characters are stored; longer digit strings
 * are consumed but truncated so the local buffer cannot overflow.
 */
int recogdig(char *line,struct tocken *node,int col)
{
    char word[IDENTIFIERMAXLENGTH];
    int i=0;

    if(!isdigit((unsigned char)line[col]))
        return col;

    /* integer part */
    while(isdigit((unsigned char)line[col]))
    {
        if(i<IDENTIFIERMAXLENGTH-1)
            word[i++]=line[col];
        col++;
    }
    node->type=35;                            /* integer */

    if(line[col]=='.'&&isdigit((unsigned char)line[col+1]))
    {   /* real: decimal point plus fraction digits */
        if(i<IDENTIFIERMAXLENGTH-1)
            word[i++]=line[col];
        col++;
        while(isdigit((unsigned char)line[col]))
        {
            if(i<IDENTIFIERMAXLENGTH-1)
                word[i++]=line[col];
            col++;
        }
        node->type=36;
    }

    word[i]='\0';
    lookup(node,1,word);                      /* 1 represents constant digital */
    return col;
}

/**识别界限符*/
int recogdel(char *line,struct tocken *node,int col)
{
    switch(line[col])
    {
	case '+':  node->type=43;break;
	case '-':  node->type=45;break;
	case '*':  node->type=41;break;
	case ')':  node->type=40;break;
	case '(':  node->type=39;break;
	case '<':
		   if(line[col+1]=='=')
		   {
		      node->type=54;
		      col++;
		   }else if(line[col+1]=='>')
		   {
		      node->type=55;
		      col++;
		   }
		   else
		      node->type=53;
		   break;
	case '>':
		   if(line[col+1]=='=')
		   {
		      node->type=58;
		      col++;
		   }else
		      node->type=57;
		   break;
	case ':':
		   if(line[col+1]=='=')
		   {
		      node->type=51;
		      col++;
		   }else
		      node->type=52;
		   break;
	case '.':    //something unknow
		   if(line[col+1]=='.')
		   {
		      node->type=47;
		      col++;
		   }else
		      node->type=38;
		   break;
	default:
	{
	    if(!isIlegal(line[col]))    error(errorinfo[0],col);
        node->type=-1;
	}
    }
    return col+1;
}

/**
 * Tells whether a character may appear in the source text.
 *
 * Returns 1 for letters, digits and the characters
 * ( ) [ ] > < = + - * / : ; space . , ' carriage-return line-feed,
 * and 0 for everything else (including '\0' and bytes outside ASCII).
 *
 * The value is converted to unsigned char before it reaches the <ctype.h>
 * functions; passing a negative plain char to them is undefined behaviour.
 */
int isIlegal(char c)
{
    const unsigned char uc=(unsigned char)c;

    if(isalpha(uc)||isdigit(uc))
        return 1;

    switch(c)
    {
    case '(': case ')': case '[': case ']':
    case '>': case '<': case '=':
    case '+': case '-': case '*': case '/':
    case ':': case ';': case ' ': case '.': case ',':
    case '\'':
    case '\r':
    case '\n':
        return 1;
    default:
        return 0;
    }
}

/**查找添加符号表*/
struct symbol *lookup(struct tocken *node,int type,char *word)
{
    {
        struct symbol *p=findstr(word,type);
        if(p==NULL)
        {
            symbol *nowsymbol=(symbol *)malloc(sizeof(symbol));
            nowsymbol->next=NULL;
            nowsymbol->type=type;
            nowsymbol->length=strlen(word);
            strcpy(nowsymbol->word,word);
            if(symboltable==NULL)      symboltable=nowsymbol;
            if(symboltail!=NULL)
                symboltail->next=nowsymbol;
             symboltail=nowsymbol;

             node->p=nowsymbol;
             return nowsymbol;
        }else
        {
            node->p=p;
            return p;
        }
    }
}

/**
 * Searches the symbol table for an entry with the same type and text.
 *
 * Returns a pointer to the matching entry, or NULL when there is none.
 */
struct symbol *findstr(char *word,int type)
{
    struct symbol *cur;

    for(cur=symboltable;cur!=NULL;cur=cur->next)
    {
        if(cur->type!=type)
            continue;
        if(strcmp(cur->word,word)==0)
            return cur;
    }
    return NULL;
}

/**
 * Checks whether s is a keyword.
 *
 * Returns the one-based position of s in the key[] table, or 0 when s does
 * not match any of the KEYWORDSNUMBER entries (comparison is exact).
 */
int isKey(char *s)
{
    int idx=0;

    while(idx<KEYWORDSNUMBER)
    {
        if(strcmp(s,key[idx])==0)
            return idx+1;
        idx++;
    }
    return 0;
}

/**输出错误信息*/
void error(char *s,int col)
{
       char *error="出现错误,编译程序终止,错误信息:";
       fprintf(fpout,"行:%d , 列:%d : %s %s\n",executerow,col,error,s);
       printf("行:%d , 列:%d : %s %s\n",executerow,col,error,s);
       closeFile();
       exit(0);
}

/** Writes s to out with the XML markup characters &, < and > replaced by entities. */
static void writeXmlText(FILE *out,const char *s)
{
    for(;*s!='\0';s++)
    {
        switch(*s)
        {
        case '&':  fputs("&amp;",out);  break;
        case '<':  fputs("&lt;",out);   break;
        case '>':  fputs("&gt;",out);   break;
        default:   fputc(*s,out);       break;
        }
    }
}

/**
 * Writes the token list in plain form to fpout and in XML form to fpxml.
 *
 * firstnode - head of the token list (may be NULL).
 *
 * Token classes by type: 1..33 are keywords printed from key[], 34..37 are
 * identifiers/constants printed from the symbol-table entry, 38..60 are
 * delimiters printed from charcode[]. Any other type is reported once in the
 * plain output as a possible error and is left out of the XML document.
 * Text placed inside XML elements is escaped so that tokens such as '<' or
 * '<=' do not break the document.
 */
void outResult(struct tocken *firstnode)
{
    struct tocken *node;

    /* plain form */
    for(node=firstnode;node!=NULL;node=node->next)
    {
        if(node->type>=1&&node->type<34)
        {
             fprintf(fpout,"%10s\t(%2d,_         ) in row:%d,col:%d\n",key[node->type-1],node->type,node->row,node->col);
        }else if(node->type>=34&&node->type<=37)
        {
             fprintf(fpout,"%10s\t(%2d,%10s) in row:%d,col:%d\n",node->p->word,node->type,node->p->word,node->row,node->col);
        }else if(node->type>=38&&node->type<=60)
        {
             fprintf(fpout,"%10s\t(%2d,_         ) in row:%d,col:%d\n",charcode[node->type-38],node->type,node->row,node->col);
        }else
        {
             fprintf(fpout,"maybe error in row %d,column %d. \n",node->row,node->col);
        }
    }

    /* XML form */
    fprintf(fpxml,"<?xml version=\"1.0\" encoding=\"gb2312\"?><xml>\n");
    for(node=firstnode;node!=NULL;node=node->next)
    {
        const char *tag;
        const char *text;

        if(node->type>=1&&node->type<34)
        {
            tag="keyword";
            text=key[node->type-1];
        }else if(node->type>=34&&node->type<=37)
        {
            tag="value";
            text=node->p->word;
        }else if(node->type>=38&&node->type<=60)
        {
            tag="symbol";
            text=charcode[node->type-38];
        }else
        {
            continue;       /* already reported in the plain output above */
        }

        fprintf(fpxml,"<tocken><%s>",tag);
        writeXmlText(fpxml,text);
        fprintf(fpxml,"</%s><type>%d</type><row>%d</row><col>%d</col></tocken>\n",tag,node->type,node->row,node->col);
    }
    fprintf(fpxml,"</xml>\n");
}

/**
 * Removes and frees every node that is not a real token
 * (type <= 0 or type > 60).
 *
 * node - head of the list (may be NULL).
 *
 * Returns the new head, or NULL when the list is empty or every node was
 * removed.
 */
struct tocken *deletenode(struct tocken *node)
{
    struct tocken *first=NULL;     /* first kept node; stays NULL if none is kept */
    struct tocken *pre=NULL;       /* last kept node so far */
    struct tocken *nownode=node;

    while(nownode!=NULL)
    {
        struct tocken *following=nownode->next;

        if(nownode->type<=0||nownode->type>60)
        {
            /* Unlink from the last kept node, then release. */
            if(pre!=NULL)
                pre->next=following;
            free(nownode);
        }
        else
        {
            if(pre==NULL)
                first=nownode;
            pre=nownode;
        }
        nownode=following;
    }
    return first;
}

/**读取文件内容*/
struct text* readFile()
{
    if(openFile()==0) return NULL;       //open file failure

    char temp[MAXCHARPERLINE];           //temporary array
    int i=0;

    struct text *first=NULL,*pre=NULL,*nex=NULL;
    if(fgets(temp,MAXCHARPERLINE,fpin)!=NULL)       //the first line
    {
	   first=(text *)malloc(sizeof(text));
	   strcpy(first->line,temp);
	   first->rownum=i++;
	   first->next=nex;
	   pre=first;
    }

    while(fgets(temp,MAXCHARPERLINE,fpin)!=NULL)       //the other lines
    {
    	nex=(text *)malloc(sizeof(text));
    	nex->next=NULL;
    	strcpy(nex->line,temp);
    	nex->rownum=i++;
    	pre->next=nex;
    	pre=nex;
    }
    return first;
}
/**
 * Opens the three files used by the program: input.txt for reading,
 * tocken.txt and tocken.xml for writing.
 *
 * A failed open is reported through error() with errorinfo[2] for the input
 * file and errorinfo[3] for either output file.
 *
 * Returns 1.
 */
int openFile()
{
    fpin=fopen("input.txt","r");
    if(fpin==NULL)
        error(errorinfo[2],0);

    fpout=fopen("tocken.txt","w");
    if(fpout==NULL)
        error(errorinfo[3],0);

    fpxml=fopen("tocken.xml","w");
    if(fpxml==NULL)
        error(errorinfo[3],0);

    return 1;
}
/**
 * Closes the input, result and XML streams.
 *
 * A stream that was never opened (still NULL) is skipped, because fclose()
 * must not be given a null pointer. Each pointer is reset to NULL after
 * closing so a repeated call is harmless.
 */
void closeFile()
{
    if(fpin!=NULL)
    {
        fclose(fpin);
        fpin=NULL;
    }
    if(fpout!=NULL)
    {
        fclose(fpout);
        fpout=NULL;
    }
    if(fpxml!=NULL)
    {
        fclose(fpxml);
        fpxml=NULL;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -