/* tocken.cpp */
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include <ctype.h>
#include "tocken.h"
/* Streams shared by the whole scanner; openFile() opens fpin on "input.txt" (read), fpout on "tocken.txt" (write) and fpxml on "tocken.xml" (write). */
FILE *fpin,*fpout,*fpxml;
/** symboltable is the head node of the symbol table, symboltail is its tail node */
struct symbol *symboltable=NULL,*symboltail=NULL;
/** Row currently being processed (set to rownum+1 by analysisLine, printed by error()) */
int executerow=0;
/**
 * Program entry point.
 *
 * Reads the input as a list of lines, scans every line into a list of
 * tocken nodes, splices the per-line lists into one list, drops the
 * nodes that carry no valid type, writes the results and closes the files.
 *
 * Returns 0 on normal completion (error() terminates the process itself
 * when a problem is detected).
 */
int main()
{
    struct text *curline=readFile();
    struct tocken *first=NULL,*end=NULL;
    while(curline!=NULL) /* organise the tockens of the whole file into one singly linked list */
    {
        /* scan one line; the result is the head of that line's tocken list */
        struct tocken *firstnode=analysisLine(curline->line,curline->rownum);
        if(firstnode!=NULL)
        {
            if(end!=NULL)
                end->next=firstnode;
            else
                first=firstnode;
            /* walk to the last node of this line so the next line can be appended */
            while(firstnode->next!=NULL)
                firstnode=firstnode->next;
            end=firstnode;
        }
        curline=curline->next;
    }
    /* nothing to filter when no tocken was produced at all */
    if(first!=NULL)
        first=deletenode(first);
    outResult(first);
    closeFile();
    return 0;
}
/**
 * Scan one line of input and return the linked list of tocken nodes
 * produced for it (NULL for an empty line).
 *
 * line   - NUL-terminated text of the line
 * rownum - zero-based row index; stored in the nodes as rownum+1
 *
 * One node is allocated per scanning step, including steps that only skip
 * a separator or a comment. Every node starts with type -1 and p NULL so
 * that a step which assigns no type leaves a node deletenode() will
 * reliably discard, instead of one holding whatever malloc returned.
 */
struct tocken *analysisLine(char *line,int rownum)
{
    int col=0;
    int len=strlen(line);
    struct tocken *firstnode=NULL,*pre=NULL;
    while(col<len)
    {
        char first=line[col];
        struct tocken *now=(struct tocken*)malloc(sizeof(struct tocken));
        now->type=-1;
        now->p=NULL;
        now->next=NULL;
        now->row=rownum+1;
        executerow=rownum+1;
        now->col=col+1;
        /* <ctype.h> functions need a value representable as unsigned char */
        if(isalpha((unsigned char)first)) col=recogid(line,now,col);
        else if(isdigit((unsigned char)first)) col=recogdig(line,now,col);
        else if(first==' '||first==','||first==';') col++;   /* separators produce no tocken */
        else if(first=='\'') col=recogstr(line,now,col);
        else if(first=='/') col=handlecom(line,now,col);
        else col=recogdel(line,now,col);
        /* append the node to this line's list */
        if(pre!=NULL) pre->next=now;
        else firstnode=now;
        pre=now;
    }
    return firstnode;
}
/**识别标识符*/
int recogid(char *line,struct tocken *node,int col)
{
char word[IDENTIFIERMAXLENGTH];
int i=0;
word[i++]=line[col++];
for(;isdigit(line[col])||isalpha(line[col]);i++,col++)
{
word[i]=line[col];
}
if(isIlegal(line[col]))
col--;
else
{ word[i]='\0';
error(errorinfo[0],col) ; //不合法字符 undo
}
word[i]='\0';
int n=isKey(word);
if(n!=0) node->type=n;
else
{
node->type=34;
node->p=lookup(node,4,word); //4 represents id
}
return col+1;
}
/**识别注释*/
int handlecom(char *line,struct tocken *node,int col)
{
col++;
char nowchar=line[col];
if(nowchar!='*')
{ //不是注释
node->type=48;
return col;
}
int flag=-1;
char prechar=line[col++];
for(int n=strlen(line);col<n;col++)
{
nowchar=line[col];
if(prechar=='*'&&nowchar=='/')
{
flag=0; //注释结束
break;
}
prechar=nowchar;
}
if(flag==-1) error(errorinfo[1],col); //error(注释未结束) undo
return col+1;
}
/**识别常数字符串*/
int recogstr(char *line,struct tocken *node,int col)
{
char word[IDENTIFIERMAXLENGTH];
int i=0;
col++;
for(;isIlegal(line[col])&&line[col]!=39;i++,col++)
word[i]=line[col];
word[i]='\0';
if(line[col]==39)
{ //constant char
node->type=37;
node->p=lookup(node,4,word); //4 represents constant chars
}
else error(errorinfo[0],col); //error("不合法字符"); undo
return col+1;
}
/**
 * Recognise a numeric constant that starts at line[col].
 * Two forms are distinguished: integer (type 35) and real (type 36).
 *
 * line - NUL-terminated text of the current line
 * node - tocken to fill in; lookup() also stores the symbol-table entry in node->p
 * col  - index of the first digit
 *
 * Returns the index of the first character after the number. When line[col]
 * is not a digit nothing is consumed and node is left untouched.
 *
 * A '.' is taken as a decimal point only when a digit follows it, so the
 * ".." tocken right after an integer (as in 1..10) is left for recogdel().
 * At most IDENTIFIERMAXLENGTH-1 characters are stored; further digits are
 * consumed but not kept, so the local buffer cannot be overrun.
 */
int recogdig(char *line,struct tocken *node,int col)
{
    char word[IDENTIFIERMAXLENGTH];
    int i=0;
    if(!isdigit((unsigned char)line[col]))
        return col;
    /* integer part */
    while(isdigit((unsigned char)line[col]))
    {
        if(i<IDENTIFIERMAXLENGTH-1)
            word[i++]=line[col];
        col++;
    }
    node->type=35;              /* integer unless a fraction follows */
    if(line[col]=='.'&&isdigit((unsigned char)line[col+1]))
    {   /* real number */
        node->type=36;
        if(i<IDENTIFIERMAXLENGTH-1)
            word[i++]=line[col];
        col++;
        while(isdigit((unsigned char)line[col]))
        {
            if(i<IDENTIFIERMAXLENGTH-1)
                word[i++]=line[col];
            col++;
        }
    }
    word[i]='\0';
    lookup(node,1,word);        /* 1 represents constant digital */
    return col;
}
/**识别界限符*/
int recogdel(char *line,struct tocken *node,int col)
{
switch(line[col])
{
case '+': node->type=43;break;
case '-': node->type=45;break;
case '*': node->type=41;break;
case ')': node->type=40;break;
case '(': node->type=39;break;
case '<':
if(line[col+1]=='=')
{
node->type=54;
col++;
}else if(line[col+1]=='>')
{
node->type=55;
col++;
}
else
node->type=53;
break;
case '>':
if(line[col+1]=='=')
{
node->type=58;
col++;
}else
node->type=57;
break;
case ':':
if(line[col+1]=='=')
{
node->type=51;
col++;
}else
node->type=52;
break;
case '.': //something unknow
if(line[col+1]=='.')
{
node->type=47;
col++;
}else
node->type=38;
break;
default:
{
if(!isIlegal(line[col])) error(errorinfo[0],col);
node->type=-1;
}
}
return col+1;
}
/**
 * Tell whether a character belongs to the scanner's legal character set.
 *
 * Returns 1 for letters, digits, the punctuation ( ) [ ] > < = + - * / : ; . ,
 * the space, the single quote, carriage return and line feed; 0 otherwise.
 *
 * The character is converted to unsigned char before it is handed to
 * isalpha()/isdigit(): passing a negative plain char (any byte >= 0x80 on
 * platforms where char is signed) to those functions is undefined behaviour.
 */
int isIlegal(char c)
{
    unsigned char uc=(unsigned char)c;
    if(isalpha(uc)||isdigit(uc))
        return 1;
    switch(c)
    {
        case '(': case ')': case '[': case ']':
        case '>': case '<': case '=':
        case '+': case '-': case '*': case '/':
        case ':': case ';': case ' ': case '.': case ',':
        case '\'': case '\r': case '\n':
            return 1;
        default:
            return 0;
    }
}
/**
 * Find the symbol (word, type) in the symbol table, appending a new entry
 * at the tail when it is not there yet.
 *
 * node - tocken whose p field is set to the entry
 * type - symbol class code compared and stored by the table
 * word - NUL-terminated spelling of the symbol
 *
 * Returns the existing or newly created entry (the same pointer stored in node->p).
 */
struct symbol *lookup(struct tocken *node,int type,char *word)
{
    struct symbol *entry=findstr(word,type);
    if(entry==NULL)
    {
        /* not present yet: build a new entry and link it at the tail */
        entry=(struct symbol *)malloc(sizeof(struct symbol));
        entry->next=NULL;
        entry->type=type;
        entry->length=strlen(word);
        strcpy(entry->word,word);
        if(symboltable==NULL)
            symboltable=entry;
        if(symboltail!=NULL)
            symboltail->next=entry;
        symboltail=entry;
    }
    node->p=entry;
    return entry;
}
/**
 * Search the symbol table for an entry with the given type and spelling.
 * Returns a pointer to the matching entry when one exists,
 * otherwise NULL.
 */
struct symbol *findstr(char *word,int type)
{
    struct symbol *entry;
    for(entry=symboltable;entry!=NULL;entry=entry->next)
    {
        if(entry->type==type&&strcmp(entry->word,word)==0)
            return entry;
    }
    return NULL;
}
/**
 * Decide whether s is a keyword.
 * Returns the one-based position of s in the key table, or 0 when s is not in it.
 */
int isKey(char *s)
{
    int idx=0;
    while(idx<KEYWORDSNUMBER)
    {
        if(strcmp(s,key[idx])==0)   /* equal */
            return idx+1;
        idx++;
    }
    return 0;
}
/**
 * Report an error and terminate the program.
 *
 * s   - error description
 * col - column reported together with the current row (executerow)
 *
 * The message is written to the text listing (when it is open) and to
 * standard output. openFile() calls this function before fpout exists, so
 * the stream is checked for NULL first. The files are not closed explicitly:
 * exit() flushes and closes every open stream, whereas closeFile() would
 * hand a NULL stream to fclose() when opening a file was what failed.
 * The process ends with EXIT_FAILURE so the caller can tell that the run failed.
 */
void error(char *s,int col)
{
    /* runtime text: "an error occurred, compilation aborted, error information:" */
    const char *prefix="出现错误,编译程序终止,错误信息:";
    if(fpout!=NULL)
        fprintf(fpout,"行:%d , 列:%d : %s %s\n",executerow,col,prefix,s);
    printf("行:%d , 列:%d : %s %s\n",executerow,col,prefix,s);
    exit(EXIT_FAILURE);
}
/**输出结果,包获一般形式和XML形式*/
void outResult(struct tocken *firstnode)
{
struct tocken *node=firstnode;
while(node!=NULL)
{ //输出一般形式
if(node->type>=0&&node->type<34)
{
fprintf(fpout,"%10s\t(%2d,_ ) in row:%d,col:%d\n",key[node->type-1],node->type,node->row,node->col);
}else if(node->type>=34&&node->type<=37)
{
fprintf(fpout,"%10s\t(%2d,%10s) in row:%d,col:%d\n",node->p->word,node->type,node->p->word,node->row,node->col);
}else if(node->type>=38&&node->type<=60)
{
fprintf(fpout,"%10s\t(%2d,_ ) in row:%d,col:%d\n",charcode[node->type-38],node->type,node->row,node->col);
}else
{
fprintf(fpout,"maybe error in row %d,column %d. \n",node->row,node->col);
}
node=node->next;
}
//输出XML形式
node=firstnode;
fprintf(fpxml,"<?xml version=\"1.0\" encoding=\"gb2312\"?><xml>\n");
while(node!=NULL)
{
if(node->type>=0&&node->type<34)
{
fprintf(fpxml,"<tocken><keyword>%s</keyword><type>%d</type><row>%d</row><col>%d</col></tocken>\n",key[node->type-1],node->type,node->row,node->col);
}else if(node->type>=34&&node->type<=37)
{
fprintf(fpxml,"<tocken><value>%s</value><type>%d</type><row>%d</row><col>%d</col></tocken>\n",node->p->word,node->type,node->row,node->col);
}else if(node->type>=38&&node->type<=60)
{
fprintf(fpxml,"<tocken><symbol>%s</symbol><type>%d</type><row>%d</row><col>%d</col></tocken>\n",charcode[node->type-38],node->type,node->row,node->col);
}else
{
fprintf(fpout,"maybe error in row %d,column %d. \n",node->row,node->col);
}
node=node->next;
}
fprintf(fpxml,"</xml>\n");
}
/**
 * Remove the superfluous tockens from a list: every node whose type is
 * <= 0 or > 60 is unlinked and freed.
 *
 * node - head of the list (may be NULL)
 *
 * Returns the head of the filtered list, or NULL when the list is empty or
 * no node survives. (The head used to be left uninitialised in those two
 * cases, so the caller received an indeterminate pointer.)
 */
struct tocken *deletenode(struct tocken *node)
{
    struct tocken *first=NULL,*pre=NULL,*nownode=node;
    while(nownode!=NULL)
    {
        struct tocken *nextnode=nownode->next;
        if(nownode->type<=0||nownode->type>60)
        {
            /* bypass the node in the kept part of the list, then release it */
            if(pre!=NULL)
                pre->next=nextnode;
            free(nownode);
        }
        else
        {
            if(pre==NULL)
                first=nownode;  /* first surviving node becomes the head */
            pre=nownode;
        }
        nownode=nextnode;
    }
    return first;
}
/**读取文件内容*/
struct text* readFile()
{
if(openFile()==0) return NULL; //open file failure
char temp[MAXCHARPERLINE]; //temporary array
int i=0;
struct text *first=NULL,*pre=NULL,*nex=NULL;
if(fgets(temp,MAXCHARPERLINE,fpin)!=NULL) //the first line
{
first=(text *)malloc(sizeof(text));
strcpy(first->line,temp);
first->rownum=i++;
first->next=nex;
pre=first;
}
while(fgets(temp,MAXCHARPERLINE,fpin)!=NULL) //the other lines
{
nex=(text *)malloc(sizeof(text));
nex->next=NULL;
strcpy(nex->line,temp);
nex->rownum=i++;
pre->next=nex;
pre=nex;
}
return first;
}
/**
 * Open the three files used by the program: "input.txt" for reading,
 * "tocken.txt" and "tocken.xml" for writing.
 *
 * Returns 1. A failed fopen() is passed to error(), which terminates the
 * program.
 *
 * NOTE(review): error() is reached here while fpout and/or fpxml may still
 * be NULL - verify that error() and closeFile() cope with that.
 */
int openFile()
{
    fpin=fopen("input.txt","r");
    if(fpin==NULL)
        error(errorinfo[2],0);

    fpout=fopen("tocken.txt","w");
    if(fpout==NULL)
        error(errorinfo[3],0);

    fpxml=fopen("tocken.xml","w");
    if(fpxml==NULL)
        error(errorinfo[3],0);

    return 1;
}
/**
 * Close the input, text-listing and XML streams.
 *
 * Streams that were never opened (still NULL) are skipped, because passing
 * NULL to fclose() is undefined behaviour. Each pointer is reset to NULL
 * after closing so that a second call is harmless.
 */
void closeFile()
{
    if(fpin!=NULL)
    {
        fclose(fpin);
        fpin=NULL;
    }
    if(fpout!=NULL)
    {
        fclose(fpout);
        fpout=NULL;
    }
    if(fpxml!=NULL)
    {
        fclose(fpxml);
        fpxml=NULL;
    }
}
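/*
 * Keyboard shortcuts (apparently help text from the code viewer this
 * listing was copied from; not part of the program):
 *   Copy code        Ctrl + C
 *   Search code      Ctrl + F
 *   Full screen      F11
 *   Switch theme     Ctrl + Shift + D
 *   Show shortcuts   ?
 *   Larger font      Ctrl + =
 *   Smaller font     Ctrl + -
 */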