theparsingprocessofwordsandsyntaxincompilerbymyself.txt

来自「我自己写的一个C - 语言的词法分析与语法分析两个编译步骤」· 文本代码 · 共 407 行
TXT
407 行
  int main()    {
	if( ( fp = fopen("c:\\test.txt","r") )==NULL) {
	  printf("Failed to open the file ! ");     return 0;
	}	
out=fopen("test_out.txt","w");  
	convertGrammar();
	   getFirstSet();	
	   getFollowSet();
	   initParSetable();
	   createTree();	    
    fclose(fp);
    fclose(out);		
    return 0;
}
枚举类型的定义：
enum Symbol
{
	    ERROR,//0	错误
		ENDF,//1	文件结束
		ELSE,//2	else
		IF,//3		if
		INT,//4		int 整数
		RETURN,//5	返回
		VOID,//6	空型
		WHILE,//7	while循环
		DIGIT,//8	数字
		ID, //9		标识
		PLUS, //10	+
		MINUS, //11	-
		MUL, //12	*
		DIV, //13	/
		LT, //14		<
		LTEQ, //15	<=
		GT, // 16		>
		GTEQ, //17	>=	
		EQ, //18		==
		NEQ, //19	!=
		ASSIGN, //20	=
		SEMI, //21	;分号
		COMMA, //22	,逗号
		LPAREN, //23	(
		RPAREN, //24	)
		LMIDPAREN, //25	[
		RMIDPAREN, //26	]
		LMAXPAREN, //27	{
		RMAXPAREN, //28	}
		empty, //29
     //  grammar terminals and nonterminals
		program, //30
		declaration_list, //31
		declaration_list_1, //32
		declaration, //33
		declaration_1, //34
		type_specifier, //35
        var_declaration, //36
		var_declaration_1, //37
        params,//36
		params_1, //38
		params_2, //39
		param_list,//40
		param_list_1,//41
     	param,//42
		param_1,//43
		compound_stmt,//44
		local_declarations,//45
		local_declarations_1,//46
		statement_list,//47
		statement_list_1,//48		
		statement,//49
		expression_stmt,//50
		selection_stmt,//51
		selection_stmt_1,//52
		iteration_stmt,//53
		return_stmt,//54
		return_stmt_1,//55
		expression,//56
		expression_1,//57
		expression_2,//58
		expression_3,//59
		var,//60
		var_1,//61
		simple_expression,//62
		simple_expression_1,//63
        relop,//64
     	additive_expression,//65
		additive_expression_1,//66	
		addop, //67
		term, //68
		term_1, //69
		mulop, //70
		factor, //71
		factor_1, //72
		call, //73
		args, //74
		arg_list, //75
		arg_list_1, //76
		OVER, //77   $ 堆栈初始化时的栈顶元素		
 } ;


将文本文件中记录的文法转换为整数型并在内存中表示的函数实现：
   void convertGrammar()
{
	/* read grammar from text file in disc and convert it into memory */
  int rowIndex=0,columnIndex=0;
  char grammarFactor[20];
  FILE *f=fopen("c:\\grammar.txt","r");
  int i=0, typeValue=0;
  char ch;
  ch=fgetc(f);
  while( ch!= EOF ) {
      if(ch!=' ' && ch != '\n')    {
	  grammarFactor[i]=ch; 
	  i++;
	  ch=fgetc(f);
	  continue;
	 }
      grammarFactor[i+1] = '\0';
	 
	  if(ch==' ')   {
	     if( strcmp( grammarFactor, "->")==0 )    {  
	     ch=fgetc(f);   i=0;
		 continue;
		 }
        else   {
		   typeValue = convert(grammarFactor);
		   Grammar[rowIndex][columnIndex] = typeValue;
        columnIndex++;
		}
	 }    else        /*  ch='\n'  */    {
     typeValue = convert(grammarFactor);
	    Grammar[rowIndex][columnIndex]=typeValue;
     Grammar[rowIndex][columnIndex+1]=-1;
	    ch=fgetc(fp);
	   rowIndex++;
    columnIndex=0;
	   i=0;
	 }  }   }

基于转换后的文法求每个符号的first集合：
  void getFirstSet()  /* construct first set */
{
	 bool  sequel;
	 int i,j,k;
	 for( i=0; i<30; i++)  /* initial the terminal's firstst to be itself  */ 	 {
		FirstSet[i].item[0] = i;
		FirstSet[i].Length = 1;
	 for( j=1; j<30; j++ )     {
	  FirstSet[i].item[j] = -1;
		}
	}
	for( i =30; i < 86; i++ )    /*initial the nonterminal's firstst to be empty */ 	{
		for( j=0; j < 30; j++ ) {
		FirstSet[i].item[j]=-1;
		}
		FirstSet[i].Length = 0;
/***************************************************************************/
	}
	changed = true;
	 while( changed )    {
		changed=false;
	   for( i=0; i < 86; i ++){
			sequel = true;    k = 1 ;  
   while( sequel && ( Grammar[i][k] != -1  ) )   {
	       setMerge( &FirstSet[ Grammar[i][0] ], &FirstSet[ Grammar[i][k] ], false );
			 if( ! Index( &FirstSet[ Grammar[i][k] ], empty ) )   {
					  sequel = false;
				       k ++;
					}
		   }
		 if( sequel == true )   /* add empty into the first set */
		      setMerge ( &FirstSet[ Grammar[i][0] ], &EP, true );  
		}
	}
	}
对文法中的非终结符求fellow集合的源代码：
       void getFollowSet()
{
	int num=0;
	int i,j,k;
	/* Initialize all the follow set of nonterminals to be empty */  
	 /*  48 nonterminal exist in the grammar   */
	for( i=0 ; i < 48; i++)      {
		for( j=0; j < 30; j++)    {
			FollowSet[i].item[j]=-1;
		}
		FollowSet[i].Length=0;
	}
	/* '$' is add into the follow set of the 'program' nonterminal */ 
	   FollowSet[0].item[0] = OVER ;
	   FollowSet[0].Length ++ ;
	/*****************************************************************/
	 bool isEmpty;     /* indicating whether empty is existed in the first set */
	 changed = true;
	 while( changed )  	 {
		num ++;
		changed = false;
		for( i=0; i < 86; i++)    {           
			if( Grammar[i][2] == -1 )   {
				if( Grammar[i][1] < 30
 )  /* if the right side of a production is only one terminal , ignore it */
			    continue;
 setMerge(  &FollowSet[ Grammar[i][1]-30 ], &FollowSet[ Grammar[i][0]-30 ], false);
			}      
    /* if the right side of a production is only a nonterminal, add the follow set of the left part */
 	/* to the follow set of the right nonterminal . */
			else        {
				  k = 1;
				while( Grammar[i][k+1] != -1 )    {
				 	if( Grammar[i][k] < 30 ) 
   /*  with more than one elements on the right side of the production,*/
						{                       /* terminal is ignored */
						   k++；    continue;
						}
					isEmpty = true;
					for( j=k+1; isEmpty && Grammar[i][j] != -1; j++ )    {
						if( !Index( &FirstSet[ Grammar[i][j] ], empty ) )
							  isEmpty = false;                    
        /* empty is not exist in the first set */
				setMerge( &FollowSet[ Grammar[i][k]-30 ], &FirstSet[ Grammar[i][j] ], false);				         /* add first(Xi+1Xi+2....Xn)- empty to follow(Xi) */ 
					 }
	if(  isEmpty )    /* if empty is in first(Xi+1Xi+2...Xn)  then add follow(A) to Follow(Xi)*/
			{
			setMerge(&FollowSet[ Grammar[i][k]-30 ], &FollowSet[ Grammar[i][0]-30 ], false );
				}
			 k++;
				}	  
      if( Grammar[i][k] >= 30 )   /*  The last elemrnt of a production is a nonterminal  */
			      /* add the follow set of the left side to the follow set of the last nonterminal */
		  setMerge(  &FollowSet[ Grammar[i][k]-30 ], &FollowSet[ Grammar[i][0]-30 ] , false );	    	}				
		}
	}
}
在first集合与follow集合的基础上构建LL(1)文法的分析表代码：
   void initParseTable()
{
   // int table[48][28];
   int i,j,k, temp[50];
   bool isEmpty = false;
      for (i=0 ; i<48; i++ )
	for (j=0; j<28; j++)
			 table[i][j] = -1;
    for (i=0; i<50; i++)
	    temp[i] = -1;
///////////////////////////////////////////////////////////////////////
   for (i=0; i<86; i++)    {
      for (j=1; Grammar[i][j] != -1; j++)   {
		    if ( Grammar[i][j] < 30 )
			{         /* add A-> ... to the entry M[A,a] */
               if (table[ Grammar[i][0] - 30 ][ Grammar[i][j] -2] == -1)
			        table[ Grammar[i][0] - 30 ][ Grammar[i][j] -2] = i;
               else 
                   printf(" ambuguity occurs! \n");
			}    
			else  {        //Grammar[i][j]>=30 means nonterminals
			   for( k=0; k < FirstSet[ Grammar[i][j] ].Length; k++)
			           if ( FirstSet[ Grammar[i][j]].item[k] == empty ) 
			                    isEmpty = true ;
			      if ( isEmpty == false )    {
                     for ( k=0; k < FirstSet[ Grammar[i][j]].Length ; k++)
					 {
						 if ( table[ Grammar[i][0]-30 ][ Grammar[i][j] -2 ]  == -1)
							   table[ Grammar[i][0]-30 ][ Grammar[i][j] -2 ] = i;
						 else
							 printf("Ambuguity occurs!\n");
					 }
				  }
 	 			 else    {                   // isEmpty==true    
				 for ( k=0; k < FirstSet[ Grammar[i][j]].Length ; k++)   {
					 if ( table[ Grammar[i][0]-30 ][ Grammar[i][j] -2 ]  == -1)
							   table[ Grammar[i][0]-30 ][ Grammar[i][j] -2 ] = i;
						 else
							 printf("Ambuguity occurs!\n");
     /* if empty is in first(...), for each element 'a' of follow(A)(a token or $). add A->.. to M[A,a] */                   
                  for ( k=0; k < FollowSet[ Grammar[i][j]].Length; k++ )
                      if (table[Grammar[i][0] -30][ Grammar[i][j] -2] == -1)
							table[Grammar[i][0] -30][ Grammar[i][j] -2] = i;                          
			 }
	     }
    }
}
}
文法分析和构造分析树的源程序：
bool  createTree()
{
     int stack[200], production[10];
	 int *bottom, *top , i = 0, j=0, next, location, length;
     TreeNode *current, *fresh;
	 bottom = top= stack; 
	 TreeNode *Trace[20];   /* record the location of generated nonterminal nodes */
	 int tracePointer=0;
	 stack[0]= OVER;
	 top++;
	 *top = program;
     for ( i=0 ; i<10; i++)
			 production[i]=-1;
	 head = new TreeNode;
     current = head;
	 initTreeNode(current);
	 Trace[tracePointer]=current;
	 tracePointer ++;
	 strcpy( head->stringValue, getName(*top));   
     
	 while ( next=getToken( token ) != ENDF)
	 {
       if ( *top >=30)   {
	   	location = table[*top - 30][next];     /* look up the parse table */
			if (location == -1)    {
			    printf("Syntax Error!\n");
				return false;
			}    
for ( i = 0; Grammar[location][i+1] != -1; i++ )  /* count the right part's length of production */
			length++;
   		    current = Trace[tracePointer]; 
		    tracePointer--;
			current->childNum = length;
while( length != 0)     /* push the production into stack and generate the son node of current nonterminal */
			 {                     
                 top++;
                *top = Grammar[location][length];
                 fresh = new TreeNode;
				  initTreeNode(fresh);
                 current->child[j] = fresh;        
                 strcpy( fresh->stringValue, getName( Grammar[location][length] ) );
			      current->typeValue = Grammar[location][i];
                  if ( *top >= 30 )
				  {
                   Trace[tracePointer] = fresh;
				    tracePointer++;
				  }
				 length--;
			  }
		}
	    if( *top < 30 )  {
		   if ( *top == next )  { /*  match  */
		     if ( *top==OVER )    {
			    printf("Accept!\n");
				 return true;
			 }
			 else {
			  printf(" match !\n" );
             top--;
			 }
		   }
       else { 
	    printf("Error at line %d!\n",line);
         return false;
	      }
	   } 
	 }
	return true;
}             
对每个新生成的结点的初始化函数：
void initTreeNode(TreeNode *p)
{
  int i;
  for (i=0; i<10; i++)
	  p->child[i]=NULL;
  p->childNum=0;
  p->typeValue=-1;
  i=0;
  while(i<20)
  {
      p->stringValue[1]='\0';
      i++;  
  }
}
在生成集合的过程中用到的辅助函数的定义为：
bool Index (Set *pointer,int target)    /* query target in the pointer set  */  
{                             /* if target is existed in the set then return true. */
	int i=0;
	while(i < pointer->Length)
	{
		if(pointer->item[i] == target)
			 return true;
		 i++;
	}
	return false;
}

void addItem(Set *pointer,int key)  /*  add key into pointed set  */
 {	
	 int i;
	 i= pointer->Length;
	 pointer->item[i] = key;
	 i++;
	 pointer->Length = i;
    changed = true;
 }
  void setMerge( Set * destinationSet, Set * sourceSet, bool type )    
{
	  int i = 0;                    /* Elements in source set are all add into destination set,*/
	 while( i < sourceSet -> Length )                       /* whether empty element shall be added is determined by type value.*/
	 {                  /* if type equals to 'false', empty is not allowed to add into.*/
         if( sourceSet -> item[i] == empty && type ==false) /* type equals to 'true', empty is allowed to add into.  */   
		 {
		    i++;
		    continue;
		 }
        if( ! Index( destinationSet , sourceSet -> item[i] ) )
		 {
		    addItem( destinationSet, sourceSet -> item[i] );	
			i++;		
		 }
	 }
}
theparsingprocessofwordsandsyntaxincompilerbymyself.txt - 源码说明

本页面展示了「我自己写的一个C - 语言的词法分析与语法分析两个编译步骤」中的 theparsingprocessofwordsandsyntaxincompilerbymyself.txt 源码文件，采用文本编程语言编写，共 407 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与C语言相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?