📄 word_analysis.c

📁 一个C语言的词法分析器
💻 C
字号:
/*++
	module name: word_analysis.c  
	kernel module of linear scan
--*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>  /* strtol(,,)  */
#include "symbol_table.h"
#include "error_.h"

/* the maximum file size */
#define	 MAX_FILE_SIZE	(1024*100)
/**********************************************************/
/* Scan cursor: points at the current character; attached to file_buffer by file_to_buffer(). */
char		*p_char  = NULL;
/* Type of the most recently produced token; get_token() uses it to tell a sign from an operator. */
int			pre_type = TYPE_UNDEF;
/* Scratch buffer in which the text of the token being scanned is assembled. */
char		str_tem[MAX_LENGTH] = "";
/* The whole source file, stored as one '\0'-terminated string. */
char		file_buffer[MAX_FILE_SIZE] = ""; 
/**********************************************************/
/* copy certain file to buffer and attach our global pointer to file buffer */
int file_to_buffer(char *filename)
{
	FILE *pfile;
	int  i  = 0;
	pfile = fopen(filename, "r" );
	if( pfile == NULL ) /* bad file name */
		return FALSE;
	do
	{
		file_buffer[i++] = fgetc(pfile );
	}while( file_buffer[i-1] != EOF );
	/* 将文件内容写入缓冲区 并写入字符串结尾符  */
	file_buffer[i-1] = '\0';
	/* we don't need pfile any more             */
	fclose(pfile );
	/* attach our char_pointer to file buffer   */
	p_char = file_buffer;
	return TRUE;
}
/* Return TRUE when the scan cursor is on the end-of-input marker ('\0' or EOF), else FALSE. */
int is_end  (void)
{
	if( *p_char == '\0' || *p_char == EOF )
		return TRUE;
	return FALSE;
}
/*
 * Advance the scan cursor p_char by one character.
 * Returns FALSE (without moving) when the current character is the
 * end-of-input marker, TRUE otherwise.
 */
int pop_char(void)
{
	int at_end = ( *p_char == EOF || *p_char == '\0' );

	if( !at_end )
	{
		p_char++;
		return TRUE;
	}
	return FALSE;
}
/*
 * Skip everything that is not part of a token: blanks, tabs, newlines,
 * C style comments and "//" comments.  The global `line` counter is bumped
 * for every newline that is skipped.
 *
 * Returns TRUE when p_char is left on the first character of a token.
 * Returns FALSE when the end of the input is reached first (this includes an
 * unterminated comment), so the caller can stop scanning.
 */
int goto_token(void)
{
	for( ;; )
	{
		/* End of input after trailing whitespace/comments: there is no token. */
		if( is_end() )
			return FALSE;

		switch( *p_char )
		{/* the buffer was read in text mode, so line ends are a single '\n' */
		case '\n':
			line++;  /* fall through */
		case ' ' :
		case '\t':
			pop_char();
			break;
		case '/'  :
			if( *(p_char+1) == '*' )   /* C style comment */
			{
				p_char += 2;
				while( !(*p_char == '*' && *(p_char+1) == '/') )
				{
					/* Test the current character so that a newline placed
					   right after the comment opener is counted too. */
					if( *p_char == '\n' )
						line++;
					if( !pop_char() )
						return FALSE;	/* unterminated comment */
				}
				p_char += 2;   /* skip '*' and '/' */
				break;
			}
			else if( *(p_char+1) == '/' ) /* C++ style comment */
			{
				p_char += 2;    /* skip "//" */
				while( *p_char != '\n' )   /* stop at the end of the line */
					if( !pop_char() )
						return FALSE;
				pop_char();		/* skip the '\n' itself */
				line++;
				break;
			}
			/* a lone '/' is an operator, i.e. the start of a token */
			return TRUE;
		default:     /* the current character belongs to some token */
			return TRUE;
		}
	}
}
/**********************************************************************/
/* some functions for certain char *p_char judging  */
/* Non-zero when the current character is a space. */
int is_blank(void)
{
	char current = *p_char;

	return current == ' ';
}
/* Non-zero when the current character is a horizontal tab. */
int is_tab  (void)
{
	char current = *p_char;

	return current == '\t';
}
/* Return TRUE when the current character is a decimal digit, else FALSE. */
int is_digit (void)
{
	char current = *p_char;

	return ( current >= '0' && current <= '9' ) ? TRUE : FALSE;
}
/*
 * Return TRUE when the current character may appear in an identifier as a
 * letter: A-Z, a-z or the underscore.  Returns FALSE otherwise.
 */
int is_letter(void)
{
	char current  = *p_char;
	int  is_upper = ( current >= 'A' && current <= 'Z' );
	int  is_lower = ( current >= 'a' && current <= 'z' );

	return ( is_upper || is_lower || current == '_' ) ? TRUE : FALSE;
}
/*
 * Return TRUE when query_reserve_table() yields an entry with a non-NULL
 * p_item for `str`, i.e. the string is a reserved word; FALSE otherwise.
 */
int is_reserve(char *str)
{
	ENTRY found = query_reserve_table(str );

	if( found.p_item == NULL )
		return FALSE;
	return TRUE;
}
/*
 * Return TRUE when the current character, taken as a one-character string,
 * is found by query_specifier_table() (delimiters and operators);
 * FALSE otherwise.
 */
int is_specifier(void)
{
	char  one_char[2];
	ENTRY found;

	one_char[0] = *p_char;
	one_char[1] = '\0';
	found = query_specifier_table(one_char );
	if( found.p_item == NULL )
		return FALSE;
	return TRUE;
}
/*
 * Decode an escape sequence.  On entry p_char is on the first character
 * AFTER the backslash.
 *
 * On success the decoded value is stored in *p_c, p_char is moved to the
 * character following the escape, and TRUE is returned.  On failure FALSE is
 * returned and *p_c is not written.  The function does not check that the
 * next character is a closing quote; that is left to the caller.
 *
 * Recognised forms: \n \t \b \r \f \\ \' \" , \xd or \xdd (hexadecimal) and
 * \d, \dd or \ddd (octal, as in C: at most three digits 0-7, so '\101' is
 * 'A' and a following digit is an ordinary character).
 */
int is_tranmean (char *p_c)
{
	char tem[4] = "";	/* digits of a numeric escape, always '\0'-terminated */
	int  i      = 0 ;

	switch( *p_char )
	{
	case 'n':
			*p_c = '\n';
			break;
	case 't':
			*p_c = '\t';
			break;
	case 'b':
			*p_c = '\b';
			break;
	case 'r':
			*p_c = '\r';
			break;
	case 'f':
			*p_c = '\f';
			break;
	case '\\':	/* fall through */
	case '\'':
	case '\"':
			*p_c = *p_char;
			break;
	case 'x':
		{/*  \xdd  */
			pop_char();
			/* at least one hex digit is required */
			if( ! (is_digit() || (*p_char >= 'A' && *p_char <= 'F')
				|| (*p_char >= 'a' && *p_char <= 'f')) )
				return FALSE;
			for( i = 0; is_digit() || (*p_char >= 'A' && *p_char <= 'F')
				|| (*p_char >= 'a' && *p_char <= 'f'); i++ )
			{
				if( i > 1 )		/* more than two hex digits is rejected */
					return FALSE;
				tem[i] = *p_char;
				pop_char();
			}
			*p_c =(char) strtol(tem, NULL, 16 );
			return TRUE;
		}
	default:
		{/*  \ddd : octal, one to three digits  */
			if( *p_char < '0' || *p_char > '7' )
				return FALSE;
			for( i = 0; i < 3 && *p_char >= '0' && *p_char <= '7'; i++ )
			{
				tem[i] = *p_char;
				pop_char();
			}
			*p_c =(char) strtol(tem, NULL, 8 );
			return TRUE;
		}
	}/* switch */

	/* single-character escapes: step past the escape letter */
	pop_char();
	return TRUE;
}
/****************************************************************/
/* some functions deal with certain type  */
/*
 * Scan a delimiter/operator token starting at p_char.
 * Delimiters are assumed to be at most two characters long: the two-character
 * spelling is looked up first and the one-character spelling is the fallback.
 * The matched characters are consumed, pre_type is set to the token type and
 * the table entry is returned.
 */
ENTRY deal_specifier (void)
{
	ENTRY entry;
	char  spelling[3];

	spelling[0] = *p_char;
	spelling[1] = *(p_char+1);
	spelling[2] = '\0';

	entry = query_specifier_table(spelling );
	if( entry.type != TYPE_UNDEF )
	{
		pop_char();		/* two-character token: consume the extra character */
	}
	else
	{
		spelling[1] = '\0';
		entry = query_specifier_table(spelling );
	}
	pop_char();
	pre_type = entry.type; /* remembered to decide later whether '+'/'-' is a sign */
	return entry;
}
/* 处理字符串常量 将其添加入字符串常量表 */
ENTRY deal_double_   (void) 	/* "		*/
{
	int i;
	ENTRY entry;
	pop_char();  /* skip the first " */
	/* stop when meet another " except \"  */
	for(i=0; !(*p_char == '\"' && *(p_char-1) != '\\'); i++ )
	{/* 可能的错误 缺少配对的 " */
		/* fgetc func cannot get '\r', it only get '\n' */
		if(	*p_char == '\\' )
		{
			pop_char();
			if( is_tranmean(str_tem +i ) )
				continue;
			else
				p_char--;	/* back to pre char  */
		}
		if( /* *p_char == '\r' && */ *(p_char+1) == '\n' )
		{
			pop_char();  /*  go to \n, next pop_char make us go to next line */
			report_error(ERROR_ILLEGAL_STR, line++, NULL );
			break;
		}
		str_tem[i] = *p_char;
		pop_char();
	}
	str_tem[i] = '\0';
	entry.p_item = add_cst_str(str_tem );
	entry.type   = TYPE_CONST_STR;
	pop_char();            /*  get out of this token  */
	strcpy(str_tem, "" );  /* clear the str_tem array for next call */
	pre_type = entry.type; /* 为判断+ 和- 的意义做准备 */
	return entry;
}	
/*
 * Scan a character constant.  On entry p_char is on the opening '\''.
 *
 * A plain character must be followed directly by the closing quote; an
 * escape is decoded by is_tranmean() into str_tem[0] and must also be
 * followed by the closing quote.  Otherwise ERROR_ILLEGAL_CHAR is reported,
 * the rest of the line is skipped and `line` is incremented.
 *
 * In every case str_tem[0] is added with add_cst_char(), one more character
 * is consumed, pre_type is updated and a TYPE_CONST_CHAR entry is returned.
 */
ENTRY deal_single_   (void) 	/* '		*/
{
	ENTRY entry = { TYPE_UNDEF, NULL };
	int   malformed;

	pop_char();  /* skip the opening ' */
	if( *p_char == '\\' )
	{	/* escape sequence */
		pop_char();		/* step past the backslash */
		malformed = ( !is_tranmean(str_tem ) || *p_char != '\'' );
	}
	else
	{	/* ordinary character */
		str_tem[0] = *p_char;
		malformed  = ( *(p_char+1) != '\'' );
		if( !malformed )
			pop_char();	/* move onto the closing quote */
	}

	if( malformed )
	{
		/* Only report where and what went wrong; do not search for a
		   matching quote.  The remainder of this line is not analysed. */
		report_error(ERROR_ILLEGAL_CHAR, line, NULL );
		while( *p_char != '\n' && pop_char() )
			;
		line++;
	}

	entry.p_item = add_cst_char(str_tem[0] );
	entry.type   = TYPE_CONST_CHAR;
	pop_char();  /* get out of this token */
	strcpy(str_tem, "" );  /* clear the str_tem array for next call */
	pre_type = entry.type; /* remembered to decide later whether '+'/'-' is a sign */
	return entry;
}	
/* 处理整形常量和实型常量 */
ENTRY deal_num  	 (void)
{	
	int i, n_tem;
	double d_tem;
	int b_dot   = FALSE;  /* 字串中有无.  */
	int b_e		= FALSE;  /* 字串中有无e 若既无.也无e则认为是整型   */
	ENTRY entry = { TYPE_UNDEF, NULL };
	for(i=0; (is_digit() || *p_char == '.' || *p_char == 'e') ; i++ )
	{/* 如果属于整型和实型的要求的字符 则进行处理 */
		if( *p_char == '.' )  
			b_dot += TRUE;
		if( *p_char == 'e' )
		{
			b_e   += TRUE;
			if( *(p_char+1) == '-' || *(p_char+1) == '+' )
			{
				str_tem[i++] = *p_char;
				str_tem[i]	 = *(p_char+1);
				p_char += 2;
				continue;
			}
		}
		str_tem[i] = *p_char;
		pop_char();
	}
	str_tem[i] = '\0';	

	if( b_dot == FALSE && b_e == FALSE )	/* 整型 */
	{
		n_tem = strtol (str_tem, NULL, 10 );
		if( pre_type == TYPE_NEGTIVE )
			n_tem = 0 - n_tem;
		entry.p_item = add_cst_num (n_tem );
		entry.type   = TYPE_CONST_NUM;
	}
	else if( b_dot > 1 || b_e >1  )
	{	/*  错误 包含非法输入e.g.  0.45...435ee543+33 要进行处理  */
		/*  为了不影响以后的语法分析 向常量表中填入一项 */
		d_tem = atof(str_tem );
		entry.p_item = add_cst_real (d_tem );
		entry.type   = TYPE_CONST_REAL;
		/*  need error handing... */
		report_error(ERROR_ILLEGAL_REAL, line, NULL );
	}
	else									/* 实型 */
	{	/*   */
		d_tem = atof(str_tem );
		if( pre_type == TYPE_NEGTIVE )
			d_tem = 0 - d_tem;
		entry.p_item = add_cst_real(d_tem );
		entry.type   = TYPE_CONST_REAL;
	}
	strcpy(str_tem, "" );  /* clear the str_tem array for next call */
	pre_type = entry.type; /* 为判断+ 和- 的意义做准备 */
	return entry;
}
/* 处理以字母开始的字串 包括保留字和标识符 */
ENTRY deal_other	 (void) 	/* reservers and identifiers */
{
	int i;
	ENTRY entry = { TYPE_UNDEF, NULL };
	for(i=0; is_digit() || is_letter(); i++ )
	{
		str_tem[i] = *p_char;
		pop_char();
	}
	str_tem[i] = '\0';
	entry = query_reserve_table(str_tem );
	if( entry.type == TYPE_UNDEF )
	{
		entry.p_item = add_identifier(str_tem );
		entry.type   = TYPE_IDENTIFIER;
	}
	strcpy(str_tem, "" );  /* clear the str_tem array for next call */
	pre_type = entry.type; /* 为判断+ 和- 的意义做准备 */
	return entry;
}
/***********************************************************************/
/*
 * Return the entry of the next token and leave p_char just behind it, ready
 * for the following call.
 *
 * A { TYPE_UNDEF, NULL } entry is returned when goto_token() fails, and also
 * after an unknown character has been reported and skipped.
 */
ENTRY get_token(void )
{
	ENTRY entry = { TYPE_UNDEF, NULL };

	if( !goto_token() )			/* scanning should stop here */
		return entry;

	if( is_specifier() )		/* delimiters and operators */
	{
		int is_sign       = ( *p_char == '+' || *p_char == '-' );
		int digit_follows = ( *(p_char+1) >= '0' && *(p_char+1) <= '9' );
		int after_operand = ( pre_type == TYPE_IDENTIFIER
						   || pre_type == TYPE_CONST_NUM
						   || pre_type == TYPE_CONST_REAL
						   || pre_type == TYPE_CONST_CHAR );

		/* '+' or '-' directly before a digit, and not preceded by an
		   identifier or constant, is a sign rather than an operator. */
		if( is_sign && digit_follows && !after_operand )
		{
			pre_type = ( *p_char == '+' ) ? TYPE_POSITIVE : TYPE_NEGTIVE;
			pop_char();
			return deal_num();
		}
		return deal_specifier();
	}

	if( *p_char == '\"' )		/* string constant */
		return deal_double_();

	if( *p_char == '\'' )		/* character constant */
		return deal_single_();

	if( is_digit() )			/* integer or real constant */
		return deal_num();

	if( is_letter() )			/* reserved word or identifier */
		return deal_other();

	/* unknown character: report it and skip it */
	report_error(ERROR_UNKNOWN_CHAR, line, p_char );
	pop_char();
	return entry;
}
/**********************************end of file*************************************/
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -