📄 parse.cpp

📁 原理同简单的编译器分析
💻 CPP
字号:
// parse.cpp: implementation of the token and parse_array classes.
//
//////////////////////////////////////////////////////////////////////

#include "csascii.h"
#include <ctype.h>
#include <stdlib.h> //include memset
#include "parse.h"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////


// Input stream currently being tokenized; set by the token constructor and
// read by getnext() / pushed back to by get_token().
static ifstream* tokenstream;

// Number of literal characters collected so far for the token being read.
// get_token() also uses it as the write index into the literal buffer while
// its recursion unwinds.
static int length_;

// Current line number of the input (1-based); advanced by getnext() on '\n'
// and reported by error().
static int line_number = 1;


// Current nesting depth of '@?' ... '@.' (if / endif) constructs.
static int if_counter = 0;
// Section of the input being read; starts in the preamble and is switched
// by the '@m' / '@c' directives.
static phase program_section = preamble;
// Non-zero once the '@e' (end) directive has been read.
static int end_of_file = 0;


// Constructs a parse_array by reading tokens from `input`.
// The section recorded in p_section is the one that is current *before*
// any token is read, because reading tokens may switch program_section.
parse_array::parse_array( ifstream& input )
{
	token_count = 0;
	p_section = program_section;
	parse_stream = &input;

	// Reads the tokens and fills tokenarray / end_.
	build_array();
}



//建立一个对象数组
//同时调用token构造函数以初始化
void parse_array::build_array()
{
	token* tk = new token( *parse_stream );
    
	//'@main' or '@conclusion' or '@end' appears in the file "argv[1]" 
	if ( !end_of_file && tk->token_type() != phase_change )
	{
		token_count++;
		build_array();
	}

	else
	{
		tokenarray = new token* [( end_ = token_count )];

		//函数名: memset  
        //功  能: 设置s中的所有字节为ch, s数组的大小由n给定  
        //用  法: void *memset(void *s, char ch, unsigned n); 
		//以 0  初始化tokenarray对象
		memset( tokenarray, 0, end_ * sizeof( token* ) );

		if ( token_count )
		{
			token_count--;
		}
		return ;
	}

	tokenarray[token_count--] = tk;
}



// Destroys every token owned by the array, then the array itself.
parse_array::~parse_array()
{
	// Unfilled slots are null; deleting a null pointer is a no-op.
	for ( int i = 0; i < end_; i++ )
	{
		delete tokenarray[i];
	}

	// tokenarray was allocated with new[], so it must be released with delete[].
	delete[] tokenarray;

}



// Stream inserter for a token.
// String tokens are written as their literal text, field-number tokens as
// " fieldnumber: <n>" followed by a newline. Any other token type writes a
// diagnostic line to cout (not to `s`).
ostream& operator << ( ostream& s, token& t )
{
	if ( t.ttype == string__ )
	{
		// The literal is stored as unsigned char; print it as a C string.
		s << ( char* )t.literal;
	}
	else if ( t.ttype == fieldnumber )
	{
		s << " fieldnumber: " << t.fieldnum << endl;
	}
	else
	{
		cout << "break;:" << endl;
	}

	return s;

}


// Checked element access.
// Returns the token stored at `index`. If `index` is negative, not below
// end_, or refers to a slot that was never filled, an error is printed to
// cerr and the program exits with status 1.
token& parse_array::operator []( int index )
{
	// index < 0 must be rejected before tokenarray[index] is evaluated.
	if ( index < 0 || index >= end_ || tokenarray[index] == 0 )
	{
		cerr << "parse_array error: index " << index
			 << " out of bounds\n";
		exit( 1 );
	}
	return *tokenarray[index];
}


// Constructs a token by reading the next one from `input`.
token::token( ifstream& input )
{
	// No literal characters have been collected for this token yet.
	length_ = 0;
	// The file-scope stream pointer is what getnext() / get_token() read from.
	tokenstream = &input;

	get_token();
}

// Releases the literal buffer, which only string tokens own.
token::~token()
{
	if ( ttype == string__ )
	{
		// literal is allocated with new unsigned char[...] in get_token(),
		// so it must be released with delete[].
		delete[] literal;
	}
}


// Reads the next token from tokenstream into this object.
//
// A token is either an '@' directive or a run of literal text.
//
// Literal text is collected recursively: every ordinary character bumps
// length_ and recurses. When the '@' that ends the run is reached, that '@'
// is pushed back onto the stream, the literal buffer is allocated with
// length_ + 1 bytes and terminated, and each returning level stores its own
// character at index length_-- (the text is written back to front).
//
// Directives are only interpreted when no literal text has been collected
// yet (length_ == 0):
//   @!   comment: skip the rest of the line and read the next token
//   @p   preamble marker: skip the rest of the line and read the next token
//   @m   switch to the main section        (phase_change token)
//   @c   switch to the conclusion section  (phase_change token)
//   @e   end of input
//   @(n) field number n                    (fieldnumber token)
//   @<n> literal character with code n
//   @?n@: start of an if on field n        (if_ token)
//   @~   else                              (else_ token)
//   @.   endif                             (endif_ token)
//   @@   literal '@'
// Any other character after '@' is a fatal error.
void token::get_token()
{
	
	char c;

	// Read the next character.
	getnext( c );
	if ( c == '@' ) 
	{
		if ( length_ == 0 ) 
		{
			getnext( c );
			switch( c )
			{
			// '!': comment -- skip the line and read the following token.
			case '!':
			
				dumpline();
				get_token();
				// Return instead of falling through to the literal store
				// below: if the following token is not a string, length_ is
				// still 0 and literal was never allocated, so storing 'c'
				// would write through an invalid pointer.
				return;

			// 'p': preamble marker -- skip the line and read the following token.
			case 'p':case 'P':
			
				if ( program_section != preamble )
				{
					error( "only one preamble allowed" );
				}
				dumpline();
				get_token();
				// Same reason as for '!': no literal store for this level.
				return;

			// 'm': skip the line, enter the main section and return.
			case 'm': case 'M':
				dumpline();
				program_section = tmain;
				ttype = phase_change;
				return;

			// 'c': skip the line, enter the conclusion section and return.
			case 'c': case 'C':
				dumpline();
				program_section = conclusion;
				ttype = phase_change;
				return;

			// 'e': mark the end of the input and return.
			case 'e': case 'E':
				end_of_file++;
				ttype = fieldnumber;
				if ( if_counter )
				{
					error( "unclosed 'if' statement(s) ");
				}
				return;

			// '(': fieldnum = value inside the parentheses, then return.
			case '(':
				
				if ( program_section == preamble || 
					program_section == conclusion )
				{
					error( "@() not sllowed in preamble or conclusion" );
				}

				// fieldnum = value inside the parentheses
				fieldnum = get_value( ')', "@()" );

				ttype = fieldnumber;

				return ;
			
			// '<': c = value inside the angle brackets; it becomes the first
			// character of a literal, so count it and recurse.
			case '<':
				c = get_value( '>', "@<>" );
				length_++;
				get_token();
				break;

			// '?': fieldnum = value up to the next '@', which must be
			// followed by ':'; open one more if level and return.
			case '?':
				if ( program_section == preamble || 
					program_section == conclusion )
				{
					error( "@? not sllowed in preamble or conclusion" );
				}
				fieldnum = get_value( '@', "@?@" );
				ttype = if_;
				getnext( c );
				if ( c != ':' )
				{
					error( "@? must be followed by @: (then)" );
				}
				
				
				if_level = ++if_counter;
				return ;

			// '~': else of the current if level.
			case '~' :
	
				ttype = else_;
				if_level = if_counter;
				return ;

			// '.': endif -- close the current if level and return.
			case '.':
				ttype = endif_;
				if_level = if_counter--;
				if ( if_counter < 0 )
				{
					error( "incorrect nesting of if-then-else clauses" );
				}
				return;

			// '@': an escaped '@' -- it becomes the first character of a
			// literal, so count it and recurse.
			case '@':
				
				length_++;
				get_token();
				break;

			default:
				// The list matches the characters this switch accepts.
				error( "'@' must be followed by:",
					"'!', '(', '<', '?', '~', '.', 'p', 'm', 'c', 'e' or '@'" );
			}
		}

		// length_ is non-zero: this '@' ends the literal collected so far.
		else 
		{
			
			// Push the '@' back so the next token starts with it.
			tokenstream->putback( c );

			literal = new unsigned char[length_ + 1];
			
			literal[length_--] = '\0';
			ttype = string__;
			return ; 
		}
	}

	// Ordinary character: count it and recurse for the rest of the literal.
	else
	{
		
		length_++;
		get_token();

	}

	// Unwinding: store this level's character into its slot of the literal.
	if ( length_ >= 0 )
	{
		literal[length_--] = c;
	}
}



//提示出错
void token::error( char* msg, char* msg2 )
{
	cerr << "token error on line " << line_number << ": "
		 << msg << " " << msg2 << "\n";
	exit( 1 );
}



//把<>, ()内的字符转化为整数
unsigned char token::get_value( char delimiter, char* msg )
{
	char c;
	char buf[5];
	int i = 0;
	while ( getnext( c ), c != delimiter )
	{
		//int isdigit( int )  若ch是数字( '0' - '9' )返回非0值,否则返回0
		if ( !isdigit( c ) )
		{
			error( "must use only digits inside", msg );
		}
		buf[i++] = c;
		
		
	}

	buf[i] = 0;
	//double atoi(char *nptr) 将字符串nptr转换成整数并返回这个整数
	return atoi( buf );
}


// Discards the rest of the current input line, including its '\n'.
void token::dumpline()
{
	char ch;
	do
	{
		getnext( ch );
	} while ( ch != '\n' );
}

// Reads the next character of the input into `c`.
// Reading after '@end' has been seen, or failing to read a character, is a
// fatal error. Every '\n' read advances the line counter used in error
// messages.
void token::getnext( char& c )
{
	if ( end_of_file )
	{
		error( "attempt to read after @end statement\n",
			"missing @conclusion ?" );
	}

	// Test the stream state as a whole, not only eof(): a read that fails
	// without reaching end of file leaves c unset and would otherwise never
	// terminate the callers' loops.
	if ( !tokenstream->get( c ) ) 
	{
		error( "@end statement missing" );
	}

	if ( c == '\n' )
	{
		// Count lines for error reporting.
		line_number++;
	}

}




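// putback() puts the most recently read character back into the stream and
// moves the internal read position back by one character.
// In other words, if a character is read with get() and then handed to
// putback(), the next call to get() returns that same character again.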
💿 文件大小 17 K
👤 上传用户 zyhunicom
📂 所属分类编译器/解释器
🏷️ 相关标签

#编译器 #分
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -