⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scanner.cpp

📁 C-MINUS编译器
💻 CPP
字号:
/**:	scanner.cpp	implementation file
&
*		author:	lonelyforest;
*		date:	2006.03.16
*/

#include "scanner.h"
#include <cstdio>


// Copy assignment for Token: takes over the kind (type) and the spelling
// (str) of `rh`, then returns this token so assignments can be chained.
Token &Token::operator =(const Token &rh)
{
	this->type = rh.type;
	this->str  = rh.str;
	return *this;
}

/**: constructor
&
*		Hands `filename` to the Tokenizer base, then puts the scanner into
*		its initial state: current token is k_NONE, nothing is pushed back,
*		and the reserved-word table is filled.
&
*		author:	lonelyforest;
*		date:	2006.03.16
*/
//-----------------------------------------------------------------------------
Scanner::Scanner(const string &filename)
	: Tokenizer(filename)
{
	m_token.type = k_NONE;
	m_pushed = false;

	// fill the reserved-word table
	build_key_map();
}
// Destructor: the body has nothing to do.
Scanner::~Scanner()
{
}

// push current token
//-----------------------------------------------------------------------------
void  Scanner::push()
{
	m_pushed = true;
}

// Fill key_word with the reserved words of the language.
//
// The i-th spelling in the table is paired with tokenType(i), so the order
// of the table must match the tokenType enumerators whose values are
// 0, 1, 2, ... (tokenType is declared outside this file -- keep both in sync).
void Scanner::build_key_map()
{
	static const char *const minic_keywords[] = {
		"int", "else", "return", "void",
		"if", "while", "read", "write"
	};

	// Derive the count from the table itself so that adding or removing a
	// keyword can never leave the loop bound stale.
	const int keyword_count =
		static_cast<int>(sizeof(minic_keywords) / sizeof(minic_keywords[0]));

	for (int i = 0; i < keyword_count; ++i)
	{
		Token temp;
		temp.type = tokenType(i);
		temp.str = minic_keywords[i];
		key_word.push_back(temp);
	}
}

/**: getListFile
&
*	Create the trace log file.
&
*	When TraceSource is set, writes every entry of list_msg_ followed by an
*	error/warning summary to a file named after source_name with its
*	extension replaced by ".log" (".log" is simply appended when the file
*	name has no extension).
&
*	Returns true when the log file was written.  Returns false when
*	TraceSource is not set, or when the file could not be created (that
*	failure is also reported through outputMsg).
&
*	author: lonelyforest
*	date:	2006.03.16
*/
//-----------------------------------------------------------------------------
bool Scanner::getListFile()
{
	if (!TraceSource)
	{
		return false;
	}

	// Strip the extension.  Only a '.' located after the last path separator
	// counts: this keeps names without any '.' working (rfind returns npos,
	// which must not be passed to erase) and keeps a dotted directory name
	// such as "../src" or "v1.2/prog" intact.
	std::string listfile(source_name);
	const std::string::size_type dot = listfile.rfind('.');
	const std::string::size_type sep = listfile.find_last_of("/\\");
	if (dot != std::string::npos
		&& (sep == std::string::npos || dot > sep))
	{
		listfile.erase(dot);
	}
	listfile += ".log";

	std::ofstream listing(listfile.c_str(), std::ofstream::out);

	if (!listing)
	{
		// Built as a string: the path may be arbitrarily long, so it is not
		// formatted into the fixed msg_temp buffer.
		const std::string msg =
			"create source list file \"" + listfile + "\" fail...";
		outputMsg(-1, msg.c_str());

		return false;
	}

	for (std::vector< std::string >::const_iterator iter = list_msg_.begin();
		iter != list_msg_.end();
		++iter)
	{
		listing << *iter;
	}

	listing <<"\n---------------------------- Done. ----------------------------\n";
	listing << " There has " << errCount()<< " error(s) and "
		<< warnCount() << " warning(s)\n";

	const std::string saved_msg =
		"source list file has save to \"" + listfile + "\"...";
	outputMsg(-2, saved_msg.c_str());    // -2 means source file list.

	return true;
}


//	reservedLookup
// Looks `word` up in the reserved-word table key_word.
// Returns the tokenType stored with the first entry whose spelling equals
// `word`; returns k_ID when no entry matches.
//-----------------------------------------------------------------------------
tokenType	Scanner::reservedLookup(const string& word)
{	// linear scan; a binary search would also work on a sorted table
	const std::vector<Token>::size_type entries = key_word.size();

	for (std::vector<Token>::size_type idx = 0; idx != entries; ++idx)
	{
		if (key_word[idx].str == word)
		{
			return key_word[idx].type;
		}
	}

	// not a reserved word: an ordinary identifier
	return k_ID;
}



/*: stateType
&
*   States of the scanner's finite-state machine; they exist only as a
*   helper for nextToken().
*/
enum    stateType {
	s_START,		// between tokens: nothing recognised yet
	s_INID,			// inside an identifier
	s_INNUM,		// inside an integer literal
	s_INCOMMENT,	// inside a C-style comment
	s_INASSIGN,		// '=' seen, next char decides between "=" and "=="
	s_INL,			// '<' seen, next char decides between "<" and "<="
	s_ING,			// '>' seen, next char decides between ">" and ">="
	s_DONE			// token complete, the scan loop stops
};

//-------------------------------------------------------------------------------

// Character classification used by nextToken().
// The <cctype> functions are only defined for EOF and for values that fit
// in unsigned char; a plain char holding a byte >= 0x80 may be negative, so
// it is converted first.  A char that compares equal to EOF is never
// classified as digit, letter or white space.
static bool scan_is_digit(char c)
{
	return (c != EOF) && (::isdigit(static_cast<unsigned char>(c)) != 0);
}

static bool scan_is_space(char c)
{
	return (c != EOF) && (::isspace(static_cast<unsigned char>(c)) != 0);
}

// First character of an identifier: a letter or '_'.
static bool scan_is_id_start(char c)
{
	return (c == '_')
		|| ((c != EOF) && (::isalpha(static_cast<unsigned char>(c)) != 0));
}

// Any later character of an identifier: a letter, a digit or '_'.
static bool scan_is_id_part(char c)
{
	return scan_is_id_start(c) || scan_is_digit(c);
}

/**: nextToken
&
*	Primary interface of the scanner: reads characters with getNextChar()
*	and returns the next token.
&
*	- If push() was called since the last token, the previous token is
*	  returned again and nothing is read.
*	- White space, C comments and C++ line comments are skipped.
*	- Identifiers go through reservedLookup(), so reserved words come back
*	  with their keyword type.
*	- An unknown character, or a '!' that is not followed by '=', gives a
*	  k_ERROR token; it is counted with add_err() and reported with
*	  outputMsg().
*	- A C comment that is still open at end of file is reported and counted
*	  as an error; the returned token then has type k_NONE.
*	- When TraceSource is set, every token except k_EOF is also appended to
*	  the listing with insert_list().
&
*	The returned reference designates the member m_token, which is
*	overwritten by the next scan.
&
*	author: lonelyforest
*	date:	2006.03.16
*/
//-----------------------------------------------------------------------------
Token& Scanner::nextToken()
{
	// A token handed back with push() is delivered again without scanning.
	if (m_pushed)
	{
		m_pushed = false;
		return m_token;
	}

	// At most this many characters of an identifier or number are stored;
	// further characters are still consumed but dropped.
	const int max_token_len = 43;

	char	t = '\0';				// one character of look-ahead
	bool	save = false;			// append the current char to m_token.str?
	int	tokenStringIndex = 0;	// number of characters stored so far

	stateType  state = s_START;
	m_token.str = "";
	m_token.type = k_NONE;

	while (state != s_DONE)
	{
		char c = getNextChar();
		save = false;

		switch (state)
		{
		case s_START:
			if (scan_is_digit(c))
			{
				save = true;
				state = s_INNUM;
			}
			else if (scan_is_id_start(c))
			{
				save = true;
				state = s_INID;
			}
			else if (c == '=')
				state = s_INASSIGN;
			else if (scan_is_space(c))
				state = s_START;	/* blanks */
			else if (c == '<')
				state = s_INL;
			else if (c == '>')
				state = s_ING;
			else
			{
				// Everything below is decided from this character alone,
				// except for '/' which may re-enter the state machine.
				state = s_DONE;
				switch (c)
				{
				case EOF:
					m_token.type = k_EOF;
					m_token.str  = "EOF";
					break;
				case '+':
					m_token.type = PLUS;
					m_token.str  = "+";
					break;
				case '-':
					m_token.type = MINUS;
					m_token.str  = "-";
					break;
				case '*':
					m_token.type = TIMES;
					m_token.str  = "*";
					break;
				case '%':
					m_token.type = MOD;
					m_token.str  = "%";
					break;
				case '(':
					m_token.type = LPARAN;
					m_token.str  = "(";
					break;
				case ')':
					m_token.type = RPARAN;
					m_token.str  = ")";
					break;
				case '{':
					m_token.type = LBRACE;
					m_token.str  = "{";
					break;
				case '}':
					m_token.type = RBRACE;
					m_token.str  = "}";
					break;
				case ',':
					m_token.type = COMMA;
					m_token.str  = ",";
					break;
				case ';':
					m_token.type = SEMI;
					m_token.str  = ";";
					break;
				case '[':
					m_token.type = LSQUARE;
					m_token.str  = "[";
					break;
				case ']':
					m_token.type = RSQUARE;
					m_token.str  = "]";
					break;
				case '/':	/* check whether a comment starts here */
					t = getNextChar();
					if (t == '*')		// C style comment
					{
						state = s_INCOMMENT;
					}
					else if (t == '/')	// C++ style comment
					{
						// skip the rest of the line (or up to end of file)
						c = t;
						while (c != '\n' && c != EOF)
						{
							c = getNextChar();
						}
						state = s_START;
					}
					else
					{		/* not a comment: plain division */
						m_token.type = DIV;
						m_token.str = "/";
						unGetNextChar();
					}
					break; // end case '/'
				case '!':
					t = getNextChar();
					if (t == '=')
					{
						m_token.type = NEQ;
						m_token.str = "!=";
					}
					else
					{
						// A lone '!' is not a token of the language; count
						// it like any other unknown symbol.
						add_err();
						m_token.type = k_ERROR;
						m_token.str = "!";
						unGetNextChar();
					}
					break;
				default:
					add_err();
					m_token.type = k_ERROR;
					m_token.str = c;
					break;
				}           // end inner switch
			}
			break; // end case s_START

		case s_INCOMMENT:
			// Look at the pair (c, t); when it is not the closing "*/" the
			// second character is handed back so that it becomes the next c.
			t = getNextChar();
			if ((c != EOF) && (t != EOF))
			{
				if ((c == '*') && (t == '/'))
				{	// end of the C style comment
					state = s_START;
				}
				else
				{
					unGetNextChar();
				}
			}
			else
			{ /* the file ended while the comment was still open */
				outputMsg(-1,"maybe comment end before code !");
				add_err();
				m_token.type = k_NONE;
				m_token.str = "--> comment unexpected end before code !";
				state = s_DONE;
			}
			break;  // end state s_INCOMMENT

		case s_INASSIGN:	/* = or == */
			state = s_DONE;
			if (c == '=')
			{
				m_token.type = EQ;
				m_token.str = "==";
			}
			else
			{
				m_token.type = ASSIGN;
				m_token.str = "=";
				unGetNextChar();
			}
			break;

		case s_INL:  /* < or <= */
			state = s_DONE;
			if (c == '=')
			{
				m_token.type = NGT;
				m_token.str = "<=";
			}
			else
			{
				m_token.type = LT;
				m_token.str = "<";
				unGetNextChar();
			}
			break;

		case s_ING:  /* > or >= */
			state = s_DONE;
			if (c == '=')
			{
				m_token.type = NLT;
				m_token.str = ">=";	// spelling must follow the type, as for "<="
			}
			else
			{
				m_token.type = GT;
				m_token.str = ">";
				unGetNextChar();
			}
			break;

		case s_INNUM: /* number, integer */
			if (scan_is_digit(c))
			{
				save = true;
			}
			else
			{ /* not part of the number: back up in the input */
				unGetNextChar();
				state = s_DONE;
				m_token.type = k_NUM;
			}
			break;

		case s_INID:
			if (scan_is_id_part(c))
			{
				save = true;
			}
			else
			{ /* not part of the identifier: back up in the input */
				unGetNextChar();
				state = s_DONE;
				m_token.type = k_ID;
			}
			break;

		case s_DONE:	/* cannot occur unless the machine or system is faulty */
		default:    /* should never happen */
			sprintf(msg_temp, "Scanner Bug : State = %4d", static_cast<int>(state));
			outputMsg(lineno(),msg_temp);
			add_err();
			m_token.type = k_ERROR;
			m_token.str = msg_temp;

			state = s_DONE;
			break;
		}

		if (save && (tokenStringIndex < max_token_len))
		{
			tokenStringIndex++;
			m_token.str += c;
		}

		// A finished identifier may turn out to be a reserved word.
		if ((state == s_DONE) && (m_token.type == k_ID))
		{
			m_token.type = reservedLookup(m_token.str);
		}
	}


	// trace compiler and trace scan
	if (m_token.type == k_ERROR)
	{
		std::string msg = "unknow or unsuported symbol ----> \'";
		msg += m_token.str+"\'";
		outputMsg(lineno(), msg.c_str());
		if (TraceSource)
		{
			sprintf(msg_temp, "\t%d: Error: unexpected or unsuported symbol--> '%s'\n", lineno(), m_token.str.c_str());
			insert_list(msg_temp);
		}
	}
	else if (TraceSource && (m_token.type != k_EOF))
	{
		// Listing line: "<tab><line>: <description><token text>".
		std::string outmsg;
		sprintf(msg_temp, "\t%d: ", lineno());
		outmsg = msg_temp;

		// k_ERROR needs no case here: it is handled by the branch above.
		switch (m_token.type)
		{
		case k_ID:
			outmsg += "ID, name = ";
			break;
		case k_NUM:
			outmsg += "NUM, val = ";
			break;
		case k_ELSE:	case k_IF:
		case k_WHILE:	case k_READ:
		case k_WRITE:	case k_INT:
		case k_RETURN:	case k_VOID:
			outmsg += "reserve word: ";
			break;
		case k_NONE:
			outmsg += "Bug!";
			break;
		default:
			break;
		}

		outmsg = outmsg + m_token.str + '\n';
		Tokenizer::insert_list(outmsg.c_str());
	}

	return m_token;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -