📄 linetok.cpp

📁 虚拟机设计与实现——C/C++
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+                                                                   +
+ linetok.cpp   - facilities to break a line of text into a         +
+                 series of tokens.                                 +
+                                                                   +
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ macros                                                            +
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

#define TOK_IDENTIFIER	0
#define TOK_INT_REG		1
#define TOK_FLT_REG		2
#define TOK_DBL_REG		3
#define TOK_CHAR_CONST	4
#define TOK_INT_CONST	5
#define TOK_FLT_CONST	6
#define TOK_COMMA		7
#define TOK_NO_MORE		8
#define TOK_BAD			9

#define ID_SIZE	256		/* max. chars in identifier */

#define EOL		'\0'

/*#define LINE_TOK_DEBUG		1*/

#ifdef LINE_TOK_DEBUG
#define	LINE_TOK_DEBUG0(arg);				printf(arg);
#define LINE_TOK_DEBUG1(arg1,arg2);			printf(arg1,arg2);
#define LINE_TOK_DEBUG2(arg1,arg2,arg3);	printf(arg1,arg2,arg3);
#else
#define	LINE_TOK_DEBUG0(arg);				
#define LINE_TOK_DEBUG1(arg1,arg2);			
#define LINE_TOK_DEBUG2(arg1,arg2,arg3);			
#endif

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ declarations                                                      +
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

char *TokStr[] = {"TOK_IDENTIFIER","TOK_INT_REG","TOK_FLT_REG",
                 "TOK_DBL_REG","TOK_CHAR_CONST","TOK_INT_CONST",
				 "TOK_FLT_CONST","TOK_COMMA","TOK_NO_MORE","TOK_BAD"};

struct Token
{
	char text[ID_SIZE];
	char *fName;	/*file reading assembly code from*/
	U4 line;		/* line number in source file */
	U1 type;		/* type of token */
	S8 val;			/* register bytecode val, char val, integer const */
	F8 fval;		/* floating-pt val */
};

class LineTokenizer
{
	char tokenBuffer[LINE_SIZE];	/*whole line of assembler text*/
	int ntChars;					/*number chars + null char */
	int itChars;					/*index into buffer*/
	struct Line *lineptr;			/*ptr to Line fed to constructor*/

	/*called by proces----- functions*/
	char getNextLineChar();
	void goBackOneChar();
	char skipLineWhiteSpace();

	/*called by getNextLineToken*/
	void processRegister(struct Token *tptr);
	void processCharConst(struct Token *tptr);
	void processIdentifier(struct Token *tptr, char ch);
	void processNumConst(struct Token *tptr, char ch);

	public:
	LineTokenizer(struct Line *ln);

	/*called by match*/
	struct Token getNextLineToken(); 
	
	U1 match(struct Token *tptr, int ttype); /* big one */
	void printToken(struct Token *tptr);
};

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ definitions                                                       +
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

LineTokenizer::LineTokenizer(struct Line *ln)
{
	LINE_TOK_DEBUG2("LineTokenizer::LinkeTokenizer(): fed \"%s\" at line (%lu)\n",(*ln).src,(*ln).line);
	lineptr = ln;
	strcpy(tokenBuffer,(*ln).src);
	ntChars = strlen(tokenBuffer)+1; /*include null char*/
	itChars = -1; 
	return;

}/*end constructor*/

/*-----------------------------------------------------------------*/

char LineTokenizer::getNextLineChar()
{
	itChars++; /* can be in range 0,1, ..., (ntChars-1)*/

	if(itChars>=ntChars-1)
	{
		return(EOL);
	}

	return(tokenBuffer[itChars]);

}/*end getNextLineChar*/

/*-----------------------------------------------------------------*/

void LineTokenizer::goBackOneChar()
{
	if(itChars >= 0){ itChars--; }
	else
	{ 
		LINE_TOK_DEBUG1("LineTokenizer::goBackOneChar(): decrement below 0, %d\n",itChars); 
	} 
	return;

}/*end goBackOneChar*/

/*-----------------------------------------------------------------*/

char LineTokenizer::skipLineWhiteSpace()
{
	char ch;
	ch=getNextLineChar();
	while((ch==' ')||(ch=='\t'))
	{
		ch=getNextLineChar();
	}
	return(ch);

}/*end skipLineWhiteSpace*/

/*-----------------------------------------------------------------*/

/*
	Gameplan:  
	
	i) skip white space ( space or tab )
	ii) look at first char ( determines tok type )
			$ = register ( $R1, $F1, $D1 )
			' = char constant ( 'a' )
			a-z, A-Z, @, _, ?, .  = identifier
			0-9, +, - = numeric constant
			, = comma
			'\0' = end of string
	iii) keep reading until
			-hit end of line
			-reach char not belonging to tok type
	iv) populate token attributes and return
*/

struct Token LineTokenizer::getNextLineToken()
{
	struct Token token;
	char current;
	
	/* still need to set text,type,val/fval*/

	token.line = (*lineptr).line; 
	token.fName = (*lineptr).fName;
	token.val  = 0; 
	token.fval = 0.0; 

	current = skipLineWhiteSpace();

	if(current=='$'){ processRegister(&token); }

	else if(current=='\''){ processCharConst(&token); }

	else if(((current>='a')&&(current<='z'))||
		    ((current>='A')&&(current<='Z'))||
			(current=='@')||
			(current=='_')||
			(current=='?')||
			(current=='.')){ processIdentifier(&token,current); }

	else if(((current>='0')&&(current<='9'))||
		    (current=='-')||
			(current=='+')){ processNumConst(&token,current); }

	else if(current==',')
	{
		token.text[0]=current;
		token.text[1]='\0';
		token.type = TOK_COMMA; 
	}
	else if(current==EOL)
	{
		LINE_TOK_DEBUG2("LineTokenizer::getNextLineToken(): hit EOL line (%lu) index(%d)\n",token.line,itChars);
		strcpy(token.text,"EOL");
		token.type = TOK_NO_MORE; 
	}
	else
	{
		token.text[0]=current;
		token.text[1]='\0';
		token.type = TOK_BAD; 
	}

	return(token);

}/*end getNextLineToken*/

/*-----------------------------------------------------------------*/

void LineTokenizer::processRegister(struct Token *tptr)
{
	char current;
	current = getNextLineChar();

	switch(current)
	{
		/*INT_REGISTER-----------------------------------------*/
		case 'R':
		case 'r':
		{
			current = getNextLineChar();

			switch(current)
			{
				case '1': /* $R1- */
				{
					char peek = getNextLineChar();

					switch(peek)
					{
						case '0':
						{
							strcpy((*tptr).text,"$R10");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R10;
						}break;
						case '1':
						{
							strcpy((*tptr).text,"$R11");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R11;
						}break;
						case '2':
						{
							strcpy((*tptr).text,"$R12");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R12;
						}break;
						case '3':
						{
							strcpy((*tptr).text,"$R13");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R13;
						}break;
						case '4':
						{
							strcpy((*tptr).text,"$R14");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R14;
						}break;
						case '5':
						{
							strcpy((*tptr).text,"$R15");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R15;
						}break;
						case '6':
						{
							strcpy((*tptr).text,"$R16");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R16;
						}break;
						case '7':
						{
							strcpy((*tptr).text,"$R17");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R17;
						}break;
						case '8':
						{
							strcpy((*tptr).text,"$R18");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R18;
						}break;
						case '9':
						{
							strcpy((*tptr).text,"$R19");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R19;
						}break;
						default:
						{
							strcpy((*tptr).text,"$R1");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R1;
							goBackOneChar();
						}
					}/*end of $R1 sub-switch*/
				}break;
				case '2': /* $R2- */
				{
					char peek = getNextLineChar();

					switch(peek)
					{
						case '0':
						{
							strcpy((*tptr).text,"$R20");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R20;
						}break;
						case '1':
						{
							strcpy((*tptr).text,"$R21");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R21;
						}break;
						case '2':
						{
							strcpy((*tptr).text,"$R22");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R22;
						}break;
						case '3':
						{
							strcpy((*tptr).text,"$R23");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R23;
						}break;
						case '4':
						{
							strcpy((*tptr).text,"$R24");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R24;
						}break;
						default:
						{
							strcpy((*tptr).text,"$R2");
							(*tptr).type = TOK_INT_REG;
							(*tptr).val = $R2;
							goBackOneChar();
						}
					}/*end of $R2 sub-switch*/
				}break;
				case '3':
				{
					strcpy((*tptr).text,"$R3");
					(*tptr).type = TOK_INT_REG;
					(*tptr).val = $R3;
				}break;
				case '4':
				{
					strcpy((*tptr).text,"$R4");
					(*tptr).type = TOK_INT_REG;
					(*tptr).val = $R4;
				}break;
				case '5':
				{
					strcpy((*tptr).text,"$R5");
					(*tptr).type = TOK_INT_REG;
					(*tptr).val = $R5;
				}break;
				case '6':
				{
					strcpy((*tptr).text,"$R6");
					(*tptr).type = TOK_INT_REG;
					(*tptr).val = $R6;
				}break;
				case '7':
				{
					strcpy((*tptr).text,"$R7");
					(*tptr).type = TOK_INT_REG;
					(*tptr).val = $R7;
				}break;
				case '8':
				{
					strcpy((*tptr).text,"$R8");
					(*tptr).type = TOK_INT_REG;
					(*tptr).val = $R8;
				}break;
				case '9':
				{
					strcpy((*tptr).text,"$R9");
					(*tptr).type = TOK_INT_REG;
					(*tptr).val = $R9;
				}break;
				default: /*does not start with 0-9*/
				{
					(*tptr).text[0]='$';
					(*tptr).text[1]='R';
					(*tptr).text[2]=current;
					(*tptr).text[3]='\0';
					(*tptr).type = TOK_BAD; 
				}
			}/*end $R switch*/
		}break;
		/*FLT_REGISTER-----------------------------------------*/
		case 'F':
		case 'f':
		{
			current = getNextLineChar();

			switch(current)
			{
				case '1':
				{
					current = getNextLineChar();

					if(current=='0')
					{
						strcpy((*tptr).text,"$F10");
						(*tptr).type = TOK_FLT_REG;
						(*tptr).val = $F10;
					}
					else
					{
						strcpy((*tptr).text,"$F1");
						(*tptr).type = TOK_FLT_REG;
						(*tptr).val = $F1;
						goBackOneChar();
					}
				}break;
				case '2':
				{
					strcpy((*tptr).text,"$F2");
					(*tptr).type = TOK_FLT_REG;
					(*tptr).val = $F2;
				}break;
				case '3':
				{
					strcpy((*tptr).text,"$F3");
					(*tptr).type = TOK_FLT_REG;
					(*tptr).val = $F3;
				}break;
				case '4':
				{
					strcpy((*tptr).text,"$F4");
					(*tptr).type = TOK_FLT_REG;
					(*tptr).val = $F4;
				}break;
				case '5':
				{
					strcpy((*tptr).text,"$F5");
					(*tptr).type = TOK_FLT_REG;
					(*tptr).val = $F5;
				}break;
				case '6':
				{
					strcpy((*tptr).text,"$F6");
					(*tptr).type = TOK_FLT_REG;
					(*tptr).val = $F6;
				}break;
				case '7':
				{
					strcpy((*tptr).text,"$F7");
					(*tptr).type = TOK_FLT_REG;
					(*tptr).val = $F7;
				}break;
				case '8':
				{
					strcpy((*tptr).text,"$F8");
					(*tptr).type = TOK_FLT_REG;
					(*tptr).val = $F8;
				}break;
				case '9':
				{
					strcpy((*tptr).text,"$F9");
					(*tptr).type = TOK_FLT_REG;
					(*tptr).val = $F9;
				}break;
				case 'P':
				case 'p':
				{
					strcpy((*tptr).text,"$FP");
					(*tptr).type = TOK_INT_REG;
					(*tptr).val = $FP;
				}break;
				default: /* not 0-9*/
				{
					(*tptr).text[0]='$';
					(*tptr).text[1]='F';
					(*tptr).text[2]=current;
					(*tptr).text[3]='\0';
					(*tptr).type = TOK_BAD;
				}
			}/*end of $F switch*/
		}break;
		/*DBL_REGISTER-----------------------------------------*/
		case 'D':
		case 'd':
		{
			current = getNextLineChar();

			switch(current)
			{
				case '1':
				{
					current = getNextLineChar();
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -