📄 parser.cpp
字号:
// parser.cpp : implementation file
//
#include "stdafx.h"
#include "cminus.h"
#include "parser.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
/*
 * CParser
 * Construction & destruction
 *
 * Programmer: 陆晓春
 * Date: 2004.05.18
 */
// Construction.
//   str - source text handed to the CScaner that this parser owns.
// The syntax tree root starts out as NULL; it is filled in later by
// BuildSyntaxTree() or Trace().
CParser::CParser( CString& str )
{
    m_pScaner = new CScaner( str );
    indent = -1;        // NOTE(review): presumably the starting indentation level for tree printing - confirm
    m_pProgram = NULL;  // nothing has been parsed yet
}
// Destruction: frees the scanner and the syntax tree root, then closes the
// trace file if it is still open.
CParser::~CParser()
{
    delete m_pScaner;
    delete m_pProgram;  // deleting a NULL pointer is a no-op, so no guard is needed

    if( m_fTraceFile.m_hFile == CFile::hFileNull )
        return;         // trace file was never opened (or is already closed)
    m_fTraceFile.Close();
}
/*
 * CParser
 * Public functions
 *
 * Programmer: 陆晓春
 * Date: 2004.05.19
 */
// Build the parse tree.
// Runs program() and stores its result in m_pProgram.
// Returns the stored root (whatever program() returned, possibly NULL).
// NOTE(review): any tree already held in m_pProgram is overwritten here
// without being freed - confirm this is only called once per parser.
CTreeNode* CParser::BuildSyntaxTree()
{
    m_pProgram = program();
    return m_pProgram;
}
// Trace the result of parsing: build the syntax tree if necessary and print it.
//   lpszPathName - path of the trace file to create.
// If bErrFlag is set after parsing, only a message is emitted and no file is
// created. If the file cannot be created, an error message is emitted and the
// function returns without printing.
// The trace file is left open after printing; the destructor closes it.
void CParser::Trace( LPCTSTR lpszPathName )
{
    // build the syntax tree (only if there is no tree yet)
    if( !m_pProgram ) m_pProgram = program();

    if( bErrFlag ) {
        OutputPhaseMsg( "\r\nerrors occur while parsing, stop printing the syntax tree" );
        return;
    }
    OutputPhaseMsg( "successfully build the syntax tree! printing it..." );

    // A previous Trace() call leaves m_fTraceFile open. Opening a CFile that is
    // already open would discard the old handle without closing it, so close
    // it first (same check the destructor uses).
    if( m_fTraceFile.m_hFile != CFile::hFileNull )
        m_fTraceFile.Close();

    // create file
    CFileException e;
    if( !m_fTraceFile.Open( lpszPathName, CFile::modeCreate | CFile::modeReadWrite, &e ) ) {
        OutputErrMsg( "failed to create parse trace file: %s", lpszPathName );
        return;
    }

    // print the syntax tree
    // NOTE(review): PrintTree presumably writes through m_fTraceFile - confirm
    PrintTree( m_pProgram );
}
/*
 * CParser
 * Help routines
 *
 * Programmer: 陆晓春
 * Date: 2004.05.18
 */
// Construct a new tree node.
//   kind - stored in the node's nodekind
//   type - stored in the node's type
//   ID   - stored in the node's szName
// The node is also stamped with the scanner's current line number and the
// current scope name (m_szScope).
// Returns the newly allocated node.
CTreeNode* CParser::newNode( NodeKind kind, enum TokenType type, CString& ID )
{
    CTreeNode* pNode = new CTreeNode();

    pNode->nodekind = kind;
    pNode->type     = type;
    pNode->szName   = ID;
    pNode->szScope  = m_szScope;
    pNode->lineno   = m_pScaner->LineNo();

    return pNode;
}
// Construct a new statement node.
//   kind - statement kind, stored in kind.stmt
//   ID   - stored in the node's szName
// nodekind is set to kStmt and type to _NONE; the node is also stamped with
// the scanner's current line number and the current scope name (m_szScope).
// Returns the newly allocated node.
CTreeNode* CParser::newStmtNode( StmtKind kind, CString& ID )
{
    CTreeNode* pNode = new CTreeNode();

    pNode->nodekind  = kStmt;
    pNode->kind.stmt = kind;
    pNode->type      = _NONE;
    pNode->szName    = ID;
    pNode->szScope   = m_szScope;
    pNode->lineno    = m_pScaner->LineNo();

    return pNode;
}
// Construct a new expression node.
//   kind - expression kind, stored in kind.exp
//   type - stored in the node's type
//   ID   - stored in the node's szName
// nodekind is set to kExp; the node is also stamped with the scanner's
// current line number and the current scope name (m_szScope).
// Returns the newly allocated node.
CTreeNode* CParser::newExpNode( ExpKind kind, enum TokenType type, CString& ID )
{
    CTreeNode* pNode = new CTreeNode();

    pNode->nodekind = kExp;
    pNode->kind.exp = kind;
    pNode->type     = type;
    pNode->szName   = ID;
    pNode->szScope  = m_szScope;
    pNode->lineno   = m_pScaner->LineNo();

    return pNode;
}
// Fetch the next token into m_token and report whether it has the expected type.
//   type - the token type the caller expects next
// Returns TRUE on a match, FALSE otherwise. The token is consumed either way;
// callers that want to retry it use the scanner's PushBack().
BOOL CParser::match( enum TokenType type )
{
    m_token = m_pScaner->NextToken();
    if( m_token.type == type )
        return TRUE;
    return FALSE;
}
// Error recovery: skip tokens until m_token is of the given type or _EOF.
//   type - the token type to stop at
// The current token is tested first, so nothing is consumed when m_token
// already is a stop token. On return m_token holds the stop token.
void CParser::ConsumeUntil( enum TokenType type )
{
    for( ;; ) {
        if( m_token.type == type || m_token.type == _EOF )
            break;
        m_token = m_pScaner->NextToken();
    }
}
// Error recovery: skip tokens until m_token is of either given type or _EOF.
//   type1, type2 - the token types to stop at
// The current token is tested first, so nothing is consumed when m_token
// already is a stop token. On return m_token holds the stop token.
void CParser::ConsumeUntil( enum TokenType type1, enum TokenType type2 )
{
    for( ;; ) {
        if( m_token.type == type1 || m_token.type == type2 || m_token.type == _EOF )
            break;
        m_token = m_pScaner->NextToken();
    }
}
/*
 * CParser
 * Functions for all grammar rules
 *
 * Programmer: 陆晓春
 * Date: 2004.05.18
 */
// Grammar:
// 1. program->declaration_list
// Returns whatever declaration_list() returns (the first declaration node,
// or NULL when there is none).
CTreeNode* CParser::program()
{
    CTreeNode* pDeclarations = declaration_list();
    return pDeclarations;
}
// Grammar:
// 2. declaration_list->declaration_list declaration | declaration
// Reads tokens until _EOF. Every declaration must start with a type specifier
// (char, int, void or float); anything else is reported and skipped up to the
// next ';' or '}'.
// Returns the first declaration node with all following declarations chained
// through `sibling`, or NULL if no declaration was parsed.
CTreeNode* CParser::declaration_list()
{
    CTreeNode *head = NULL, *tail = NULL;

    // Each pass starts on the first token of a declaration; the loop step
    // reads the token that follows whatever the pass consumed.
    for( m_token = m_pScaner->NextToken();
         m_token.type != _EOF;
         m_token = m_pScaner->NextToken() ) {
        const BOOL bTypeSpecifier =
            m_token.type == _CHAR || m_token.type == _INT ||
            m_token.type == _VOID || m_token.type == _FLOAT;

        if( bTypeSpecifier ) {
            CTreeNode* pDecl = declaration();
            if( pDecl != NULL ) {
                // link all declarations together; a single declaration may
                // itself be a chain, so continue from its last sibling
                if( head == NULL ) head = pDecl;
                else               tail->sibling = pDecl;
                tail = pDecl->LastSibling();
            }
        } else {
            OutputErrMsg( "error in line %d: invalid type '%s'",
                          m_pScaner->LineNo(), (LPCTSTR)m_token.str );
            ConsumeUntil( SEMI/* ';' */, RBRACE/* '}' */ );// error recovery
        }
    }
    return head;
}
// Grammar:
// 3. declaration->var_declaration | fun_declaration
// On entry m_token is a supported type-specifier token.
// Saves the type specifier in TypeToken and the identifier in IDToken, then
// looks one token ahead to choose between a function and a variable
// declaration.
// Returns the declaration node(s), or NULL after a reported syntax error.
CTreeNode* CParser::declaration()
{
    m_szScope = _T( "global" );// global function or variable declaration

    TypeToken = m_token;
    m_token = m_pScaner->NextToken();
    IDToken = m_token;

    // the type specifier must be followed by an identifier
    if( IDToken.type != _ID ) {
        OutputErrMsg( "error in line %d: \"%s\" is a reserved token",
                      m_pScaner->LineNo(), (LPCTSTR)IDToken.str );
        ConsumeUntil( SEMI, RBRACE );
        return NULL;
    }

    m_token = m_pScaner->NextToken();// '(', ';', '[', ',' or error
    switch( m_token.type ) {
    case LPARAN:
        return fun_declaration();
    case SEMI:
    case LSQUARE:
    case COMMA:
        return var_declaration();
    default:
        break;
    }

    // anything else after the identifier is an error
    OutputErrMsg( "error in line %d: missing ';' after identifier \"%s\"",
                  m_pScaner->LineNo(), (LPCTSTR)IDToken.str );
    ConsumeUntil( SEMI, RBRACE );
    return NULL;
}
// Grammar:
// 4. var_declaration->type_specifier ID(, ...)`;` | type_specifier ID `[` NUM `]`(, ...)`;`
// 5. type_specifier->`int` | `void` | `char` | `float`; this check is actually
//    done by the caller before a declaration is parsed
// On entry m_token is ';', ',' or '['; TypeToken holds the type specifier and
// IDToken the identifier being declared.
// Returns the node for this variable, with any further variables of the same
// comma-separated declaration chained through `sibling`. Returns NULL when an
// array declaration has no size.
CTreeNode* CParser::var_declaration()
{
    CTreeNode* pVar = newNode( kVarDec, TypeToken.type, IDToken.str );

    // optional array part: '[' NUM ']'
    if( m_token.type == LSQUARE ) {
        m_token = m_pScaner->NextToken();// NUM
        if( m_token.type != _NUM ) {
            OutputErrMsg( "error in line %d: syntax error in declaration of array %s[], missing array size",
                          m_pScaner->LineNo(), (LPCTSTR)IDToken.str );
            delete pVar;
            ConsumeUntil( SEMI, RBRACE );// error recovery
            return NULL;
        }
        pVar->bArray = TRUE;
        pVar->iArraySize = m_pScaner->GetIntNumValue();
        if( !match(RSQUARE) ) {// `]`
            OutputErrMsg( "error in line %d: syntax error in declaration of array %s[], missing ']'",
                          m_pScaner->LineNo(), (LPCTSTR)IDToken.str );
            m_pScaner->PushBack();// error recovery: re-read the unexpected token below
        }
        m_token = m_pScaner->NextToken();// should be ';' or ','
    }

    // ';' ends the declaration
    if( m_token.type == SEMI )
        return pVar;

    // anything other than ',' at this point is an error
    if( m_token.type != COMMA ) {
        OutputErrMsg( "error in line %d: bad declaration sequence, missing ';'", m_pScaner->LineNo() );
        ConsumeUntil( SEMI, RBRACE );
        return pVar;
    }

    // ',' introduces another variable of the same type
    m_token = m_pScaner->NextToken();// ID or error
    IDToken = m_token;
    if( IDToken.type != _ID ) {
        OutputErrMsg( "error in line %d: \"%s\" is a reserved token",
                      m_pScaner->LineNo(), (LPCTSTR)IDToken.str );
        ConsumeUntil( SEMI, RBRACE );// error recovery
        return pVar;
    }

    m_token = m_pScaner->NextToken();// ';', '[', ',' or error
    switch( m_token.type ) {
    case SEMI:
    case LSQUARE:
    case COMMA:
        pVar->sibling = var_declaration();// link following variable declarations
        break;
    default:
        OutputErrMsg( "error in line %d: missing ';' after identifier \"%s\"",
                      m_pScaner->LineNo(), (LPCTSTR)IDToken.str );
        m_pScaner->PushBack();// error recovery
        break;
    }
    return pVar;
}
// Grammar:
// 6. fun_declaration->type_specifier ID `(` params `)` compound_stmt
// On entry m_token is '(', TypeToken holds the type specifier and IDToken the
// function name.
// Builds a kFunDec node whose child[0] is the parameter list and child[1] the
// function body; every node of both sibling chains gets the function node as
// its `father`. The current scope (m_szScope) is switched to the function name.
// Returns the function declaration node.
CTreeNode* CParser::fun_declaration()
{
    // params() overwrites IDToken, so keep the function name for diagnostics
    const CString szFunName = IDToken.str;

    CTreeNode* temp = newNode( kFunDec, TypeToken.type, IDToken.str );
    // update function scope
    m_szScope = IDToken.str;

    CTreeNode* p;

    // params
    temp->child[0] = params();
    for( p = temp->child[0]; p; p = p->sibling )
        p->father = temp;

    if( !match(RPARAN) ) {
        // report the function's name, not the unexpected token that was read
        OutputErrMsg( "error in line %d: missing ')' in function \"%s\"(...) declaration",
                      m_pScaner->LineNo(), (LPCTSTR)szFunName );
        m_pScaner->PushBack();
    }

    // compound statements
    temp->child[1] = compound_stmt();
    for( p = temp->child[1]; p; p = p->sibling )
        p->father = temp;

    return temp;
}
// Grammar:
// 7. params->param_list | `void` | empty, `void` is thought as empty
// 8. param_list->param_list `,` param | param
// 9. param->type_specifier ID | type_specifier ID `[` `]`
// On entry m_token is '('.
// Returns the parameter nodes chained through `sibling` in REVERSE source
// order (the first parameter is the last sibling), or NULL for an empty or
// `void` parameter list. The token that ends the list (expected to be ')') is
// pushed back so the caller can match it.
CTreeNode* CParser::params()
{
    CTreeNode* first = NULL;

    TypeToken = m_token = m_pScaner->NextToken();// type-specifier or ')'
    if( m_token.type == RPARAN ) {
        m_pScaner->PushBack();
        return NULL;
    }
    if( TypeToken.type == _VOID ) {
        if( match( RPARAN ) ) {// "(void)" means no parameters
            m_pScaner->PushBack();
            return NULL;
        }
        m_pScaner->PushBack();// is not ')', push it back
    }

    while( TypeToken.type == _INT || TypeToken.type == _CHAR ||
           TypeToken.type == _VOID || TypeToken.type == _FLOAT ) {
        // node for the parameter parsed in THIS iteration; stays NULL when the
        // parameter name is invalid so later steps cannot touch a stale or
        // missing node
        CTreeNode* param = NULL;

        IDToken = m_token = m_pScaner->NextToken();
        if( IDToken.type != _ID ) {
            OutputErrMsg( "error in line %d: invalid parameter \"%s\"",
                          m_pScaner->LineNo(), (LPCTSTR)IDToken.str );
        } else {
            param = newNode( kParam, TypeToken.type, IDToken.str );
            param->sibling = first;// the FIRST parameter is the LAST sibling node
            first = param;
        }

        m_token = m_pScaner->NextToken();
        if( m_token.type == LSQUARE ) {// '['
            if( param ) param->bArray = TRUE;
            if( !match( RSQUARE ) ) {//']'
                OutputErrMsg( "error in line %d: bad array parameter, missing ']'", m_pScaner->LineNo() );
                ConsumeUntil( COMMA, RPARAN );// error recovery
            } else
                m_token = m_pScaner->NextToken();// should be ',' or ')'
        }

        // ')' ends the list; any token other than ',' also ends it and is left
        // for the caller to diagnose
        if( m_token.type != COMMA )
            break;
        TypeToken = m_token = m_pScaner->NextToken();// type-specifier of the next parameter
    }

    m_pScaner->PushBack();// the next token should be ')'
    return first;
}
// Grammar:
// 10. compound_stmt->`{` local_declarations statement_list `}` | expression_stmt
// the next token should be '{'
CTreeNode* CParser::compound_stmt()
{
CTreeNode *first = NULL, *last = NULL, *temp = NULL;
BOOL bHasNoBraces = FALSE;
if( !match(LBRACE) ) {// match'{'
// OutputErrMsg( "error in line %d: missing '{'", m_pScaner->LineNo() );
bHasNoBraces = TRUE;
m_pScaner->PushBack();// error recovery
}
// local_declarations
while( 1 ) {
TypeToken = m_token = m_pScaner->NextToken();
if( m_token.type == _CHAR || m_token.type == _INT ||
m_token.type == _VOID || m_token.type == _FLOAT )
temp = local_declarations();
else { m_pScaner->PushBack(); break; }
if( bHasNoBraces ) return temp;// has no braces, return when reach the first ';'
if( temp )
// link all local_declarations together
if( !first ) { first = temp; last = temp->LastSibling(); }
else { last->sibling = temp; last = temp->LastSibling(); }
}
// statement_list
// m_token contains the first token of statement_list
m_token = m_pScaner->NextToken();
while( 1 ) {
temp = NULL;
if( m_token.type == RBRACE ) {
if( bHasNoBraces ) OutputErrMsg( "error in line %d: unpaired '}'", m_pScaner->LineNo() );
break;// '}'
}
if( m_token.type == _EOF ) {
OutputErrMsg( "error in line %d: missing '}'", m_pScaner->LineNo() );
m_pScaner->PushBack();
break;
}
switch( m_token.type ) {
case _READ:
temp = read_stmt(); break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -