📄 assembler.cpp
字号:
///////////////////////////////////////////////////////////////////////////////
//
// FileName : Assembler.cpp
// Version : 0.10
// Author : Luo Cong
// Date : 2004-10-28 22:12:03
// Comment : first version started on 2004-10-16
//
///////////////////////////////////////////////////////////////////////////////
#include "StdAfx.h"
#include "OllyMachine.h"
#include "Assembler.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
// ----------------------------------------------------------------------------
// Code-emission helper macros.
//
// All of these expand to plain statements that touch CAssembler members
// (m_nStopFlag, m_unCodeLen, m_Code, m_VM), so they are only usable inside
// CAssembler member functions.  The Add* macros additionally assign to a
// variable named nRetCode, which must already be declared at the call site,
// and hand it to OM_PROCESS_ERROR (defined elsewhere; presumably it leaves
// the function when nRetCode is zero - confirm against its definition).
//
// NOTE(review): the Add* macros expand to SEVERAL statements and are not
// wrapped in do { } while (0); using one as the unbraced body of an
// if/else/for would only guard its first statement.
// ----------------------------------------------------------------------------

// Raises the stop flag; Assemble() tests m_nStopFlag after pass 2 and reports
// failure when it is set.
#ifndef FatalError
#define FatalError() m_nStopFlag = 1
#endif
// Advances the code length counter by x without writing anything to m_Code.
#ifndef AddCodeLen
#define AddCodeLen(x) m_unCodeLen += (x)
#endif
// Checks capacity for one more element, then appends the value returned by
// m_VM->opcode(MneType) to m_Code.
#ifndef AddOpcode
#define AddOpcode(MneType) nRetCode = CheckCodeCapacity(1);\
OM_PROCESS_ERROR(nRetCode);\
m_Code[m_unCodeLen] = m_VM->opcode(MneType);\
++m_unCodeLen
#endif
// Checks capacity for len elements, then appends Codes[0 .. len-1] to m_Code.
// NOTE(review): `len` and `Codes` are used unparenthesized inside the loop, so
// pass simple identifiers/constants rather than compound expressions.  The
// loop also declares a variable named cidx at the point of expansion.
#ifndef AddCodes
#define AddCodes(Codes, len) nRetCode = CheckCodeCapacity((len));\
OM_PROCESS_ERROR(nRetCode);\
for (int cidx = 0; cidx < len; ++cidx)\
m_Code[m_unCodeLen++] = (Codes[cidx])
#endif
// Checks capacity for one more element, then appends ModRm truncated to an
// unsigned char.
#ifndef AddModRM
#define AddModRM(ModRm) nRetCode = CheckCodeCapacity(1);\
OM_PROCESS_ERROR(nRetCode);\
m_Code[m_unCodeLen] = (unsigned char)(ModRm);\
++m_unCodeLen
#endif
// Checks capacity for 4 elements, stores lValue through a `long *` that
// aliases m_Code at the current position, and advances the length by 4.
// NOTE(review): the store is sizeof(long) bytes wide and unaligned; it matches
// the reserved 4 only where long is 32 bits (true for Win32/Win64 MSVC).
#ifndef Add1LCode
#define Add1LCode(lValue) nRetCode = CheckCodeCapacity(4);\
OM_PROCESS_ERROR(nRetCode);\
*(long *)&m_Code[m_unCodeLen] = (lValue);\
m_unCodeLen += 4
#endif
// Character-class lookup table, indexed by a byte value in the range 0..255.
// Each entry is a bit set describing the character:
//      2 : upper-case letter 'A' ~ 'Z'
//      4 : lower-case letter 'a' ~ 'z'
//      8 : decimal digit     '0' ~ '9'
//     16 : operator          '*', '+', '-'
//      0 : none of the above
//
// The table MUST hold 256 entries: the BitsOf() accessor masks its argument
// with 0xFF, so index 255 is reachable (for instance when the value returned
// by getc() is an EOF of -1, which masks to 255).  With only 255 entries that
// lookup would read one element past the end of the array.
static const unsigned char g_uchar_bits_lookup_tbl[256] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         //   0 ~  15
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         //  16 ~  31
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 16, 0, 16, 0, 0,      //  32 ~  47
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0,         //  48 ~  63
    0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,         //  64 ~  79
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0,         //  80 ~  95
    0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,         //  96 ~ 111
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,         // 112 ~ 127
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 128 ~ 143
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 144 ~ 159
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 160 ~ 175
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 176 ~ 191
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 192 ~ 207
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 208 ~ 223
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 224 ~ 239
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0          // 240 ~ 255
};
// Bit flags stored in g_uchar_bits_lookup_tbl, one bit per character class.
#define UC_LETTER_MASK 0x02 // 'A' ~ 'Z'
#define LC_LETTER_MASK 0x04 // 'a' ~ 'z'
#define DIGIT_MASK 0x08 // '0' ~ '9'
#define OPERATOR_MASK 0x10 // '+', '-', '*'
#define LETTER_MASK (UC_LETTER_MASK | LC_LETTER_MASK)
// Fetches the class bits of a character; the argument is reduced to its low
// eight bits before it is used as the table index.
#define BitsOf(ch) (g_uchar_bits_lookup_tbl[0xFF & (ch)])
// Classification predicates: each yields a non-zero value when the character
// belongs to the class and zero otherwise.
#define IsUpperLetter(ch) (UC_LETTER_MASK & BitsOf(ch))
#define IsLowerLetter(ch) (LC_LETTER_MASK & BitsOf(ch))
#define IsLetter(ch) (LETTER_MASK & BitsOf(ch))
#define IsDigit(ch) (DIGIT_MASK & BitsOf(ch))
#define IsOperator(ch) (OPERATOR_MASK & BitsOf(ch))
// True for a hexadecimal digit: 0-9, A-F or a-f.  The argument is evaluated
// more than once, so it must be free of side effects.
#define IsHex(ch) (IsDigit(ch) ||\
(('A' <= (ch)) && ((ch) <= 'F')) ||\
(('a' <= (ch)) && ((ch) <= 'f'))\
)
int CAssembler::ReportFails() const
{
int i;
int nErrCount;
int nWarnCount;
ERRINFO ei;
WARNINFO wi;
CString strTmp;
CString strOutputMsg;
char *szErrTemplate1 = "%s(%d) : error C%02d: '%s' : %s\n";
char *szErrTemplate2 = "%s(%d) : error C%02d: %s\n";
char *szWarnTemplate1 = "%s(%d) : warning W%02d: '%s' : %s\n";
nErrCount = m_pErrList->GetCount();
for (i = 0; i < nErrCount; ++i)
{
ei = m_pErrList->GetAt(m_pErrList->FindIndex(i));
switch (ei.ErrType)
{
case ERR_NOID:
strTmp.Format(
szErrTemplate1,
ei.FileName, ei.LineNum, ei.ErrType, ei.TokenName,
"undeclared identifier"
);
strOutputMsg += strTmp;
break;
case ERR_BADBLOCKCMT:
strTmp.Format(
szErrTemplate2,
ei.FileName, ei.LineNum, ei.ErrType,
"bad block comment syntax"
);
strOutputMsg += strTmp;
break;
case ERR_REDEFID:
strTmp.Format(
szErrTemplate1,
ei.FileName, ei.LineNum, ei.ErrType, ei.TokenName,
"redefinition"
);
strOutputMsg += strTmp;
break;
case ERR_REDEFLABEL:
strTmp.Format(
szErrTemplate1,
ei.FileName, ei.LineNum, ei.ErrType, ei.TokenName,
"label redefined"
);
strOutputMsg += strTmp;
break;
case ERR_BADHEX:
strTmp.Format(
szErrTemplate1,
ei.FileName, ei.LineNum, ei.ErrType, ei.TokenName,
"hex constants must have at least one hex digit"
);
strOutputMsg += strTmp;
break;
case ERR_NOLINEEND:
strTmp.Format(
szErrTemplate1,
ei.FileName, ei.LineNum, ei.ErrType, ei.TokenName,
"expected end of line"
);
strOutputMsg += strTmp;
break;
case ERR_NEWLINEINCONST:
strTmp.Format(
szErrTemplate2,
ei.FileName, ei.LineNum, ei.ErrType,
"new line in constant"
);
strOutputMsg += strTmp;
break;
case ERR_NOINCLUDEFILE:
strTmp.Format(
"%s(%d) : fatal error C%d: Cannot open include file: '%s': "
"No such file or directory\n",
ei.FileName, ei.LineNum, ei.ErrType, ei.TokenName
);
strOutputMsg += strTmp;
break;
case ERR_ERRORONID:
strTmp.Format(
szErrTemplate1,
ei.FileName, ei.LineNum, ei.ErrType, ei.TokenName,
"some error(s) following"
);
strOutputMsg += strTmp;
break;
case ERR_EXPECTED:
strTmp.Format(
"%s(%d) : error C%02d: instruction expected type '%s'\n",
ei.FileName, ei.LineNum, ei.ErrType, ei.TokenName
);
strOutputMsg += strTmp;
break;
case ERR_IDISAKEYWORDCANTBEALABEL:
strTmp.Format(
szErrTemplate1,
ei.FileName, ei.LineNum, ei.ErrType, ei.TokenName,
"is a keyword, can not be a label name"
);
strOutputMsg += strTmp;
break;
case ERR_MUSTFOLLOWAREGISTER:
strTmp.Format(
szErrTemplate1,
ei.FileName, ei.LineNum, ei.ErrType, ei.TokenName,
"must follow a register"
);
strOutputMsg += strTmp;
break;
case ERR_EMBEDDEDCODESIZETOOBIG:
strTmp.Format(
szErrTemplate2,
ei.FileName, ei.LineNum, ei.ErrType, ei.TokenName
);
strOutputMsg += strTmp;
break;
case ERR_EMBEDDEDASMCANTCOMPILE:
strTmp.Format(
szErrTemplate1,
ei.FileName, ei.LineNum, ei.ErrType, ei.TokenName,
"compile error in embedded asm block, maybe syntax error"
);
strOutputMsg += strTmp;
break;
}
}
nWarnCount = m_pWarnList->GetCount();
for (i = 0; i < nWarnCount; ++i)
{
wi = m_pWarnList->GetAt(m_pWarnList->FindIndex(i));
switch (wi.WarnType)
{
case WARN_UNREFLABEL:
strTmp.Format(
szWarnTemplate1,
wi.FileName, wi.LineNum, wi.WarnType, wi.TokenName,
"unreferenced label"
);
strOutputMsg += strTmp;
break;
}
}
if (nErrCount || nWarnCount)
{
strTmp.Format("\n%d error(s), %d warning(s)", nErrCount, nWarnCount);
strOutputMsg += strTmp;
ShowErrMsg(strOutputMsg);
}
return nErrCount;
}
///////////////////////////////////////////////////////////////////////////////
// Assemble
//
// Runs the two assembler passes over one source file and then reports the
// collected errors and warnings.
//
//   unFileNameSize : must equal strlen(szFileName) + 1
//   szFileName     : name of the file to assemble, must not be NULL
//
// Returns 1 only when both passes succeeded, the stop flag is clear and
// ReportFails() counted no errors; returns 0 otherwise.
//
// OM_PROCESS_ERROR is defined elsewhere; it is presumably the usual
// "if the argument is zero, goto Exit0" helper - confirm against its
// definition.  ReportFails() sits behind the Exit0 label so that it also
// runs on those early exits.
///////////////////////////////////////////////////////////////////////////////
int CAssembler::Assemble(
    /* [size_is][in] */ const UINT unFileNameSize,
    /* [in] */ const char *szFileName
)
{
    ASSERT(szFileName);

    int nResult = 0;    // stays 0 unless everything below succeeds
    int nStatus;

    // The declared size has to match the string, terminator included.
    OM_PROCESS_ERROR(unFileNameSize == (strlen(szFileName) + 1));

    // Pass 1 : build the symbol table.
    nStatus = MakeSymbolTable(unFileNameSize, szFileName);
    OM_PROCESS_ERROR(nStatus);

    // Restart the code length from zero before pass 2.
    m_unCodeLen = 0;

    // Pass 2 : generate the code, then check the label references.
    nStatus = GenerateCodes(unFileNameSize, szFileName);
    OM_PROCESS_ERROR(nStatus);
    CheckLabelRef();

    // A raised stop flag keeps the result at 0.
    if (!m_nStopFlag)
        nResult = 1;

Exit0:
    // Any reported error overrides an otherwise successful run.
    nStatus = ReportFails();
    return nStatus ? 0 : nResult;
}
TOKENTYPE CAssembler::NextToken(
/* [in][out] */ FILEINFO *fii,
/* [out] */ CString *strTokenName,
/* [in] */ const int nIsInEmbeddedAsm
)
{
ASSERT(fii);
ASSERT(strTokenName);
int nRetCode;
int ch;
TOKENTYPE state = TOK_UNKNOWN;
strTokenName->Empty();
for (;;)
{
switch (state)
{
case TOK_UNKNOWN:
ch = getc(fii->fp);
if ((' ' == ch) || ('\t' == ch))
state = TOK_UNKNOWN;
else if (IsLetter(ch) || ('_' == ch))
state = TOK_TOKEN;
else if (IsDigit(ch) || ('-' == ch))
state = TOK_DIGIT_DEC;
else if (';' == ch)
state = TOK_LINECOMMENT;
else if ('/' == ch)
state = TOK_COMMENT0;
else if (',' == ch)
return TOK_COMMA;
else if ('\n' == ch)
{
++fii->LineNum;
state = TOK_UNKNOWN;
}
else if ('"' == ch)
state = TOK_DQUOTE;
else if ('{' == ch)
return TOK_LBRACE;
else if ('}' == ch)
return TOK_RBRACE;
else if (nIsInEmbeddedAsm && '[' == ch)
return TOK_LBRACKET;
else if (nIsInEmbeddedAsm && ']' == ch)
return TOK_RBRACKET;
else if (nIsInEmbeddedAsm && '.' == ch)
return TOK_DOT;
else if (nIsInEmbeddedAsm && IsOperator(ch))
{
*strTokenName = ch;
return TOK_OPERATOR;
}
else if (EOF == ch)
return TOK_EOF;
else
{
*strTokenName += ch;
return TOK_ERR;
}
break;
case TOK_TOKEN:
*strTokenName += ch;
ch = getc(fii->fp);
if (IsLetter(ch) || IsDigit(ch) || ('_' == ch))
state = TOK_TOKEN;
else if (':' == ch)
return TOK_LABEL;
else
{
ungetc(ch, fii->fp);
nRetCode = strTokenName->CompareNoCase("indexof");
if (0 == nRetCode)
return TOK_INDEXOF;
nRetCode = IsRegister(
strTokenName->GetLength() + 1,
*strTokenName
);
if (nRetCode)
return TOK_REGISTER;
return TOK_TOKEN;
}
break;
case TOK_DIGIT_DEC:
*strTokenName += ch;
ch = getc(fii->fp);
if (IsDigit(ch))
state = TOK_DIGIT_DEC;
else if (('x' == ch) || ('X' == ch))
state = TOK_DIGIT_HEX;
else
{
ungetc(ch, fii->fp);
return TOK_DIGIT_DEC;
}
break;
case TOK_DIGIT_HEX:
*strTokenName += ch;
ch = getc(fii->fp);
if (IsHex(ch))
state = TOK_DIGIT_HEX;
else if (2 < strTokenName->GetLength())
{
ungetc(ch, fii->fp);
return TOK_DIGIT_HEX;
}
else
{
ungetc(ch, fii->fp);
return TOK_BAD_DIGIT_HEX;
}
break;
case TOK_LINECOMMENT:
ch = getc(fii->fp);
if (('\n' == ch) || (EOF == ch))
{
++fii->LineNum;
state = TOK_UNKNOWN;
}
break;
case TOK_COMMENT0:
ch = getc(fii->fp);
if ('/' == ch)
state = TOK_LINECOMMENT;
else if ('*' == ch)
state = TOK_BLOCKCOMMENT;
else
{
ungetc(ch, fii->fp);
Error(
ERR_BADBLOCKCMT,
*strTokenName,
fii->FileName,
fii->LineNum
);
return TOK_BAD_COMMENT;
}
break;
case TOK_BLOCKCOMMENT:
ch = getc(fii->fp);
if ('\n' == ch)
++fii->LineNum;
else if ('*' == ch)
{
ch = getc(fii->fp);
if ('/' == ch)
state = TOK_UNKNOWN;
else
ungetc(ch, fii->fp);
}
break;
case TOK_DQUOTE:
ch = getc(fii->fp);
if ('\n' == ch)
{
ungetc(ch, fii->fp);
return TOK_ERR;
}
else if (EOF == ch) return TOK_ERR;
else if ('"' == ch) return TOK_DQUOTE;
else if ('\\' == ch)
{
ch = getc(fii->fp);
if ('n' == ch)
*strTokenName += '\n';
else if ('r' == ch)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -