// lexer.cs
{
m_reader = reader;
// Set line number info
m_stFilenameHint = stFilenameHint;
m_row = 1;
m_col = 1;
m_fStartOfLine = true;
m_fIsErrorMode = false;
InitPreprocessor(stDefines);
}
#endregion
#region Static Construction
// Fill out keyword hash. We only need one copy for all the lexers
static Lexer()
{
    AddKeyword("return", Token.Type.cReturn);
    AddKeyword("class", Token.Type.cClass);
    AddKeyword("interface", Token.Type.cInterface);
    AddKeyword("struct", Token.Type.cStruct);
    AddKeyword("enum", Token.Type.cEnum);
    AddKeyword("delegate", Token.Type.cDelegate);
    AddKeyword("event", Token.Type.cEvent);
    AddKeyword("operator", Token.Type.cOperator);
    AddKeyword("new", Token.Type.cNew);
    AddKeyword("if", Token.Type.cIf);
    AddKeyword("switch", Token.Type.cSwitch);
    AddKeyword("else", Token.Type.cElse);
    AddKeyword("using", Token.Type.cUsing);
    AddKeyword("namespace", Token.Type.cNamespace);
    AddKeyword("out", Token.Type.cOut);
    AddKeyword("ref", Token.Type.cRef);
    AddKeyword("params", Token.Type.cParams);
    AddKeyword("get", Token.Type.cGet);
    AddKeyword("set", Token.Type.cSet);
    //AddKeyword("value", Token.Type.cValue);
    AddKeyword("do", Token.Type.cDo);
    AddKeyword("while", Token.Type.cWhile);
    AddKeyword("for", Token.Type.cFor);
    AddKeyword("foreach", Token.Type.cForEach);
    AddKeyword("in", Token.Type.cIn);
    AddKeyword("goto", Token.Type.cGoto);
    AddKeyword("break", Token.Type.cBreak);
    AddKeyword("continue", Token.Type.cContinue);
    AddKeyword("default", Token.Type.cDefault);
    AddKeyword("case", Token.Type.cCase);
    AddKeyword("is", Token.Type.cIs);
    AddKeyword("as", Token.Type.cAs);
    AddKeyword("try", Token.Type.cTry);
    AddKeyword("catch", Token.Type.cCatch);
    AddKeyword("finally", Token.Type.cFinally);
    AddKeyword("throw", Token.Type.cThrow);

    // Literal keywords ("true" and "false" both lex as cBool)
    AddKeyword("true", Token.Type.cBool);
    AddKeyword("false", Token.Type.cBool);
    AddKeyword("null", Token.Type.cNull);

    // Modifiers
    AddKeyword("public", Token.Type.cAttrPublic);
    AddKeyword("private", Token.Type.cAttrPrivate);
    AddKeyword("protected", Token.Type.cAttrProtected);
    AddKeyword("static", Token.Type.cAttrStatic);
    AddKeyword("virtual", Token.Type.cAttrVirtual);
    AddKeyword("abstract", Token.Type.cAttrAbstract);
    AddKeyword("override", Token.Type.cAttrOverride);
    AddKeyword("internal", Token.Type.cAttrInternal);
    AddKeyword("sealed", Token.Type.cAttrSealed);
    AddKeyword("readonly", Token.Type.cAttrReadOnly);
    AddKeyword("const", Token.Type.cAttrConst);
    AddKeyword("typeof", Token.Type.cTypeOf);

    // Preprocessor directives (looked up including the leading '#')
    AddKeyword("#if", Token.Type.cPP_If);
    AddKeyword("#elif", Token.Type.cPP_ElseIf);
    AddKeyword("#else", Token.Type.cPP_Else);
    AddKeyword("#endif", Token.Type.cPP_Endif);
    AddKeyword("#define", Token.Type.cPP_Define);
    AddKeyword("#undef", Token.Type.cPP_Undef);
    AddKeyword("#region", Token.Type.cPP_Region);
    AddKeyword("#endregion", Token.Type.cPP_EndRegion);
}

// Register one reserved word -> token type entry in the shared table.
static void AddKeyword(string stWord, Token.Type type)
{
    m_keywords[stWord] = type;
}
// If we find an identifier, we lookup in this table to see
// if it's actually a keyword. If so, return the keyword (else return the id)
// Maps keyword text (string) -> Token.Type. Filled once by the static ctor
// and shared by all lexer instances; read-only after static construction.
protected static Hashtable m_keywords = new Hashtable();
#endregion
// Are we in error mode (in which case we always return EOF)?
// Set once by SafeGetNextToken when a LexerException escapes; never cleared.
bool m_fIsErrorMode;
// Filename stamped onto error locations (see CalcCurFileRange). Only a
// "hint" — it is used for reporting, not for reading the file.
string m_stFilenameHint;
#region Errors
// Error codes. Mostly from preprocessor / bad EOF
internal enum ErrorCode
{
    cUnmatchedEndRegion,            // Missing a #region for this #endregion
    cMissingEndifBeforeEOF,         // EOF reached while an #if was still open
    cUnterminatedComment,           // EOF reached inside a /* ... */ comment
    cPreProcDirMustBeAtStartOfLine, // '#' directive after other tokens on the line
    cInvalidPreProcDir,             // Unknown directive name following '#'
    cUnterminatedChar,              // Character constant never closed
    cNoNewlineInString,             // Raw newline inside a string literal
    cUnexpectedEOF,                 // Generic premature end-of-file
    cUnrecognizedEscapeSequence,    // Bad '\x' escape in a char/string literal
}
// Main error hub for lexer: route every lexer error through the driver's log.
internal void ThrowError(LexerException e)
{
    Blue.Driver.StdErrorLog.ThrowError(e);
}

// Shared factory: stamps each error with the current file range so the
// E_* helpers below only have to supply a code and a message.
LexerException NewError(ErrorCode code, string stMessage)
{
    return new LexerException(code, CalcCurFileRange(), stMessage);
}

// An #endregion showed up with no matching #region before it.
LexerException E_MissingEndRegion()
{
    return NewError(ErrorCode.cUnmatchedEndRegion,
        "Missing a #region for this #endregion.");
}

// EOF arrived while an #if block was still open.
LexerException E_MissingEndifBeforeEOF()
{
    return NewError(ErrorCode.cMissingEndifBeforeEOF,
        "Expected #endif before end-of-file.");
}

// EOF arrived inside a multi-line comment.
LexerException E_UnterminatedComment()
{
    return NewError(ErrorCode.cUnterminatedComment,
        "Must terminate multi-line comment with '*/' before end-of-file.");
}

// A '#' directive appeared after other tokens on the same line.
LexerException E_PreProcDirMustBeAtStartOfLine()
{
    return NewError(ErrorCode.cPreProcDirMustBeAtStartOfLine,
        "Preprocessor directives must be the first non-whitespace token in a line.");
}

// Unknown directive name; stHint is the text we actually saw.
LexerException E_InvalidPreProcDir(string stHint)
{
    return NewError(ErrorCode.cInvalidPreProcDir,
        "'" + stHint + "' is not a valid preprocessor directive.");
}

// Character constant was never closed.
LexerException E_UnterminatedChar()
{
    return NewError(ErrorCode.cUnterminatedChar,
        "Unterminated character constant.");
}

// Raw newline inside a string literal.
LexerException E_NoNewlineInString()
{
    return NewError(ErrorCode.cNoNewlineInString,
        "Can not have a newline in a string.");
}

// Generic premature end-of-file.
LexerException E_UnexpectedEOF()
{
    return NewError(ErrorCode.cUnexpectedEOF,
        "Unexpected EOF.");
}

// Escape character 'ch' after a backslash is not one we recognize.
LexerException E_UnrecognizedEscapeSequence(char ch)
{
    return NewError(ErrorCode.cUnrecognizedEscapeSequence,
        "Unrecognized escape sequence '\\" + ch + "'.");
}
#endregion
#region Data for stream
// The lexer is really just a high level wrapper around the TextReader
protected TextReader m_reader;
// Used to track where in the file we are (both 1-based; see the ctor).
int m_row;           // current row (line number)
int m_col;           // current column within the row
bool m_fStartOfLine; // are we the first token on a new line?
// Wrappers around the TextReader to track line number info
// Consume one character from the stream, keeping m_row/m_col in sync.
// Returns the raw reader result (so -1 at EOF, which still bumps m_col —
// harmless, since nothing reads the position past end-of-file).
int Read()
{
    int iCh = m_reader.Read();
    m_col++;
    if (iCh != '\n')
        return iCh;

    // A newline moves us to column 1 of the next row.
    m_row++;
    m_col = 1;
    m_fStartOfLine = true;
    return iCh;
}
// Look at the next character without consuming it. No position
// bookkeeping is needed because the reader does not advance.
int Peek()
{
    int iChNext = m_reader.Peek();
    return iChNext;
}
// Consume the remainder of the current line (the reader eats its '\n'),
// then advance the tracked position to the start of the following row.
string ReadLine()
{
    string stRest = m_reader.ReadLine();
    m_row++;
    m_col = 1;
    m_fStartOfLine = true;
    return stRest;
}
// Cache this at the beginning of a lexeme; CalcCurFileRange pairs it with
// the live m_row/m_col to produce the lexeme's full span.
protected CursorPos m_StartPos;
// Build the source span of the current lexeme: from the cached start
// position (m_StartPos) up to the current cursor (m_row, m_col).
protected FileRange CalcCurFileRange()
{
    FileRange range = new FileRange();
    range.Filename = m_stFilenameHint;
    range.RowStart = m_StartPos.row;
    range.RowEnd = m_row;
    range.ColStart = m_StartPos.col;
    range.ColEnd = m_col;
    return range;
}
#endregion
#region Public Interface Methods
// Get: consume and return the next token from the stream.
public Token GetNextToken()
{
    // No buffered lookahead? Lex a fresh token.
    if (m_tknNext == null)
        return SafeGetNextToken();

    // Hand back (and clear) the token that PeekNextToken buffered.
    Token tknBuffered = m_tknNext;
    m_tknNext = null;
    return tknBuffered;
}
// Peek: return the next token without consuming it. The token is buffered
// in m_tknNext so the following GetNextToken() hands back the same one.
public Token PeekNextToken()
{
    if (m_tknNext != null)
        return m_tknNext;
    m_tknNext = SafeGetNextToken();
    return m_tknNext;
}
// For peeking, we remember the next token.
// Non-null only between a PeekNextToken() and the following GetNextToken().
protected Token m_tknNext = null;
// Safe wrapper around GetNextToken: catches LexerExceptions and converts
// them into EOF tokens. Once in error mode, we stay in error mode and
// produce EOF forever after.
private Token SafeGetNextToken()
{
    Token tkn = null;
    if (!m_fIsErrorMode)
    {
        try
        {
            // Do the real work.
            tkn = GetNextToken_PreprocessorFilter();
        }
        catch (ManualParser.LexerException)
        {
            // NOTE(review): the exception is swallowed here on the assumption
            // it was already reported (see ThrowError) — confirm.
            m_fIsErrorMode = true;
            tkn = null;
        }
    }
    if (tkn != null)
        return tkn;
    return new Token(Token.Type.cEOF, CalcCurFileRange());
}
#endregion
#region Helper Functions
// Helper funcs
// True for the four characters this lexer treats as whitespace:
// space, tab, and both newline characters.
public static bool IsWhitespace(int iCh)
{
    switch (iCh)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            return true;
        default:
            return false;
    }
}
// True only for the ASCII decimal digits '0'..'9'.
public static bool IsDigit(int iCh)
{
    if (iCh < '0')
        return false;
    return iCh <= '9';
}
// Return -1 if not a hex digit, else return its value 0..15.
// Accepts '0'-'9', 'a'-'f', and 'A'-'F'.
public static int AsHexDigit(int iCh)
{
    // Decimal digits map straight to 0..9.
    if (iCh >= '0' && iCh <= '9')
        return iCh - '0';

    // Setting bit 5 folds ASCII 'A'-'F' onto 'a'-'f' (and maps no other
    // value into that range), so one comparison covers both cases.
    int chFolded = iCh | 0x20;
    if (chFolded >= 'a' && chFolded <= 'f')
        return chFolded - 'a' + 10;

    return -1;
}
// True if iCh may start an identifier: underscore or an ASCII letter.
// (Digits are excluded here; see IsIdChar for subsequent positions.)
public static bool IsFirstIdChar(int iCh)
{
    if (iCh == '_')
        return true;
    if (iCh >= 'A' && iCh <= 'Z')
        return true;
    return iCh >= 'a' && iCh <= 'z';
}
// True if iCh may appear in an identifier after the first position:
// letters, underscore, or digits. Both operands are pure predicates,
// so evaluation order does not matter.
public static bool IsIdChar(int iCh)
{
    return IsDigit(iCh) || IsFirstIdChar(iCh);
}
#endregion
#region Preprocessor Layer
//-----------------------------------------------------------------------------
// The preprocessor works as a middle layer around GetNextTokenWorker()
// If manages a small symbol table (for #define / #undef) as well
// as conditionals (#if,#elif,#else,#endif) and strips away #region/#endregion
// Most of the errors that can occur in the lexer are in the preprocessor
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
// Construction. Supply an optional list of predefined symbols
//-----------------------------------------------------------------------------
// Set up the preprocessor symbol table, seeding it with the always-on
// "__BLUE__" symbol plus any caller-supplied defines (may be null).
protected void InitPreprocessor(string [] stDefines)
{
    m_tblPreprocSymbols = new Hashtable();

    // Always add this as a predefined symbol.
    AddSymbol("__BLUE__");

    if (stDefines == null)
        return;
    foreach (string stSymbol in stDefines)
        AddSymbol(stSymbol);
}
#region Preprocessor Filter
//-----------------------------------------------------------------------------
// When we're skipping over text (in a false branch of an #if), the text
// doesn't have to lex properly. But we still have to recognize nested #if,
// and the closing #endif, as well as an EOF.
// So we have a modified lexer to lexer stuff in dead code.
// Note that this lexer must preserve the expression after an #elsif
// This lexer is also #if..#endif nest aware
//-----------------------------------------------------------------------------
protected Token.Type GetNextDeadToken()
{
int iRowBefore = m_row;
int cIfDepth = 0;
string st;
do
{
// Does this line start with a preprocessor directive?
// If so, handle it delicately so that we can read the expression afterwards
SkipWhiteSpace();
#if true
int iCh;
do {
iCh = Read();
} while (iCh == '\n');
// Skip past opening whitespace
while(iCh == ' ' || iCh == '\t')
iCh = Read();
if (iCh == '#')
{
// Note that we don't want to call GetNextTokenWorker() because
// (source truncated here: the remainder of GetNextDeadToken is not present
// in this copy; a scraped web-page footer was removed from this spot)