📄 lexerappendix.otx
字号:
\chapter{\langname{} Lexical Analyzer Source} \label{appendix:lexersource}
% Inger lexer source
\section{parser.h}
\scriptsize
\begin{verbatim}
#ifndef PARSER_H
#define PARSER_H

/* Number given to the first line of a source file. */
#define LINECOUNTBASE 1

/* Number given to the first character of a line. */
#define CHARPOSBASE 1

/*
 * Token codes for every keyword, type name, literal kind,
 * operator and delimiter of the language.  Each category
 * starts at its own base value; the members that follow a
 * base are numbered consecutively from it.
 *
 * NOTE(review): the trailing declarator makes `tokens` a
 * variable of this unnamed enum type in every file that
 * includes the header -- confirm that this is intended.
 */
enum
{
    /* ---- Keywords: 1000.. ---- */
    KW_BREAK = 1000,        /* "break" keyword */
    KW_CASE,                /* "case" keyword */
    KW_CONTINUE,            /* "continue" keyword */
    KW_DEFAULT,             /* "default" keyword */
    KW_DO,                  /* "do" keyword */
    KW_ELSE,                /* "else" keyword */
    KW_FALSE,               /* "false" keyword */
    KW_GOTO,                /* "goto" keyword */
    KW_IF,                  /* "if" keyword */
    KW_LABEL,               /* "label" keyword */
    KW_MODULE,              /* "module" keyword */
    KW_RETURN,              /* "return" keyword */
    KW_START,               /* "start" keyword */
    KW_SWITCH,              /* "switch" keyword */
    KW_TRUE,                /* "true" keyword */
    KW_WHILE,               /* "while" keyword */

    /* ---- Type identifiers: 2000.. ---- */
    KW_BOOL = 2000,         /* "bool" type name */
    KW_CHAR,                /* "char" type name */
    KW_FLOAT,               /* "float" type name */
    KW_INT,                 /* "int" type name */
    KW_POINTER,             /* "pointer" type name */
    KW_STRING,              /* "string" type name */
    KW_VOID,                /* "void" type name */

    /* ---- Tokens with a variable lexeme: 3000.. ---- */
    CHAR = 3000,            /* character constant */
    FLOAT,                  /* floating point constant */
    IDENTIFIER,             /* identifier */
    INT,                    /* integer constant */
    STRING,                 /* string constant */

    /* ---- Operators: 4000.. ---- */
    OP_ADD = 4000,          /* "+" */
    OP_ASSIGN,              /* "=" */
    OP_BITWISE_AND,         /* "&" */
    OP_BITWISE_COMPLEMENT,  /* "~" */
    OP_BITWISE_LSHIFT,      /* "<<" */
    OP_BITWISE_OR,          /* "|" */
    OP_BITWISE_RSHIFT,      /* ">>" */
    OP_BITWISE_XOR,         /* "^" */
    OP_DIVIDE,              /* "/" */
    OP_EQUAL,               /* "==" */
    OP_GREATER,             /* ">" */
    OP_GREATEREQUAL,        /* ">=" */
    OP_LESS,                /* "<" */
    OP_LESSEQUAL,           /* "<=" */
    OP_LOGICAL_AND,         /* "&&" */
    OP_LOGICAL_OR,          /* "||" */
    OP_MODULUS,             /* "%" */
    OP_MULTIPLY,            /* "*" */
    OP_NOT,                 /* "!" */
    OP_NOTEQUAL,            /* "!=" */
    OP_SUBTRACT,            /* "-" */
    OP_TERNARY_IF,          /* "?" */

    /* ---- Delimiters: 5000.. ---- */
    ARROW = 5000,           /* "->" */
    LBRACE,                 /* "{" */
    RBRACE,                 /* "}" */
    LBRACKET,               /* "[" */
    RBRACKET,               /* "]" */
    COLON,                  /* ":" */
    COMMA,                  /* "," */
    LPAREN,                 /* "(" */
    RPAREN,                 /* ")" */
    SEMICOLON               /* ";" */
} tokens;

#endif
\end{verbatim}
\normalsize
\section{lexer.l}
\scriptsize
\begin{verbatim}
%{
/* Define DEBUG to receive debug info (if any) */
#define DEBUG
/* The token codes returned by the rules below are the
enum constants declared in parser.h. */
#include "parser.h"
/******************************************************
* GLOBALS *
******************************************************/
/* lineCount keeps track of the current line number
in the source input file. It starts at 0 and is
incremented by every rule that consumes a newline.
NOTE(review): parser.h defines LINECOUNTBASE as 1, but
that macro is not used here - confirm the intended base. */
int lineCount = 0;
/* charPos keeps track of the current character
position on the current source input line. It starts
at 0 and is reset to 0 by every rule that consumes a
newline. Only rules that do not return a token advance
it; rules that return a token leave it untouched.
NOTE(review): presumably the caller adds the length of
each returned token - confirm. CHARPOSBASE from
parser.h is not used here either. */
int charPos = 0;
/* commentsLevel keeps track of the current
comment nesting level, in order to ignore nested
comments properly. It is incremented at every
block-comment opener and decremented at every closer;
the lexer leaves STATE_COMMENTS when it drops to 0. */
static int commentsLevel = 0;
%}
/*******************************************************
* LEXER STATES *
*******************************************************/
/* Exclusive state entered at a block-comment opener.
While it is active the lexer consumes input without
returning tokens, until the closer of the outermost
comment has been seen. */
%x STATE_COMMENTS
/* Exclusive state entered at an opening double quote.
While it is active the lexer consumes characters until
the closing double quote, where it returns one STRING
token. */
%x STATE_STRING
/*******************************************************
* REGULAR EXPRESSIONS *
*******************************************************/
%%
/********************************************************
* KEYWORDS *
********************************************************/
start                   { /* Reserved words.  Each rule returns the
                             matching KW_ code from parser.h.  A longer
                             run of identifier characters is a longer
                             match for the identifier rule further down
                             and is therefore not seen as a keyword. */
                          return KW_START; }
true                    { return KW_TRUE; }
false                   { return KW_FALSE; }
int                     { return KW_INT; }
bool                    { return KW_BOOL; }
string                  { return KW_STRING; }
float                   { return KW_FLOAT; }
pointer                 { return KW_POINTER; }
char                    { return KW_CHAR; }
if                      { return KW_IF; }
else                    { return KW_ELSE; }
do                      { return KW_DO; }
while                   { return KW_WHILE; }
goto_considered_harmful { /* this is the only spelling that
                             yields KW_GOTO */
                          return KW_GOTO; }
label                   { return KW_LABEL; }
switch                  { return KW_SWITCH; }
case                    { return KW_CASE; }
default                 { return KW_DEFAULT; }
break                   { return KW_BREAK; }
continue                { /* KW_CONTINUE is declared in parser.h;
                             without this rule the word falls through
                             to the identifier rule. */
                          return KW_CONTINUE; }
module                  { return KW_MODULE; }
void                    { return KW_VOID; }
return                  { return KW_RETURN; }
/********************************************************
* OPERATORS *
********************************************************/
"->"    return ARROW;                  /* two-character tokens */
"=="    return OP_EQUAL;
"!="    return OP_NOTEQUAL;
"&&"    return OP_LOGICAL_AND;
"||"    return OP_LOGICAL_OR;
">="    return OP_GREATEREQUAL;
"<="    return OP_LESSEQUAL;
"<<"    return OP_BITWISE_LSHIFT;
">>"    return OP_BITWISE_RSHIFT;
"+"     return OP_ADD;                 /* one-character tokens */
"-"     return OP_SUBTRACT;
"*"     return OP_MULTIPLY;
"/"     return OP_DIVIDE;
"!"     return OP_NOT;
"~"     return OP_BITWISE_COMPLEMENT;
"%"     return OP_MODULUS;
"="     return OP_ASSIGN;
">"     return OP_GREATER;
"<"     return OP_LESS;
"&"     return OP_BITWISE_AND;
"|"     return OP_BITWISE_OR;
"^"     return OP_BITWISE_XOR;
"?"     return OP_TERNARY_IF;
/********************************************************
* DELIMITERS *
********************************************************/
"("     return LPAREN;       /* grouping pairs */
")"     return RPAREN;
"["     return LBRACKET;
"]"     return RBRACKET;
":"     return COLON;        /* separators */
";"     return SEMICOLON;
"{"     return LBRACE;       /* block braces */
"}"     return RBRACE;
","     return COMMA;
/********************************************************
* VALUE TOKENS *
********************************************************/
[0-9]+                  { /* Integer constant: one or more decimal
                             digits. */
                          return INT;
                        }
[_A-Za-z][_A-Za-z0-9]*  { /* Identifier: a letter or underscore, then
                             any mix of letters, digits, underscores.
                             Exact keyword spellings are claimed by the
                             keyword rules listed before this one. */
                          return IDENTIFIER;
                        }
[0-9]*\.[0-9]+([eE][+-][0-9]+)? {
                          /* Floating point constant: optional integer
                             part, a dot, at least one fraction digit,
                             and an optional exponent that must carry
                             an explicit sign. */
                          return FLOAT;
                        }
\'.\'                   { /* Character constant: exactly one character
                             other than a newline between single
                             quotes. */
                          return CHAR;
                        }
/********************************************************
* STRINGS *
********************************************************/
\"                      { /* Opening quote: count it and switch to
                             STATE_STRING to consume the string body. */
                          charPos += 1;
                          BEGIN(STATE_STRING);
                        }
<STATE_STRING>\\\"      { /* Backslash followed by a double quote: the
                             quote belongs to the string and does not
                             end it.  Two characters are consumed. */
                          charPos += 2;
                        }
<STATE_STRING>\"        { /* Closing quote: back to the initial state
                             and report one STRING token. */
                          BEGIN(INITIAL);
                          return STRING;
                        }
<STATE_STRING>\n        { /* Newline before the closing quote: report
                             it and start counting a new line.  The
                             lexer stays in STATE_STRING. */
                          printf( "Unterminated string\n" );
                          lineCount += 1;
                          charPos = 0;
                        }
<STATE_STRING>.         { /* Any other character is part of the string
                             body. */
                          charPos += 1;
                        }
/********************************************************
* LINE COMMENTS *
********************************************************/
"//".*                  { /* Discard everything from the two slashes up
                             to, but not including, the newline.
                             charPos is not advanced here; the newline
                             rule that follows resets it anyway. */
                        }
/********************************************************
* BLOCK COMMENTS *
********************************************************/
"/*"                    { /* Outermost comment opener: record the first
                             nesting level and enter STATE_COMMENTS. */
                          commentsLevel += 1;
                          charPos += 2;
                          BEGIN(STATE_COMMENTS);
                        }
<STATE_COMMENTS>"/*"    { /* Opener inside a comment: one level
                             deeper. */
                          commentsLevel += 1;
                          charPos += 2;
                        }
<STATE_COMMENTS>"*/"    { /* Closer: one level up.  Closing the
                             outermost level returns the lexer to the
                             initial state. */
                          charPos += 2;
                          commentsLevel -= 1;
                          if ( commentsLevel == 0 )
                          {
                              BEGIN(INITIAL);
                          }
                        }
<STATE_COMMENTS>\n      { /* Newlines inside a comment still count as
                             lines. */
                          lineCount += 1;
                          charPos = 0;
                        }
<STATE_COMMENTS>.       { /* Every other character is skipped. */
                          charPos += 1;
                        }
/********************************************************
* WHITESPACE *
********************************************************/
[ \t]                   { /* A blank or a tab is skipped. */
                          charPos += 1;
                        }
\n                      { /* Newline: restart the column counter and
                             move on to the next line. */
                          charPos = 0;
                          lineCount += 1;
                        }
.                       { /* A character that no earlier rule accepts
                             is handed to the caller as its own
                             character code.  This rule must stay
                             last. */
                          return *yytext;
                        }
%%
/********************************************************
* ADDITIONAL VERBATIM C CODE *
********************************************************/
\end{verbatim}
\normalsize
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -