📄 lexerappendix.otx

📁 inger小型c编译器源码
💻 OTX
字号:
\chapter{\langname{} Lexical Analyzer Source} \label{appendix:lexersource}

    % Inger lexer source

	\section{parser.h}
	
	\scriptsize
	\begin{verbatim}
#ifndef PARSER_H
#define PARSER_H

/* Base value for line numbering: the first line of a source
   file is line 1. */
#define LINECOUNTBASE 1
/* Base value for character positions: the first character of
   a line is at position 1. */
#define CHARPOSBASE 1

/* Token codes for all keywords, type identifiers, constants,
   operators and delimiters used in the language.

   Each group starts at its own base value (1000, 2000, ...),
   so the group a token belongs to can be read off its code.

   The enumeration is declared with the tag `tokens' and does
   NOT define an object: a header that defines an object gives
   every translation unit including it its own definition,
   which fails to link on toolchains that do not merge common
   symbols.
*/
enum tokens
{
    /* Keywords */
    KW_BREAK            = 1000, /* "break" keyword */
    KW_CASE,                    /* "case" keyword */
    KW_CONTINUE,                /* "continue" keyword */
    KW_DEFAULT,                 /* "default" keyword */
    KW_DO,                      /* "do" keyword */
    KW_ELSE,                    /* "else" keyword */
    KW_FALSE,                   /* "false" keyword */
    KW_GOTO,                    /* "goto" keyword */
    KW_IF,                      /* "if" keyword */
    KW_LABEL,                   /* "label" keyword */
    KW_MODULE,                  /* "module" keyword */
    KW_RETURN,                  /* "return" keyword */
    KW_START,                   /* "start" keyword */
    KW_SWITCH,                  /* "switch" keyword */
    KW_TRUE,                    /* "true" keyword */
    KW_WHILE,                   /* "while" keyword */

    /* Type identifiers */
    KW_BOOL             = 2000, /* "bool" identifier */
    KW_CHAR,                    /* "char" identifier */
    KW_FLOAT,                   /* "float" identifier */
    KW_INT,                     /* "int" identifier */
    KW_POINTER,                 /* "pointer" identifier */
    KW_STRING,                  /* "string" identifier */
    KW_VOID,                    /* "void" identifier */

    /* Variable lexer tokens (their text differs per occurrence) */
    CHAR                = 3000, /* character constant */
    FLOAT,                      /* floating point constant */
    IDENTIFIER,                 /* identifier */
    INT,                        /* integer constant */
    STRING,                     /* string constant */

    /* Operators */
    OP_ADD              = 4000, /* "+"  */
    OP_ASSIGN,                  /* "="  */
    OP_BITWISE_AND,             /* "&"  */
    OP_BITWISE_COMPLEMENT,      /* "~"  */
    OP_BITWISE_LSHIFT,          /* "<<" */
    OP_BITWISE_OR,              /* "|"  */
    OP_BITWISE_RSHIFT,          /* ">>" */
    OP_BITWISE_XOR,             /* "^"  */
    OP_DIVIDE,                  /* "/"  */
    OP_EQUAL,                   /* "==" */
    OP_GREATER,                 /* ">"  */
    OP_GREATEREQUAL,            /* ">=" */
    OP_LESS,                    /* "<"  */
    OP_LESSEQUAL,               /* "<=" */
    OP_LOGICAL_AND,             /* "&&" */
    OP_LOGICAL_OR,              /* "||" */
    OP_MODULUS,                 /* "%"  */
    OP_MULTIPLY,                /* "*"  */
    OP_NOT,                     /* "!"  */
    OP_NOTEQUAL,                /* "!=" */
    OP_SUBTRACT,                /* "-"  */
    OP_TERNARY_IF,              /* "?"  */

    /* Delimiters */
    ARROW               = 5000, /* "->" */
    LBRACE,                     /* "{"  */
    RBRACE,                     /* "}"  */
    LBRACKET,                   /* "["  */
    RBRACKET,                   /* "]"  */
    COLON,                      /* ":"  */
    COMMA,                      /* ","  */
    LPAREN,                     /* "("  */
    RPAREN,                     /* ")"  */
    SEMICOLON                   /* ";"  */
};

#endif
\end{verbatim}
\normalsize
	
	\section{lexer.l}

\scriptsize
\begin{verbatim}    
%{
/* Define DEBUG to receive debug info (if any) */
#define DEBUG

/* printf() is used to report unterminated strings. */
#include <stdio.h>

/* The token codes and the LINECOUNTBASE / CHARPOSBASE
   origins are defined in parser.h. */
#include "parser.h"

/******************************************************
 * GLOBALS                                            *
 ******************************************************/

/* lineCount keeps track of the current line number
   in the source input file. Numbering starts at
   LINECOUNTBASE. */
int lineCount = LINECOUNTBASE;
/* charPos keeps track of the current character
   position on the current source input line, i.e. the
   position just past the text matched so far. It starts
   at CHARPOSBASE and is reset to it after every newline. */
int charPos = CHARPOSBASE;
/* commentsLevel keeps track of the current
   comment nesting level, in order to ignore nested
   comments properly. */
static int commentsLevel = 0;

/* flex executes YY_USER_ACTION before the action of every
   matched rule. Advancing charPos here keeps it correct for
   ALL tokens (keywords, operators, constants, comments, ...)
   instead of relying on each rule to remember to do so.
   Rules that match a newline overwrite charPos afterwards. */
#define YY_USER_ACTION charPos += (int) yyleng;

%}

/*******************************************************
 * LEXER STATES                                        *
 *******************************************************/
/* Exclusive state in which the lexer ignores all input
   until a nested comment ends. */
%x STATE_COMMENTS
/* Exclusive state in which the lexer consumes all input
   until a string terminates with a double quote. */
%x STATE_STRING


/*******************************************************
 * REGULAR EXPRESSIONS                                 *
 *******************************************************/
%%

 /********************************************************
  * KEYWORDS                                             *
  * These rules precede the identifier rule, so a keyword *
  * wins the tie against an identifier of equal length.   *
  ********************************************************/

start                { return KW_START; }
true                 { return KW_TRUE; }
false                { return KW_FALSE; }
int                  { return KW_INT; }
bool                 { return KW_BOOL; }
string               { return KW_STRING; }
float                { return KW_FLOAT; }
pointer              { return KW_POINTER; }
char                 { return KW_CHAR; }
if                   { return KW_IF; }
else                 { return KW_ELSE; }
do                   { return KW_DO; }
while                { return KW_WHILE; }
goto_considered_harmful   { return KW_GOTO; }
label                { return KW_LABEL; }
switch               { return KW_SWITCH; }
case                 { return KW_CASE; }
default              { return KW_DEFAULT; }
break                { return KW_BREAK; }
continue             { return KW_CONTINUE; }
module               { return KW_MODULE; }
void                 { return KW_VOID; }
return               { return KW_RETURN; }

 /********************************************************
  *  OPERATORS                                           *
  ********************************************************/

"->"                 { return ARROW; }
"=="                 { return OP_EQUAL; }
"!="                 { return OP_NOTEQUAL; }
"&&"                 { return OP_LOGICAL_AND; }
"||"                 { return OP_LOGICAL_OR; }
">="                 { return OP_GREATEREQUAL; }
"<="                 { return OP_LESSEQUAL; }
"<<"                 { return OP_BITWISE_LSHIFT; }
">>"                 { return OP_BITWISE_RSHIFT; }
"+"                  { return OP_ADD; }
"-"                  { return OP_SUBTRACT; }
"*"                  { return OP_MULTIPLY; }
"/"                  { return OP_DIVIDE; }
"!"                  { return OP_NOT; }
"~"                  { return OP_BITWISE_COMPLEMENT; }
"%"                  { return OP_MODULUS; }
"="                  { return OP_ASSIGN; }
">"                  { return OP_GREATER; }
"<"                  { return OP_LESS; }
"&"                  { return OP_BITWISE_AND; }
"|"                  { return OP_BITWISE_OR; }
"^"                  { return OP_BITWISE_XOR; }
"?"                  { return OP_TERNARY_IF; }

 /********************************************************
  * DELIMITERS                                           *
  ********************************************************/

"("                  { return LPAREN; }
")"                  { return RPAREN; }
"["                  { return LBRACKET; }
"]"                  { return RBRACKET; }
":"                  { return COLON; }
";"                  { return SEMICOLON; }
"{"                  { return LBRACE; }
"}"                  { return RBRACE; }
","                  { return COMMA; }
 
 /********************************************************
  * VALUE TOKENS                                         *
  ********************************************************/

[0-9]+                        { /* integer constant */
                                return INT;  }
[_A-Za-z]+[_A-Za-z0-9]*       { /* identifier */
                                return IDENTIFIER;  }
[0-9]*\.[0-9]+                { /* floating point constant */
                                return FLOAT;  }
[0-9]*\.[0-9]+[eE][\+-][0-9]+ { /* float, scientific notation
                                   (the exponent sign is mandatory) */
                                return FLOAT;  }
\'.\'                         { /* single character */
                                return CHAR;  }

 /********************************************************
  * STRINGS                                              *
  ********************************************************/

\"                   { /* begin of string */
                       BEGIN STATE_STRING;  
                     }
<STATE_STRING>\"     { /* end of string */  
                       BEGIN 0; 
                       return STRING;  
                     }
<STATE_STRING>\n     { /* newline in string: report it and start
                          counting a new line. The lexer stays in
                          STATE_STRING, so scanning of the string
                          continues on the next line. */ 
                       charPos = CHARPOSBASE; 
                       ++lineCount; 
                       printf( "Unterminated string\n" ); 
                     }
<STATE_STRING>\\\"   { /* escaped double quote: part of the string,
                          does not terminate it. Being the longer
                          match, it beats the one-character rules. */
                     }
<STATE_STRING>.      { /* any other character is part of the string */
                     }

 /********************************************************
  * LINE COMMENTS                                        *
  ********************************************************/

"//"[^\n]*           { /* ignore comment lines */ }

 /********************************************************
  * BLOCK COMMENTS                                       *
  ********************************************************/

"/*"                 { /* begin of comments */ 
                       ++commentsLevel; 
                       BEGIN STATE_COMMENTS; 
                     }
<STATE_COMMENTS>"/*" { /* begin of deeper nested comments */
                       ++commentsLevel;  
                     }
<STATE_COMMENTS>.    { /* ignore all characters */ }
<STATE_COMMENTS>\n   { /* ignore newlines, but keep counting lines */ 
                       charPos = CHARPOSBASE; 
                       ++lineCount; 
                     }
<STATE_COMMENTS>"*/" { /* end of comments: leave the comment state
                          only when the outermost comment closes */
                       if ( --commentsLevel == 0 ) BEGIN 0;  
                     }
  
 /********************************************************
  * WHITESPACE                                           *
  ********************************************************/

[\t ]                { /* ignore whitespaces */ }
\n                   { /* ignored newlines */ 
                       ++lineCount; 
                       charPos = CHARPOSBASE; 
                     }
.                    { /* unmatched character: hand its character
                          code to the caller as the token */
                       return yytext[0];  }

%%


 /********************************************************
  * ADDITIONAL VERBATIM C CODE                           *
  ********************************************************/

\end{verbatim}  
\normalsize
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -