📄 scanner.l

📁 一个面向对象语言的编译器
💻 L
字号:
/*
 * File:  scanner.l
 * ----------------
 * Lex input file to generate the scanner for the compiler.
 */

%{

#include "scanner.h"
#include "utility.h" // for PrintDebug()
#include "errors.h"
#include <string.h>
#include "parser.h"

#define TAB_SIZE 8

/* Global variables
 * ----------------
 * (For shame!) But we need a few to keep track of things that are
 * preserved between calls to yylex or used outside the scanner.
 *
 *   curLineNum, curColNum  1-based position of the next character to scan
 *   curLine                copy of the text of the line being scanned,
 *                          always NUL-terminated (long lines are truncated)
 */
static int curLineNum, curColNum;
static char curLine[512];

static void DoBeforeEachAction();
#define YY_USER_ACTION DoBeforeEachAction();

%}

/* States
 * ------
 * Our strategy for handling nested comments uses two lex states (N & C)
 * N = Normal (not inside a comment, we start in this state)
 * C = Comment (currently inside a comment)
 * Both are inclusive states (i.e. apply when explicitly named or none named)
 * Most rules will trigger when in normal mode, e.g. processing keywords
 * and identifiers only happens outside a comment. A few rules apply
 * when inside a comment (end-comment, EOF), and a few rules are used
 * in both states (ignoring whitespace, counting newlines, starting
 * another comment).  To track nesting depth, we turn on the stack option
 * so we can use lex's state stack. Each time we find a comment start,
 * we push a comment state, each time we find an end-comment, we pop.
 * Eventually this will return to the normal state in which we started.
 * (We could also have tracked this with our own integer counter).
 * Another little wrinkle on states is the COPY exclusive state which
 * I added to first match each line and copy it to a saved buffer
 * before re-processing it. This allows us to print the entire line
 * to provide context on errors.
 */
%option stack
%s N C
%x COPY

/* Definitions
 * -----------
 * To make our rules more readable, we establish some definitions here.
 */
DIGIT             ([0-9])
HEX_DIGIT         ([0-9a-fA-F])
HEX_INTEGER       (0[Xx]{HEX_DIGIT}+)
INTEGER           ({DIGIT}+)
EXPONENT          ([Ee][-+]?{INTEGER})
DOUBLE            ({INTEGER}"."{DIGIT}*{EXPONENT}?)
BEG_STRING        (\"[^"\n]*)
STRING            ({BEG_STRING}\")
IDENTIFIER        ([a-zA-Z][a-zA-Z_0-9]*)
OPERATOR          ([+\-*/%=<>\\.,;!()\[\]{}])
BEG_COMMENT       ("/*")
END_COMMENT       ("*/")
SINGLE_COMMENT    ("//"[^\n]*)

%%             /* BEGIN RULES SECTION */

 /* -------------------- Line bookkeeping ----------------------- */
<COPY>.*               { /* Save the whole line for error context, then push
                          * it back (yyless(0)) so it is re-scanned in the
                          * state we came from. strncpy() does not terminate
                          * the destination on truncation, so do it by hand. */
                         strncpy(curLine, yytext, sizeof(curLine) - 1);
                         curLine[sizeof(curLine) - 1] = '\0';
                         curColNum = 1;
                         yy_pop_state(); yyless(0); }
<COPY><<EOF>>          { yy_pop_state(); }
<*>\n                  { curLineNum++; curColNum = 1;
                         if (YYSTATE != COPY) yy_push_state(COPY); }

[ ]+                   { /* ignore all spaces in normal or comment */ }
[\t]                   { /* DoBeforeEachAction() has already stepped one
                          * column past the tab, so the tab itself sat at
                          * (curColNum - 1). Snap to the column right after
                          * the next tab stop (1, 9, 17, ... for TAB_SIZE 8). */
                         curColNum = ((curColNum - 2) / TAB_SIZE + 1) * TAB_SIZE + 1; }

 /* -------------------- Comments ----------------------------- */
{BEG_COMMENT}          { yy_push_state(C); }
<C>{END_COMMENT}       { yy_pop_state(); }
<C><<EOF>>             { ReportError(&yylloc, err_unterm_comment);
                         return 0; }
<C>[^*\n\t/]*          { /* grab all non-star, non-slash, non-newline */ }
<C>.                   { /* ignore everything else that doesn't match */ }
<N>{SINGLE_COMMENT}    { /* skip to end of line for // comment */ }

 /* --------------------- Keywords ------------------------------- */
<N>"void"              { return T_Void;        }
<N>"int"               { return T_Int;         }
<N>"double"            { return T_Double;      }
<N>"bool"              { return T_Bool;        }
<N>"string"            { return T_String;      }
<N>"null"              { return T_Null;        }
<N>"class"             { return T_Class;       }
<N>"extends"           { return T_Extends;     }
<N>"this"              { return T_This;        }
<N>"while"             { return T_While;       }
<N>"for"               { return T_For;         }
<N>"if"                { return T_If;          }
<N>"else"              { return T_Else;        }
<N>"return"            { return T_Return;      }
<N>"break"             { return T_Break;       }
<N>"New"               { return T_New;         }
<N>"NewArray"          { return T_NewArray;    }
<N>"Print"             { return T_Print;       }
<N>"ReadInteger"       { return T_ReadInteger; }
<N>"ReadLine"          { return T_ReadLine;    }

 /* -------------------- Operators ----------------------------- */
<N>"<="                { return T_LessEqual;   }
<N>">="                { return T_GreaterEqual;}
<N>"=="                { return T_Equal;       }
<N>"!="                { return T_NotEqual;    }
<N>"&&"                { return T_And;         }
<N>"||"                { return T_Or;          }
<N>{OPERATOR}          { return yytext[0];     }

 /* -------------------- Constants ------------------------------ */
<N>"true"|"false"      { yylval.boolConstant = (yytext[0] == 't');
                         return T_BoolConstant; }
<N>{INTEGER}           { yylval.integerConstant = strtol(yytext, NULL, 10);
                         return T_IntConstant; }
<N>{HEX_INTEGER}       { yylval.integerConstant = strtol(yytext, NULL, 16);
                         return T_IntConstant; }
<N>{DOUBLE}            { yylval.doubleConstant = atof(yytext);
                         return T_DoubleConstant; }
<N>{STRING}            { /* The copy (including both quotes) is heap-allocated
                          * by strdup(); nothing in this file frees it. */
                         yylval.stringConstant = strdup(yytext);
                         return T_StringConstant; }
<N>{BEG_STRING}        { ReportError(&yylloc, err_unterm_string, yytext); }

 /* -------------------- Identifiers --------------------------- */
<N>{IDENTIFIER}        {
                        /* The lexer records the identifier name in yylval.
                         * The parser is responsible for looking up the name
                         * in the appropriate scope(s) to find the decl.
                         * Names longer than the buffer are truncated; the
                         * terminator is written explicitly because strncpy()
                         * does not add one when it truncates.
                         */
                         strncpy(yylval.identifier, yytext,
                                 sizeof(yylval.identifier) - 1);
                         yylval.identifier[sizeof(yylval.identifier) - 1] = '\0';
                         return T_Identifier; }

 /* -------------------- Default rule (error) -------------------- */
<N>.                   { ReportError(&yylloc, err_unrecog_char, yytext[0]); }

%%

/*
 * Function: yywrap()
 * ------------------
 * Called by the generated scanner at end of input. Returning 1 tells it
 * there is no further input to switch to.
 */
int yywrap()
{
    return 1;
}

/*
 * Function: Inityylex()
 * --------------------
 * This function will be called before any calls to yylex().  It is designed
 * to give you an opportunity to do anything that must be done to initialize
 * the scanner (set global variables, configure starting state, etc.). One
 * thing it already does for you is assign the value of the global variable
 * yy_flex_debug that controls whether flex prints debugging information
 * about each token and what rule was matched. If set to false, no information
 * is printed. Setting it to true will give you a running trail that might
 * be helpful when debugging your scanner. Please be sure the variable is
 * set to false when submitting your final version.
 */
void Inityylex()
{
    PrintDebug("lex", "Initializing scanner");
    yy_flex_debug = false;
    BEGIN(N);  // Start in Normal state
    yy_push_state(COPY);  // but copy first line at start
    curLineNum = 1;
    curColNum = 1;
}

/*
 * Function: DoBeforeEachAction()
 * ------------------------------
 * This function is installed as the YY_USER_ACTION. This is a place
 * to group code common to all actions.
 * On each match, we fill in the fields to record its location and
 * update our column counter. Note that the column counter has therefore
 * already moved past the matched text by the time the rule's own action
 * runs.
 */
static void DoBeforeEachAction()
{
    yylloc.first_line = curLineNum;
    yylloc.first_column = curColNum;
    yylloc.last_column = curColNum + yyleng - 1;
    curColNum += yyleng;
}

/* Function: GetLineNumbered()
 * ---------------------------
 * Returns string with contents of line numbered n or NULL if the
 * contents of that line are no longer available. Basically only the
 * line currently being scanned is available, although we could keep
 * a few lines back if we put more effort into it :-). The pointer
 * returned is to an internally maintained static buffer which will
 * be overwritten. If you want to preserve, be sure to copy elsewhere.
 */
const char *GetLineNumbered(int num)
{
    return (num == curLineNum) ? curLine : NULL;
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -