📄 lex.l

📁 一个编译器修改的例子
💻 L
字号:
%{
#include "grammar.tab.h"
#include "error.h"
#include "io.h"
#include "lex.h"
#include "salloc.h"
#include "token.h"

/* Capacity of include_stack; new_input() refuses to nest includes deeper
   than this. */
#define MAX_INCLUDE_DEPTH 32

/* Line number in the input currently being scanned. Incremented by the
   newline rules, reset to 1 when an include file is entered, and restored
   by yywrap() when it ends. Not static, so it has external linkage. */
unsigned		current_line	= 1;
/* comment_level: nesting depth of the "(*" ... "*)" comment being scanned.
   comment_start: line on which the outermost comment began; reported by
   yywrap() if the input ends inside a comment. */
static unsigned		comment_level	= 0, comment_start;
/* Number of include_stack entries currently in use. */
static unsigned		include_level	= 0;

/* Scanner state saved by new_input() on entering an include file and
   restored by yywrap() when that file is exhausted. */
static struct
{
    unsigned		current_line;	/* line to resume at in the including file */
    const char		*current_file;	/* input_file_name of the including file */
    YY_BUFFER_STATE	buffer_state;	/* flex buffer of the including file */
}
include_stack [MAX_INCLUDE_DEPTH];

/* Defined after the rules section; called from the INCLUDE rule. */
static void	new_input();

#if defined __GNUC__ /* prevent some compiler warnings */
#if !(defined __OpenBSD__ || defined __FreeBSD__) /* fix compile error on OpenBSD */
extern int	fileno(FILE *);
#endif
/* Redeclare yyunput with the "unused" attribute so GCC does not warn about
   it (no rule visible in this file uses unput). */
static void	yyunput(int c, register char *yy_bp) __attribute__((unused));
#endif
%}

 /* A letter or underscore followed by any number of letters, digits or
    underscores. */
IDENTIFIER	[A-Za-z_][A-Za-z_0-9]*

 /* Integer literals: decimal without a leading zero, hexadecimal with a
    0x/0X prefix, or octal with a leading zero (a lone "0" is matched by
    OCT_CONST). */
DEC_CONST	[1-9][0-9]*
HEX_CONST	0[Xx][0-9A-Fa-f]+
OCT_CONST	0[0-7]*
INTEGER_CONST	{DEC_CONST}|{HEX_CONST}|{OCT_CONST}

 /* Real literals. FINT is a digit string, FRAC a "." followed by digits,
    EXP an optionally signed exponent. REAL_CONST accepts either an optional
    integer part with a fraction and optional exponent (".5", "1.5e3"), or an
    integer part followed by a bare "." or by an exponent ("1.", "1e3"). */
FINT		[0-9]+
FRAC		\.{FINT}
EXP		[Ee][+-]?{FINT}
REAL_CONST	{FINT}?{FRAC}{EXP}?|{FINT}(\.|{EXP})

 /* Matches both terminated and unterminated character/string constants.
    A constant never spans lines; inside it a backslash may be followed by
    any character except a newline. The UNTERM_ variants are the same
    patterns without the closing quote. */
CHAR_CONST		\'([^'\n\\]|\\[^\n])*\'
UNTERM_CHAR_CONST	\'([^'\n\\]|\\[^\n])*
STRING_CONST		\"([^"\n\\]|\\[^\n])*\"
UNTERM_STRING_CONST	\"([^"\n\\]|\\[^\n])*

 /* Exclusive start conditions: while one is active only the rules listed
    under it apply. COMMENT is used inside "(*" ... "*)", INCLUDE after the
    "include" keyword. */
%x		COMMENT
%x		INCLUDE

%%

 /* Blanks, tabs and carriage returns are skipped; newlines only advance the
    line counter. */
[ \t\r]+
\n		++ current_line;

 /* "(*" opens a comment. Its starting line is remembered so that yywrap()
    can report it if the comment is never closed. */
"(*"		comment_level = 1; comment_start = current_line; BEGIN(COMMENT);

<COMMENT>
{
    "(*"	++ comment_level;	/* comments nest */
    "*)"	if (-- comment_level == 0) BEGIN(INITIAL);	/* outermost comment closed */
    "\n"	++ current_line;	/* keep counting lines inside comments */
    .
}

 /* "include" at the start of a line introduces an include directive. In the
    INCLUDE state blanks and tabs are skipped and the first run of
    non-blank characters is handed to new_input() as the file name.
    NOTE(review): the INCLUDE state has no rule for "\n" or "\r", so an
    "include" with nothing after it on the line would reach flex's default
    rule -- confirm this is intended. */
^include	BEGIN(INCLUDE);

<INCLUDE>
{
    [ \t]+
    [^ \t\n\r]+	new_input();
}

 /* Multi-character operators. Rules that assign yylval.token store the
    result of new_token() for the parser; the others return only the token
    code. */
":="		return ASSIGNMENT;
">="		yylval.token = new_token(); return GREATER_EQUAL;
"<>"		yylval.token = new_token(); return NOT_EQUAL;
"<="		yylval.token = new_token(); return SMALLER_EQUAL;

 /* Reserved words. They are listed before the {IDENTIFIER} rule, so when a
    keyword and an identifier match the same text the keyword rule is the
    one chosen. */
"and"		yylval.token = new_token(); return AND;
"array"		return ARRAY;
"begin"		return _BEGIN;
"bool"		return BOOL;
"by"		return BY;
"char"		return CHAR;
"delete"	return DELETE;
"do"		return DO;
"else"		return ELSE;
"end"		return END;
"false"		yylval.token = new_token(); return FALSE;
"for"		return FOR;
"function"	return FUNCTION;
"if"		return IF;
"int"		return INT;
"is"		return IS;
"new"		yylval.token = new_token(); return NEW;
"not"		yylval.token = new_token(); return NOT;
"null"		yylval.token = new_token(); return __NULL;
"of"		return OF;
"or"		yylval.token = new_token(); return OR;
"procedure"	return PROCEDURE;
"real"		return REAL;
"repeat"	return REPEAT;
"return"	yylval.token = new_token(); return RETURN;
"size"		yylval.token = new_token(); return SIZE;
"string"	return STRING;
"then"		return THEN;
"to"		return TO;
"true"		yylval.token = new_token(); return TRUE;
"until"		return UNTIL;
"var"		return VAR;
"while"		return WHILE;
"record"        return RECORD; /* added: new keyword "record" */

 /* Character and string constants. A properly closed constant is one
    character longer than its unterminated prefix, so the terminated rule
    is preferred whenever the closing quote is present. An unterminated
    constant is reported through error() and still returned as a constant
    token. */
{CHAR_CONST}		yylval.token = new_token(); return CHAR_CONSTANT;
{UNTERM_CHAR_CONST}	yylval.token = new_token(); error(0, "unterminated character constant"); return CHAR_CONSTANT;
{STRING_CONST}		yylval.token = new_token(); return STRING_CONSTANT;
{UNTERM_STRING_CONST}	yylval.token = new_token(); error(0, "unterminated string constant"); return STRING_CONSTANT;
{IDENTIFIER}	yylval.token = new_token(); return IDENTIFIER;
{INTEGER_CONST}	yylval.token = new_token(); return INTEGER_CONSTANT;
{REAL_CONST}	yylval.token = new_token(); return REAL_CONSTANT;

 /* Single-character operators that carry a token record, followed by the
    catch-all: any other character is returned as its own (unsigned) code. */
[\+\-\*\%\/\<\>\=\[]	yylval.token = new_token(); return *yytext;
.		return * (unsigned char *) yytext;

%%

/*
 * Called by the scanner when the current input is exhausted.
 *
 * Reports a comment that is still open, releases the finished buffer and
 * its stream, and then either resumes the including file (returns 0) or,
 * when the outermost input has ended, signals end of input (returns 1).
 */
int yywrap(void)
{
    /* Running out of input while in COMMENT means the comment never closed. */
    if (YYSTATE == COMMENT)
    {
	error(0, "unterminated comment starting at line %u", comment_start);
	BEGIN(INITIAL);
    }

    /* The finished input is no longer needed: drop its buffer and stream. */
    yy_delete_buffer(YY_CURRENT_BUFFER);
    fclose(yyin);

    if (include_level != 0)
    {
	/* Only include files own an allocated input_file_name (set by
	   new_input()); the original input file's name is not freed, which
	   is why this sits inside the include branch. */
	free((char *) input_file_name);

	/* Pop the saved state of the including file and continue there. */
	include_level = include_level - 1;
	yy_switch_to_buffer(include_stack [include_level].buffer_state);
	input_file_name = include_stack [include_level].current_file;
	current_line    = include_stack [include_level].current_line;
	return 0;
    }

    /* Outermost input finished: leave an empty token text behind. */
    yytext = "";
    return 1;
}


/*
 * Locate the include file named by yytext and open it on yyin.
 *
 * Every directory in search_path is tried in list order; the first
 * candidate "<dir>/<name>" that fopen() can open for reading wins and the
 * function returns with yyin set to that stream. If no candidate can be
 * opened, an error is reported and the program exits with status 1.
 */
static void try_search_path(void)
{
    unsigned i;

    for (i = 0; i < list_size(search_path); i ++)
    {
	char path [1024];
	int  length;

	/* The file name comes straight from the scanned source, so the write
	   must be bounded. A candidate that does not fit in the buffer is
	   skipped instead of overflowing it or opening a truncated name. */
	length = snprintf(path, sizeof path, "%s/%s",
			  (char *) list_index(search_path, i), yytext);
	if (length < 0 || (size_t) length >= sizeof path)
	    continue;

	if ((yyin = fopen(path, "r")) != NULL)
	    return;
    }

    error(0, "cannot open %s", yytext);
    exit(1);
}


/*
 * Switch scanning to the include file whose name is in yytext.
 *
 * The state of the current input (flex buffer, line number, file name) is
 * pushed on include_stack so that yywrap() can resume it later. The
 * program exits with status 1 when the nesting limit is reached;
 * try_search_path() likewise exits if the file cannot be opened.
 */
static void new_input()
{
    unsigned slot;

    if (include_level == MAX_INCLUDE_DEPTH)
    {
	error(0, "includes nested too deeply");
	exit(1);
    }

    /* Push the including file's state; the level is already bumped before
       the file is searched for, exactly as the stack expects on return. */
    slot = include_level ++;
    include_stack [slot].current_file = input_file_name;
    include_stack [slot].current_line = current_line;
    include_stack [slot].buffer_state = YY_CURRENT_BUFFER;

    /* Opens the file on yyin. input_file_name is still the including file
       here and is replaced only afterwards. */
    try_search_path();

    /* Keep a private copy of the name (yywrap() frees it), restart line
       counting and hand the new stream to the scanner. */
    input_file_name = safe_strdup(yytext);
    current_line = 1;
    yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
    BEGIN(INITIAL);
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -