📄 lex.l
字号:
%{
/***********************************************************
Lex script of DCC
dusiqi 2007
***********************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "utility.h"
#include "syntax.h"
#include <string.h>
#include "yacc.h"
// Intended to let a variable be declared with the same name as its type.
// That turned out to be troublesome, so it is not supported at the moment;
// nothing in the visible part of this file reads the flag.
bool all_id_flag = false;
// Backslash-newline (line continuation) is not handled. Supporting it would
// mean modifying, or even rewriting, all of the code below.
// Forward declarations of the helpers defined in the user-code section.
static int check_type(const char *text);        // identifier -> USERTYPE or IDENTIFIER
static char *charmap(char *ch, int *val);       // decode one escape sequence
static char *parsestring(char *str, int len);   // decode a quoted string literal
%}
/* Character classes used to build the token patterns below. */
letter [_a-zA-Z]
letnum [_a-zA-Z0-9]
decdigit [0-9]
octdigit [0-7]
hexdigit [a-fA-F0-9]
/* A letter or underscore followed by any number of letters, digits or underscores. */
identifier {letter}{letnum}*
/* Integer literals: decimal (no leading zero), octal (leading 0) and
   hexadecimal (leading 0x or 0X). Each accepts an optional leading sign, so a
   sign written directly before the digits becomes part of the literal:
   "a-1" is scanned as "a" followed by the literal "-1".
   NOTE(review): confirm the grammar copes with that tokenisation. */
decint ([+-]?([1-9]{decdigit}*))
octint ([+-]?0{octdigit}*)
hexint ([+-]?0[xX]{hexdigit}+)
/* Exponent part of a floating-point literal. */
epnt ([Ee][+-]?{decdigit}+)
/* Floating-point literals: digits with an optional exponent (fnumber0),
   optional digits, a dot and at least one digit (fnumber1), or at least one
   digit, a dot and optional digits (fnumber2). fnumber0 also matches plain
   digit strings; a string such as "09" is longer than the octint match "0",
   so it is scanned by the floating-point rule. */
fnumber0 ([+-]?{decdigit}+{epnt}?)
fnumber1 ([+-]?{decdigit}*"\."{decdigit}+{epnt}?)
fnumber2 ([+-]?{decdigit}+"\."{decdigit}*{epnt}?)
/* One or more characters or backslash escapes between single quotes. */
singlechar '(\\.|[^\\'])+'
/* Zero or more characters or backslash escapes between double quotes. */
string \"(\\.|[^\\"])*\"
/* Runs of blanks and tabs; newlines are handled by a separate rule. */
whitespace [ \t]+
/* Comments originally had to be removed by running cpp beforehand. */
/* That preprocessing step has since been moved into the lexer. */
/* Exclusive start conditions. Only "comment" is used by the rules visible in
   this file; "preprocess" is declared but has no rules here. */
%x comment preprocess
%%
    /* Keyword and operator rules: each returns a token code to the parser. */
    /* On a tie in match length the earlier rule wins, so keywords beat {identifier}. */
    /* Type keywords. */
"short" {return SHORT;}
"long" {return LONG;}
"unsigned" {return UNSIGNED;}
"int" {return INT;}
"float" {return FLOAT;}
"double" {return DOUBLE;}
"char" {return CHAR;}
"bool" {return BOOL;}
    /* Boolean constants: the value (1 or 0) is stored in yylval.val.i. */
"true" {yylval.val.i = 1;return CBOOL;}
"false" {yylval.val.i = 0;return CBOOL;}
"void" {return VOID;}
    /* Aggregate types, typedef, storage classes and qualifiers. */
"struct" {return STRUCT;}
"union" {return UNION;}
"enum" {return ENUM;}
"typedef" {return TYPEDEF;}
"static" {return STATIC;}
"extern" {return EXTERN;}
"auto" {return AUTO;}
"const" {return CONST;}
"register" {return REGISTER;}
"volatile" {return VOLATILE;}
"..." {return ELLIPSIS;}
    /* Statement keywords. */
"if" {return IF;}
"else" {return ELSE;}
"for" {return FOR;}
"while" {return WHILE;}
"do" {return DO;}
"switch" {return SWITCH;}
"case" {return CASE;}
"default" {return DEFAULT;}
"goto" {return GOTO;}
"continue" {return CONTINUE;}
"break" {return BREAK;}
"return" {return RETURN;}
"_asm" {return ASM;}
    /* Member access and sizeof. */
"." {return NTOMEM;}
"->" {return PTOMEM;}
"sizeof" {return SIZEOF;}
    /* Bitwise operators are returned as their own character code. */
"&" {return '&';}
"|" {return '|';}
"^" {return '^';}
"~" {return '~';}
    /* Increment, decrement and arithmetic operators. */
"++" {return INCR;}
"--" {return DECR;}
"+" {return '+';}
"-" {return '-';}
"*" {return '*';}
"/" {return '/';}
"%" {return '%';}
    /* Shifts, assignment and comparisons. */
"<<" {return LSH;}
">>" {return RSH;}
"=" {return ASSIGN;}
"<" {return LES;}
"<=" {return LEQ;}
">" {return GTR;}
">=" {return GEQ;}
"==" {return EQ;}
"!=" {return UEQ;}
    /* Logical operators. */
"&&" {return LAND;}
"||" {return LOR;}
"!" {return '!';}
    /* Punctuation is returned as its own character code. */
"?" {return '?';}
":" {return ':';}
";" {return ';';}
"," {return ',';}
"(" {return '(';}
")" {return ')';}
"[" {return '[';}
"]" {return ']';}
"{" {return '{';}
"}" {return '}';}
    /* Compound assignment operators. */
"+=" {return ADDAS;}
"-=" {return SUBAS;}
"*=" {return MULAS;}
"/=" {return DIVAS;}
"%=" {return MODAS;}
"<<=" {return LSHAS;}
">>=" {return RSHAS;}
"^=" {return XORAS;}
"|=" {return ORAS;}
"&=" {return ANDAS;}
{identifier} {
    /*
     * Let check_type() decide which token this name stands for and fill in
     * yylval. A zero result returns nothing, so scanning simply continues
     * with the next token.
     */
    const int token = check_type(yytext);
    if (token != 0) {
        return token;
    }
}
{decint}|{octint}|{hexint} {
    /*
     * Integer literal in decimal, octal (leading 0) or hexadecimal (leading
     * 0x/0X) notation, with an optional sign. Base 0 makes strtol/strtoul
     * pick the radix from the prefix, exactly as the three patterns do.
     *
     * The conversion functions handle the sign themselves, so the value must
     * not be negated again afterwards: doing so turned "-017" and "-0x10"
     * into positive numbers. Assigning the result (instead of scanning
     * through a pointer with %ld/%lu) also keeps this correct whatever
     * integer type yylval.val.i and yylval.val.u are declared with.
     *
     * Negative literals are always CINT. Non-negative literals are CINT when
     * they fit in 31 bits and CUINT otherwise.
     */
    if (yytext[0] == '-') {
        yylval.val.i = strtol(yytext, NULL, 0);
        return CINT;
    }
    yylval.val.u = strtoul(yytext, NULL, 0);
    return yylval.val.u > 0x7FFFFFFF ? CUINT : CINT;
}
{fnumber0}|{fnumber1}|{fnumber2} {
    /* Floating-point literal: convert the matched text into yylval.val.f. */
    sscanf(yytext, "%lf", &yylval.val.f);
    return CFLOAT;
}
{singlechar} {
    /*
     * Character literal, returned as an integer constant. yytext[0] is the
     * opening quote; only the first character (or escape sequence) after it
     * contributes to the value.
     */
    if (yytext[1] != '\\') {
        yylval.val.i = yytext[1];
    } else {
        /* Escape sequence: decode what follows the backslash. */
        charmap(yytext + 2, &yylval.val.i);
    }
    return CINT;
}
{string} {
    /*
     * String literal: parsestring() strips the quotes and decodes the escape
     * sequences; the resulting buffer is handed to the parser in yylval.val.s.
     */
    char *decoded = parsestring(yytext, yyleng);
    yylval.val.s = decoded;
    return STRING;
}
\n { lineno++; /* newline outside any comment: just count the line */ }
{whitespace} { /* blanks and tabs separate tokens and are otherwise ignored */ }
"/*" { BEGIN(comment); /* enter the exclusive block-comment state */ }
<comment>\n { lineno++; /* keep the line count accurate inside a comment */ }
<comment>"*/" { BEGIN(INITIAL); /* comment closed: resume normal scanning */ }
<comment>. { /* discard comment text */ }
"//" {
    /*
     * Line comment: discard everything up to and including the newline.
     *
     * End of input must stop the loop as well. Depending on the flex
     * version, input() reports it as EOF or as 0, so both are tested;
     * testing only for 0 spins forever when EOF (-1) is returned.
     * The line counter is advanced only when a newline was really consumed.
     */
    int c;
    while ((c = input()) != EOF && c != 0) {
        if (c == '\n') {
            lineno++;
            break;
        }
    }
}
. {
    /* Catch-all: a character that no earlier rule matched is dropped */
    /* silently, because this action contains no active statement. */
    /* The two disabled lines below would instead warn and return UNKNOWN. */
// genwarning(ERR_UNKNOWN_CHAR, NULL);
// return UNKNOWN;
}
%%
/*
 * Value of the hexadecimal digit c, or -1 when c is not a hexadecimal digit.
 */
static int charmap_hexval(int c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    return -1;
}

/*
 * Decode one escape sequence.
 *
 * ch   points at the character that follows the backslash.
 * val  receives the value of the decoded character.
 *
 * Returns a pointer to the first character after the escape sequence.
 *
 * Recognised forms:
 *   x followed by one or more hexadecimal digits
 *   one to three octal digits
 *   t n r b f v a, backslash, single quote, double quote, question mark
 * Any other character produces a warning and stands for itself.
 *
 * The numeric forms are tested before the simple escapes and use their own
 * digit sets. Letters such as b, f and a are therefore no longer consumed as
 * digits of an octal number, and an escape whose value is zero is no longer
 * re-interpreted through the simple-escape table.
 */
static char *charmap(char *ch, int *val)
{
    /* Unsigned accumulator: an over-long number wraps instead of overflowing. */
    unsigned int acc = 0;

    *val = 0;

    /* Nothing follows the backslash: do not step past the terminator. */
    if (*ch == '\0') {
        return ch;
    }

    if (*ch == 'x') {
        char *p = ch + 1;
        int digit;

        while ((digit = charmap_hexval((unsigned char)*p)) >= 0) {
            acc = acc * 16u + (unsigned int)digit;
            p++;
        }
        if (p == ch + 1) {
            /* "\x" without digits: report it and keep the 'x' itself. */
            genwarning(ERR_UNKNOWN_CHAR, ch);
            *val = *ch;
        } else {
            *val = (int)acc;
        }
        return p;
    }

    if (*ch >= '0' && *ch <= '7') {
        int count;

        /* An octal escape is at most three digits long. */
        for (count = 0; count < 3 && *ch >= '0' && *ch <= '7'; count++) {
            acc = acc * 8u + (unsigned int)(*ch - '0');
            ch++;
        }
        *val = (int)acc;
        return ch;
    }

    switch (*ch) {
    case 't':  *val = '\t'; break;
    case 'n':  *val = '\n'; break;
    case '\\': *val = '\\'; break;
    case 'r':  *val = '\r'; break;
    case 'b':  *val = '\b'; break;
    case '\"': *val = '\"'; break;
    case '\'': *val = '\''; break;
    case 'f':  *val = '\f'; break;
    case 'v':  *val = '\v'; break;
    case '\?': *val = '\?'; break;
    case 'a':  *val = '\a'; break;
    default:
        genwarning(ERR_UNKNOWN_CHAR, ch);
        *val = *ch;
        break;
    }
    return ch + 1;
}
/*
 * Convert the source text of a string literal into the string it denotes.
 *
 * str  the matched literal, starting at its opening double quote.
 * len  length of the matched literal (yyleng); must be at least 1. It is
 *      used as the size of the output buffer, which is always sufficient
 *      because the decoded text is never longer than the text between the
 *      quotes.
 *
 * Returns a buffer obtained from dmalloc(len, true) that holds the decoded,
 * NUL-terminated string. Decoding stops at the closing quote or at the end
 * of str, whichever comes first.
 *
 * NOTE(review): what dmalloc does on allocation failure is not visible here;
 * its result is used unchecked, as before.
 */
static char *parsestring(char *str, int len)
{
    char *out = dmalloc(len, true);
    int used = 0;

    str++; /* skip the opening quote */

    /* "used < len - 1" always leaves room for the terminator. */
    while (*str != '\0' && *str != '\"' && used < len - 1) {
        if (*str == '\\') {
            int val;

            /* charmap() returns the position just after the escape sequence. */
            str = charmap(str + 1, &val);
            out[used++] = (char)val;
        } else {
            out[used++] = *str++;
        }
    }

    /* Terminate explicitly instead of relying on a zero-filled allocation. */
    out[used] = '\0';
    return out;
}
/*
 * End-of-input hook used by the generated scanner. The constant non-zero
 * result tells it that there is no further input to continue with.
 */
int yywrap()
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}
static int check_type(const char *text)
{
//if it is a macro, expand it here
//faint, it could be a typename or a variable name
//so troublesome, we let typename first
//and you can't define a variable with the same name of his type
Type *type;
char *name;
HashNode *hashnode;
int len = strlen(text);
name = dmalloc(len+4, true);
sprintf(name, "_%s", text);
hashnode = lookup_node(typetab, TYPETABSIZE, name);
if(hashnode)
{
type = hashnode->val;
if(type->basic_type == bt_typedef)
{
yylval.type = type->subtype;
return USERTYPE;
}
}
yylval.val.s = name;
return IDENTIFIER;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -