⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lex.l

📁 用C++编写的一个编译器
💻 L
字号:
%{
/***********************************************************
 Lex script of DCC
 dusiqi 2007
***********************************************************/

#include <stdio.h>
#include <stdlib.h>
#include "utility.h"
#include "syntax.h"
#include <string.h>
#include "yacc.h"

//Intended to allow declaring a variable with the same name as its type.
//That turned out to be troublesome, so it is not supported at the moment;
//nothing in this lexer script reads the flag.
bool all_id_flag = false;

//If backslash-newline (line continuation) ever has to be supported, all of
//this code will need to be modified, or even rewritten.

//Forward declarations of the helpers defined in the user-code section
//at the end of this script.
static int check_type(const char *text);
static char *charmap(char *ch, int *val);
static char *parsestring(char *str, int len);
%}

/* Character classes used to build the token patterns below. */
letter      [_a-zA-Z]
letnum		[_a-zA-Z0-9]
decdigit    [0-9]
octdigit	[0-7]
hexdigit	[a-fA-F0-9]

/* Identifier: a letter or underscore followed by letters, digits, underscores. */
identifier  {letter}{letnum}*
/* Integer literals. Each pattern carries an optional leading sign. */
/* NOTE(review): because the sign is part of the literal and the scanner */
/* prefers the longest match, input such as a-1 is scanned as an identifier */
/* followed by the constant -1 rather than as a subtraction. Confirm that */
/* the grammar copes with this. */
decint      ([+-]?([1-9]{decdigit}*))
octint		([+-]?0{octdigit}*)
hexint		([+-]?0[xX]{hexdigit}+)
/* Exponent part of a floating-point literal. */
epnt		([Ee][+-]?{decdigit}+)

/* Floating-point literals. fnumber0 has no decimal point, so it also */
/* matches a plain run of digits; the rules section lists the integer */
/* rules first, which decides ties of equal length. */
fnumber0	([+-]?{decdigit}+{epnt}?)
fnumber1	([+-]?{decdigit}*"\."{decdigit}+{epnt}?)
fnumber2	([+-]?{decdigit}+"\."{decdigit}*{epnt}?)

/* Quoted literals: one or more characters or backslash escapes between */
/* single quotes, zero or more between double quotes. */
singlechar	'(\\.|[^\\'])+'
string		\"(\\.|[^\\"])*\"

whitespace  [ \t]+

/* Comments used to have to be removed after cpp had been run; */
/* that handling has now been moved into the lexer itself. */
/* Exclusive start conditions. Only comment is used by the rules in this */
/* file; no rule refers to preprocess. */
%x comment preprocess
%%
	/* Type keywords and boolean constants. */
"short"         { return SHORT; }
"long"          { return LONG; }
"unsigned"      { return UNSIGNED; }
"int"           { return INT; }
"float"         { return FLOAT; }
"double"        { return DOUBLE; }
"char"          { return CHAR; }
"bool"          { return BOOL; }
"true"          { yylval.val.i = 1; return CBOOL; }
"false"         { yylval.val.i = 0; return CBOOL; }
"void"          { return VOID; }
"struct"        { return STRUCT; }
"union"         { return UNION; }
"enum"          { return ENUM; }
"typedef"       { return TYPEDEF; }

	/* Storage classes, qualifiers and the variadic marker. */
"static"        { return STATIC; }
"extern"        { return EXTERN; }
"auto"          { return AUTO; }
"const"         { return CONST; }
"register"      { return REGISTER; }
"volatile"      { return VOLATILE; }
"..."           { return ELLIPSIS; }

	/* Statement keywords. */
"if"            { return IF; }
"else"          { return ELSE; }
"for"           { return FOR; }
"while"         { return WHILE; }
"do"            { return DO; }
"switch"        { return SWITCH; }
"case"          { return CASE; }
"default"       { return DEFAULT; }
"goto"          { return GOTO; }
"continue"      { return CONTINUE; }
"break"         { return BREAK; }
"return"        { return RETURN; }
"_asm"          { return ASM; }

	/* Member access, sizeof, bitwise and increment operators. */
"."             { return NTOMEM; }
"->"            { return PTOMEM; }
"sizeof"        { return SIZEOF; }
"&"             { return '&'; }
"|"             { return '|'; }
"^"             { return '^'; }
"~"             { return '~'; }
"++"            { return INCR; }
"--"            { return DECR; }

	/* Arithmetic, shift, comparison and logical operators, punctuation. */
"+"             { return '+'; }
"-"             { return '-'; }
"*"             { return '*'; }
"/"             { return '/'; }
"%"             { return '%'; }
"<<"            { return LSH; }
">>"            { return RSH; }
"="             { return ASSIGN; }
"<"             { return LES; }
"<="            { return LEQ; }
">"             { return GTR; }
">="            { return GEQ; }
"=="            { return EQ; }
"!="            { return UEQ; }
"&&"            { return LAND; }
"||"            { return LOR; }
"!"             { return '!'; }
"?"             { return '?'; }
":"             { return ':'; }
";"             { return ';'; }
","             { return ','; }
"("             { return '('; }
")"             { return ')'; }
"["             { return '['; }
"]"             { return ']'; }
"{"             { return '{'; }
"}"             { return '}'; }

	/* Compound assignment operators. */
"+="            { return ADDAS; }
"-="            { return SUBAS; }
"*="            { return MULAS; }
"/="            { return DIVAS; }
"%="            { return MODAS; }
"<<="           { return LSHAS; }
">>="           { return RSHAS; }

"^="            { return XORAS; }
"|="            { return ORAS; }
"&="            { return ANDAS; }

{identifier}    {
					/* check_type() yields USERTYPE for typedef names and IDENTIFIER otherwise */
					int tt=check_type(yytext);

					if(tt)return tt;
				}
{decint}        {
					/* The conversions go through strtol and strtoul and are then assigned, */
					/* so nothing is written through a pointer whose width may not match. */
					/* A value above 0x7FFFFFFF is reported as an unsigned constant. */
					if(yytext[0] == '-')
					{
						yylval.val.i = strtol(yytext, NULL, 10);
						return CINT;
					}
					yylval.val.u = strtoul(yytext, NULL, 10);
					return yylval.val.u>0x7FFFFFFF?CUINT:CINT;
				}
{octint}		{
					/* strtol consumes the sign itself. The previous code let sscanf */
					/* parse the sign and then negated the result a second time, */
					/* which turned -017 into +15. */
					if(yytext[0] == '-')
					{
						yylval.val.i = strtol(yytext, NULL, 8);
						return CINT;
					}
					yylval.val.u = strtoul(yytext, NULL, 8);
					return yylval.val.u>0x7FFFFFFF?CUINT:CINT;
				}
{hexint}		{
					/* Base 16 accepts the 0x or 0X prefix after the optional sign. */
					/* As with octal, the sign is applied exactly once. */
					if(yytext[0] == '-')
					{
						yylval.val.i = strtol(yytext, NULL, 16);
						return CINT;
					}
					yylval.val.u = strtoul(yytext, NULL, 16);
					return yylval.val.u>0x7FFFFFFF?CUINT:CINT;
				}
{fnumber0}|{fnumber1}|{fnumber2} {
					/* Reached for digit runs only when no integer rule matches as much text */
					yylval.val.f = strtod(yytext, NULL);
					return CFLOAT;
				}
{singlechar}	{
					/* Only the first character or escape between the quotes is used. */
					/* yytext+2 is the character after the backslash. */
					if(yytext[1]=='\\')charmap(yytext+2, &(yylval.val.i));
					else yylval.val.i=yytext[1];
					return CINT;
				}
{string}		{
					/* parsestring() returns a freshly allocated buffer with escapes decoded */
					yylval.val.s=parsestring(yytext, yyleng);
					return STRING;
				}
 
\n				{lineno++;}
{whitespace}    {}

	/* Block comments are skipped in the exclusive comment state; newlines are still counted. */
"/*"			{BEGIN(comment);}
<comment>\n		{lineno++;}
<comment>"*/"	{BEGIN(INITIAL);}
<comment>.		{}
	/* Line comment: skip up to, but not including, the newline. The newline rule above counts the line. */
	/* This replaces a hand-written input() loop that never stopped when input() reported EOF as -1 */
	/* and that incremented lineno even when the file ended without a newline. */
"//"[^\n]*		{}
.               {
				//	genwarning(ERR_UNKNOWN_CHAR, NULL);
				//	return UNKNOWN;
				}

%%

/*
 * Decode a single escape sequence.
 *
 * ch  points at the character immediately after the backslash.
 * val receives the decoded character code.
 *
 * Recognised forms:
 *   - 'x' followed by one or more hexadecimal digits;
 *   - one to three octal digits (so \0 decodes to 0);
 *   - the simple escapes  t n \ r b " ' f v ? a.
 * Any other character is reported through genwarning(ERR_UNKNOWN_CHAR, ch)
 * and decodes to itself. An 'x' with no hex digits after it is treated the
 * same way.
 *
 * Returns a pointer to the first character after the escape sequence. If the
 * input ends right after the backslash, the returned pointer stays on the
 * terminating NUL so callers that loop until NUL stop there.
 *
 * The numeric forms are checked before the simple escapes but only claim the
 * characters that really belong to them; earlier, b, f and a were consumed as
 * hex digits and \b, \f, \a decoded to 11, 15 and 10.
 */
static char* charmap(char *ch, int *val)
{
	unsigned int code = 0;

	if(*ch == 'x')
	{
		char *p = ch + 1;
		int ndigits = 0;

		for(;; p++, ndigits++)
		{
			unsigned int digit;

			if(*p >= '0' && *p <= '9')digit = (unsigned int)(*p - '0');
			else if(*p >= 'a' && *p <= 'f')digit = (unsigned int)(*p - 'a' + 10);
			else if(*p >= 'A' && *p <= 'F')digit = (unsigned int)(*p - 'A' + 10);
			else break;
			/* unsigned arithmetic: an over-long escape wraps instead of overflowing */
			code = code*16 + digit;
		}
		if(ndigits > 0)
		{
			*val = (int)code;
			return p;
		}
		/* no hex digits: fall through, 'x' is handled by the default case */
	}
	else if(*ch >= '0' && *ch <= '7')
	{
		int ndigits;

		/* an octal escape is at most three digits long */
		for(ndigits = 0; ndigits < 3 && *ch >= '0' && *ch <= '7'; ndigits++)
		{
			code = code*8 + (unsigned int)(*ch - '0');
			ch++;
		}
		*val = (int)code;
		return ch;
	}

	switch(*ch)
	{
	case 't':*val = '\t';break;
	case 'n':*val = '\n';break;
	case '\\':*val = '\\';break;
	case 'r':*val = '\r';break;
	case 'b':*val = '\b';break;
	case '\"':*val = '\"';break;
	case '\'':*val = '\'';break;
	case 'f':*val = '\f';break;
	case 'v':*val = '\v';break;
	case '\?':*val = '\?';break;
	case 'a':*val = '\a';break;
	case '\0':
		/* the text ended right after the backslash: do not step past the NUL */
		genwarning(ERR_UNKNOWN_CHAR, ch);
		*val = 0;
		return ch;
	default:
		genwarning(ERR_UNKNOWN_CHAR, ch);
		*val = *ch;
	}
	return ch + 1;
}
/*
 * Decode the text of a string-literal token.
 *
 * str is the token text starting at its opening double quote; len is the
 * token length and is used as the size requested from dmalloc().
 * Characters are copied up to the closing double quote, with every
 * backslash escape translated by charmap(). The NUL terminator is written
 * when the closing quote is reached.
 *
 * Returns the buffer obtained from dmalloc(len, true).
 * NOTE(review): the meaning of the second dmalloc() argument is not visible
 * here - presumably it requests zeroed memory; confirm.
 */
static char *parsestring(char *str, int len)
{
	char *out = dmalloc(len, true);
	char *src = str + 1;	/* skip the opening quote */
	int n = 0;
	int code;

	for(;;)
	{
		if(*src == '\0')
			return out;	/* ran out of input before a closing quote */

		if(*src == '\"')
		{
			out[n] = 0;
			return out;
		}

		if(*src != '\\')
		{
			out[n++] = *src++;
			continue;
		}

		/* charmap() is handed the character after the backslash and
		   returns where scanning should resume */
		src = charmap(src + 1, &code);
		out[n++] = code;
	}
}

/*
 * End-of-input hook called by the generated scanner. Always answers 1,
 * which by lex convention means there is no further input to switch to.
 */
int yywrap()
{
	enum { NO_MORE_INPUT = 1 };

	return NO_MORE_INPUT;
}
static int check_type(const char *text)
{
	//if it is a macro, expand it here
	//faint, it could be a typename or a variable name
	//so troublesome, we let typename first
	//and you can't define a variable with the same name of his type
	Type *type;
	char *name;
	HashNode *hashnode;
	int len = strlen(text);
	name = dmalloc(len+4, true);
	sprintf(name, "_%s", text);
	hashnode = lookup_node(typetab, TYPETABSIZE, name);
	if(hashnode)
	{
		type = hashnode->val;
		if(type->basic_type == bt_typedef)
		{
			yylval.type = type->subtype;
			return USERTYPE;
		}
	}
	yylval.val.s = name;
	return IDENTIFIER;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -