finctoken.c

来自「FinC编译器源代码」· C语言 代码 · 共 1,354 行 · 第 1/2 页

C
1,354
字号
#include <finc/finctoken.h>
#include <finc/finclang.h>
#include <finc/finccontext.h>

/*
 * Character classification helpers.
 * NOTE: these macros evaluate their argument more than once, so only pass
 * side-effect-free expressions (every caller in this file passes a plain
 * variable).
 */
#define is_digit(ch)	((ch) >= '0' && (ch) <= '9')
#define is_separator(ch) (!(((ch) >= 'a' && (ch) <= 'z') \
	|| ((ch) >= 'A' && (ch) <= 'Z') || ((ch) >= '0' && (ch) <= '9') || ((ch) == '_')))
#define is_eof(self) ((self)->eof)

/* HERE prints the current file/line when FINC_DEBUG is defined, else nothing. */
#ifdef FINC_DEBUG
#define	HERE	kprintf("here, %s:%d\n", __FILE__, __LINE__)
#else
#define HERE
#endif

/* One entry of the reserved-word table: spelling -> token type. */
typedef struct _NameTable NameTable;
struct _NameTable
{
	const char* name;
	FinCTokenType type;
};

/* Reserved words recognised by token_match_name(). */
static const NameTable name_table[] =
{
	{"void",		FinCTokenType_Type_Void},
	{"int",			FinCTokenType_Type_Int},
	{"char",		FinCTokenType_Type_Char},
	{"short",		FinCTokenType_Type_Short},
#ifdef USING_DOUBLE
	{"double",		FinCTokenType_Type_Double},
#endif
#ifdef USING_LONG
	{"long",		FinCTokenType_Type_Long},
#endif
#ifdef USING_FLOAT
	{"float",		FinCTokenType_Type_Float},
#endif
	{"bool",		FinCTokenType_Type_Bool},
	{"pointer",		FinCTokenType_Type_Pointer},
	{"string",		FinCTokenType_Type_String},
	{"static",		FinCTokenType_Static},
	{"unsigned",	FinCTokenType_Unsigned},
	{"struct",		FinCTokenType_Struct},
	{"false",		FinCTokenType_False},
	{"true",		FinCTokenType_True},
	{"null",		FinCTokenType_Null},
	{"for",			FinCTokenType_For},
	{"while",		FinCTokenType_While},
	{"if",			FinCTokenType_If},
	{"else",		FinCTokenType_Else},
	{"return",		FinCTokenType_Return},
	{"break",		FinCTokenType_Break},
	{"continue",	FinCTokenType_Continue},
	{"import",		FinCTokenType_Import},
	{"package",	    FinCTokenType_Package},
	{"addrof",		FinCTokenType_Addrof},
	{"valueof",		FinCTokenType_Valueof}
};

static String* token_get_string(FinCTokenEnv* self);
static void token_run(FinCTokenEnv* self);
static Bool token_match_name(FinCTokenEnv* self, const char* str);
static void token_trim_comment(FinCTokenEnv* self);
static void token_trim_line_comment(FinCTokenEnv* self);
static void token_trim_space(FinCTokenEnv* self);
static char token_proc_char(FinCTokenEnv* self);
static String* token_proc_string(FinCTokenEnv* self);
static void token_proc_number(FinCTokenEnv* self);
#ifdef USING_DOUBLE
static double token_spec_number(char* string, int length, int b);
#else
static long token_spec_number(char* string, int length, int b);
#endif
static int token_proc_escape(FinCTokenEnv* self);
static char token_next_char(FinCTokenEnv* self);
static void token_prev_char(FinCTokenEnv* self);

/*
 * Allocate a lexer environment and reset its scanning state.
 * The line reader (self->read_line / self->arg) is not set here; it must be
 * installed before the first token is requested.
 */
FinCTokenEnv* finc_token_new()
{
	FinCTokenEnv* self;

	self = (FinCTokenEnv*)mem_new (sizeof(FinCTokenEnv));
	object_init_object (OBJECT (self), finc_token_destroy);

	self->eof = FALSE;
	self->replay = FALSE;

	self->line = 0;
	self->position = 0;
	self->current_token = FinCTokenType_Bad;

	self->line_str = NULL;
	self->last_str = NULL;

	self->last_char = '\0';
	self->last_double = 0;
#ifdef USING_FLOAT
	self->last_float = 0;
#endif
	self->last_long = 0;

	return self;
}

/* Object destructor: drop the held strings, then free the environment. */
void finc_token_destroy(Object* self)
{
	FinCTokenEnv* real;

	real = (FinCTokenEnv*)self;
	unref(real->line_str);
	unref(real->last_str);

	mem_destroy(self);
}

/*
 * Return the next token type.
 * If a replay was requested, the previous token is returned again and the
 * replay flag is cleared; otherwise a new token is scanned.
 */
FinCTokenType finc_token_token(FinCTokenEnv* self)
{
	if ( self->replay )
		self->replay = FALSE;
	else
	{
		token_run(self);
#ifdef FINC_TOKEN_DEBUG
		finc_token_debug(self);
		kprintf(" ");
#endif
	}

	return self->current_token;
}

/* Return a new reference to the text of the last identifier/keyword/string. */
String* finc_token_get_token(FinCTokenEnv* self)
{
	return addref(String, self->last_str);
}

/*
 * Read an identifier-like word starting at the current position.
 *
 * Returns NULL at end of input or when the word starts with a digit (the
 * digit is pushed back); returns an empty string when the next character is a
 * separator (also pushed back).  Otherwise returns the word.
 *
 * Identifiers longer than the local buffer are truncated and reported as a
 * lex error instead of overflowing the buffer.
 */
static String* token_get_string(FinCTokenEnv* self)
{
	char str[80];	/* at most 79 characters plus the terminator */
	char *p = str;
	char * const limit = str + sizeof(str) - 1;
	Bool too_long = FALSE;
	char ch;

	ch = token_next_char(self);
	if (is_eof(self)) return NULL;

	if ( is_digit(ch) )	/* an identifier never starts with a digit */
	{
		token_prev_char(self);
		return NULL;
	}

	while ( !is_separator(ch) && !is_eof(self) )
	{
		if ( p < limit )
			*p++ = ch;
		else
			too_long = TRUE;	/* keep consuming, stop storing */
		ch = token_next_char(self);
	}

	/*
	 * Clear eof so that a word ending exactly at end of input is still
	 * delivered; the following read hits end of input again.
	 */
	self->eof = FALSE;
	token_prev_char(self);
	*p = '\0';

	if ( too_long )
	{
		kprintf( "Lex Error: Identifier is too long.\n");
		finc_context_error_inc(g_finc_context);
	}

	return string_new_str(str);
}

/*
 * Get the next character, fetching new lines through self->read_line as
 * needed.  Returns '\0' and sets self->eof once the reader yields an empty
 * line.
 *
 * NOTE(review): the '\n' itself is never handed to the caller - the cursor
 * jumps straight to the first character of the next line, so a word that
 * runs to the end of a line is not separated from the next line's first
 * character by this function.
 */
static char token_next_char(FinCTokenEnv* self)
{
	if (self->eof) return '\0';

	/* Refill while the line is used up or the cursor sits on its newline. */
	while ( self->position == string_get_size( self->line_str )
		|| string_at(self->line_str, self->position) == '\n' )
	{
		unref(self->line_str);
		self->line_str = string_new_str( (*self->read_line)(self->arg) );
		HERE;
		if (string_is_empty(self->line_str))
		{
			HERE;
			self->eof = TRUE;
			self->position = 0;
			return '\0';
		}

		self->line++;
#ifdef FINC_TOKEN_DEBUG
		kprintf("\n");
#endif
		self->position = 0;
	}

	return string_at( self->line_str, self->position++);
}

/*
 * Push the last character back.  Does nothing at end of input or at the
 * start of a line (it cannot step back across a line boundary).
 */
static void token_prev_char(FinCTokenEnv* self)
{
	if ( self->eof ) return;

	if ( self->position == 0 )
	{
		return;
	}

	self->position--;
}

/*
 * Scan one token and store its type in self->current_token.
 * Keyword/identifier text goes to self->last_str, character literals to
 * self->last_char, string literals to self->last_str.
 */
static void token_run(FinCTokenEnv* self)
{
	char ch;

	token_trim_space(self);	/* skip blanks, tabs and comments first */

	unref(self->last_str);
	self->last_str = token_get_string(self);	/* keyword or identifier, if any */

	if ( is_eof(self) )
	{
		self->current_token = FinCTokenType_Eof;
		return ;
	}

	if ( !string_is_empty(self->last_str) )	/* a keyword or an identifier */
	{
		if ( !token_match_name(self, string_get_str (self->last_str)) )
		{
			self->current_token = FinCTokenType_Identifier;
		}
		return;
	}
	else	/* an operator, a literal or a number */
	{
		ch = token_next_char(self);
		switch ( ch )
		{
		case '(':
			self->current_token = FinCTokenType_Left_Paren;
			break;

		case ')':
			self->current_token = FinCTokenType_Right_Paren;
			break;

		case '{':
			self->current_token = FinCTokenType_Left_Curly;
			break;

		case '}':
			self->current_token = FinCTokenType_Right_Curly;
			break;

		case '[':
			self->current_token = FinCTokenType_Left_Brace;
			break;

		case ']':
			self->current_token = FinCTokenType_Right_Brace;
			break;

		case ',':
			self->current_token = FinCTokenType_Comma;
			break;

		case ':':
			self->current_token = FinCTokenType_Colon;
			break;

		case ';':
			self->current_token = FinCTokenType_Semicolon;
			break;

		case '?':
			self->current_token = FinCTokenType_Question;
			break;

		case '!':
			ch = token_next_char(self);
			if ( ch == '=' )
			{
				self->current_token = FinCTokenType_Not_Eqs;
			}
			else
			{
				token_prev_char(self);
				self->current_token = FinCTokenType_Not;
			}
			break;

		case '&':
			ch = token_next_char(self);
			if ( ch == '&' )
			{
				self->current_token = FinCTokenType_Logic_And;
			}
			else
			{
				self->current_token = FinCTokenType_And;
				token_prev_char(self);
			}
			break;

		case '*':
			self->current_token = FinCTokenType_Mul;
			break;

		case '+':
			ch = token_next_char(self);
			if ( ch == '+' )
			{
				self->current_token = FinCTokenType_Inc;
			}
			else
			{
				token_prev_char(self);
				self->current_token = FinCTokenType_Add;
			}
			break;

		case '-':
			ch = token_next_char(self);
			if ( ch == '-' )
			{
				self->current_token = FinCTokenType_Dec;
			}
			else
			{
				token_prev_char(self);
				self->current_token = FinCTokenType_Sub;
			}
			break;

		case '/':
			self->current_token = FinCTokenType_Div;
			break;

		case '<':
			ch = token_next_char(self);
			if ( ch == '=' )
			{
				self->current_token = FinCTokenType_Less_Eqs;
			}
			else if ( ch == '<' )
			{
				self->current_token = FinCTokenType_SHL;
			}
			else
			{
				token_prev_char(self);
				self->current_token = FinCTokenType_LT;
			}
			break;

		case '>':
			ch = token_next_char(self);
			if ( ch == '=' )
			{
				self->current_token = FinCTokenType_Greater_Eqs;
			}
			else if ( ch == '>' )
			{
				self->current_token = FinCTokenType_SHR;
			}
			else
			{
				token_prev_char(self);
				self->current_token = FinCTokenType_GT;
			}
			break;

		case '|':
			ch = token_next_char(self);
			if ( ch == '|' )
			{
				self->current_token = FinCTokenType_Logic_Or;
			}
			else
			{
				self->current_token = FinCTokenType_Or;
				token_prev_char(self);
			}
			break;

		case '%':
			self->current_token = FinCTokenType_Mod;
			break;

		case '~':
			self->current_token = FinCTokenType_Bitwise;
			break;

		case '^':
			self->current_token = FinCTokenType_XOR;
			break;

		case '=':
			ch = token_next_char(self);
			if ( ch =='=' )
			{
				self->current_token = FinCTokenType_Eq;
			}
			else
			{
				token_prev_char(self);
				self->current_token = FinCTokenType_Assign;
			}
			break;

		case '\'':
			self->last_char = token_proc_char(self);
			self->current_token = FinCTokenType_Char;
			break;

		case '"':
			unref(self->last_str);
			self->last_str = NULL;

			self->last_str = token_proc_string(self);
			self->current_token = FinCTokenType_String;
			break;

		case '.':
			ch = token_next_char(self);
			if ( is_digit(ch) )	/* a number starting with '.' */
			{
				/*
				 * NOTE(review): unlike the default branch below, nothing is
				 * pushed back before token_proc_number() here, so the '.' and
				 * the first digit are already consumed - confirm that
				 * token_proc_number() expects this.
				 */
				token_proc_number(self);
				break;
			}
			else
			{
				token_prev_char(self);
				self->current_token = FinCTokenType_Dot;
			}
			break;

		default:
			if ( is_digit(ch) )
			{
				token_prev_char(self);	/* let the number parser see the first digit */
				token_proc_number(self);
				break;
			}

			kprintf ( "Lex Error: Unknown characector\n");
			finc_context_error_inc(g_finc_context);
			self->current_token = FinCTokenType_Bad;

			break;
		}
	}
}

/*
 * Look str up in the reserved-word table.
 * On a match, stores the keyword's token type in self->current_token and
 * returns TRUE; otherwise returns FALSE and leaves current_token untouched.
 */
static Bool token_match_name(FinCTokenEnv* self, const char* str)
{
	unsigned int i;

	for ( i=0; i<sizeof(name_table)/sizeof(*name_table); i++)
	{
		if ( strcmp(name_table[i].name, str)==0 )
		{
			self->current_token = name_table[i].type;
			return TRUE;
		}
	}

	return FALSE;
}

/*
 * Skip the body of a block comment (the opening marker is already consumed)
 * up to and including the closing marker.  Reports a lex error if the input
 * ends first.
 */
static void token_trim_comment(FinCTokenEnv* self)
{
	char ch;

	while( !is_eof(self) )
	{
		ch = token_next_char(self);
		if ( ch =='*' )
		{
			ch = token_next_char(self);
			if ( ch =='/' )
				return;

			/* not the end marker: re-examine this character (it may be '*') */
			token_prev_char(self);
		}
	}

	kprintf( "Lex Error: Unexpecting end of comment.\n");
	finc_context_error_inc(g_finc_context);
}

/* Skip a line comment by discarding the rest of the line and loading the next one. */
static void token_trim_line_comment(FinCTokenEnv* self)
{
	unref(self->line_str);
	self->line_str = string_new_str( (*self->read_line)(self->arg) );
	if (string_is_empty(self->line_str)) self->eof = TRUE;

	self->line++;
	self->position = 0;
}

/*
 * Skip blanks, tabs and comments, leaving the cursor on the first character
 * of the next token.
 */
static void token_trim_space(FinCTokenEnv* self)
{
	char ch;

	while ( (ch = token_next_char(self)) ==' ' || ch == '\t' || ch == '/' )
	{
		if ( ch == '/' )
		{
			ch = token_next_char(self);
			if ( ch == '*' )
			{
				token_trim_comment(self);
			}
			else if ( ch == '/' )
			{
				token_trim_line_comment(self);
			}
			else
			{
				/*
				 * A lone '/' is an operator: push back the character after
				 * it here, and the '/' itself after the loop.
				 */
				token_prev_char(self);
				break;
			}
		}
	}

	token_prev_char(self);
}

/*
 * Parse the body of a character literal (the opening quote is already
 * consumed) and return its value.  Reports a lex error when the closing
 * quote is missing.
 *
 * A backslash followed by digits is read as a decimal character code of at
 * most three digits; any other unknown escape yields 0 and is pushed back.
 * NOTE(review): token_proc_escape() reads digit escapes in string literals
 * as octal - confirm which base is intended for character literals.
 */
static char token_proc_char(FinCTokenEnv* self)
{
	char ch;
	int value;
	int digits;

	ch = token_next_char(self);
	if ( ch == '\\' )
	{
		ch = token_next_char(self);
		switch ( ch )
		{
		case 'n': ch = '\n'; break;
		case 't': ch = '\t'; break;
		case 'v': ch = '\v'; break;
		case 'b': ch = '\b'; break;
		case 'r': ch = '\r'; break;
		case '\\': ch = '\\';  break;
		case '\'': ch = '\'';  break;
		default :
			/* numeric escape: accumulate the digits that were actually read */
			value = 0;
			digits = 0;
			while ( is_digit(ch) && digits < 3 )
			{
				value = value*10 + (ch - '0');
				digits++;
				ch = token_next_char(self);
			}
			token_prev_char(self);	/* first character that is not part of the escape */
			ch = (char)value;
			break;
		}
	}

	if ( token_next_char(self) != '\'' )
	{
		token_prev_char(self);
		kprintf( "Lex Error: Expecting a '\''.\n");
		finc_context_error_inc(g_finc_context);
		return ch;
	}

	return ch;
}

/*
 * Parse the body of a string literal (the opening quote is already consumed).
 * On success the new string is stored in self->last_str and also returned.
 * If the input ends before the closing quote, a lex error is reported and
 * NULL is returned.
 */
static String* token_proc_string(FinCTokenEnv* self)
{
	String* string;

	string = string_new();

	for ( ; ; )
	{
		char ch = token_next_char(self);
		if ( is_eof(self) )
		{
			kprintf( "Lex Error: Unexpecting end in process string.\n");
			finc_context_error_inc(g_finc_context);
			unref(string);
			return NULL;
		}

		if ( ch == '\\' )
		{
			ch = token_proc_escape(self);
		}
		else if ( ch == '"' )	/* end of string */
		{
			unref(self->last_str);
			self->last_str = string;
			return string;
		}

		string_add_char(string, ch);
	}
}

/*
 * Decode the escape sequence that follows a backslash inside a string
 * literal and return the resulting character code.
 *
 * NOTE(review): the hex branch tests (ch - '0') < 16u, which accepts the
 * sixteen characters '0'..'?' rather than 0-9/a-f/A-F - confirm and fix
 * together with the rest of this function, which continues on the next page
 * of the listing.
 */
static int token_proc_escape(FinCTokenEnv* self)
{
	char ch;
	int result=0;

	ch = token_next_char(self);
	switch (ch)
	{
	case 'n':
		result = '\n';
		break;
	case 't':
		result = '\t';
		break;
	case 'v':
		result = '\v';
		break;
	case 'b':
		result = '\b';
		break;
	case 'r':
		result = '\r';
		break;
	case 'f':
		result = '\f';
		break;
	case 'a':
		result = '\007';
		break;
	case 'x':
		result = 0;
		ch  = token_next_char(self);
		while ( (ch - '0')<16u )
		{
			result = result*16 + ch - '0';
			ch = token_next_char(self);
		}
		token_prev_char(self);
		break;
	default:
		if ( (ch - '0') < 8u)
		{
			result = 0;
			while ( (ch - '0') < 8u )
			{
				result = result*8 + ch - '0';
				ch = token_next_char(self);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?