finctoken.c
来自「FinC编译器源代码」· C语言 代码 · 共 1,354 行 · 第 1/2 页
C
1,354 行
#include <finc/finctoken.h>
#include <finc/finclang.h>
#include <finc/finccontext.h>

/*
 * Character classification helpers.
 * The argument may be evaluated more than once, so do not pass expressions
 * with side effects.
 */
#define is_digit(ch) ((ch) >= '0' && (ch) <= '9')
#define is_separator(ch) (!(((ch) >= 'a' && (ch) <= 'z') \
    || ((ch) >= 'A' && (ch) <= 'Z') \
    || ((ch) >= '0' && (ch) <= '9') \
    || ((ch) == '_')))
#define is_eof(self) ((self)->eof)

#ifdef FINC_DEBUG
#define HERE kprintf("here, %s:%d\n", __FILE__, __LINE__)
#else
#define HERE
#endif

/* Maps a reserved word to the token type reported for it. */
typedef struct _NameTable NameTable;
struct _NameTable
{
    const char* name;
    FinCTokenType type;
};

/* Reserved words; anything not listed here is lexed as an identifier. */
static const NameTable name_table[] =
{
    {"void", FinCTokenType_Type_Void},
    {"int", FinCTokenType_Type_Int},
    {"char", FinCTokenType_Type_Char},
    {"short", FinCTokenType_Type_Short},
#ifdef USING_DOUBLE
    {"double", FinCTokenType_Type_Double},
#endif
#ifdef USING_LONG
    {"long", FinCTokenType_Type_Long},
#endif
#ifdef USING_FLOAT
    {"float", FinCTokenType_Type_Float},
#endif
    {"bool", FinCTokenType_Type_Bool},
    {"pointer", FinCTokenType_Type_Pointer},
    {"string", FinCTokenType_Type_String},
    {"static", FinCTokenType_Static},
    {"unsigned", FinCTokenType_Unsigned},
    {"struct", FinCTokenType_Struct},
    {"false", FinCTokenType_False},
    {"true", FinCTokenType_True},
    {"null", FinCTokenType_Null},
    {"for", FinCTokenType_For},
    {"while", FinCTokenType_While},
    {"if", FinCTokenType_If},
    {"else", FinCTokenType_Else},
    {"return", FinCTokenType_Return},
    {"break", FinCTokenType_Break},
    {"continue", FinCTokenType_Continue},
    {"import", FinCTokenType_Import},
    {"package", FinCTokenType_Package},
    {"addrof", FinCTokenType_Addrof},
    {"valueof", FinCTokenType_Valueof}
};

static String* token_get_string(FinCTokenEnv* self);
static void token_run(FinCTokenEnv* self);
static Bool token_match_name(FinCTokenEnv* self, const char* str);
static void token_trim_comment(FinCTokenEnv* self);
static void token_trim_line_comment(FinCTokenEnv* self);
static void token_trim_space(FinCTokenEnv* self);
static char token_proc_char(FinCTokenEnv* self);
static String* token_proc_string(FinCTokenEnv* self);
static void token_proc_number(FinCTokenEnv* self);
#ifdef USING_DOUBLE
static double token_spec_number(char* string, int length, int b);
#else
static long token_spec_number(char* string, int length, int b);
#endif
static int token_proc_escape(FinCTokenEnv* self);
static char token_next_char(FinCTokenEnv* self);
static void token_prev_char(FinCTokenEnv* self);

/*
 * Allocate a tokenizer environment and reset its state.
 * Returns NULL when the allocation fails.
 * The read_line callback and its arg are not set here.
 */
FinCTokenEnv* finc_token_new()
{
    FinCTokenEnv* self;

    self = (FinCTokenEnv*)mem_new (sizeof(FinCTokenEnv));
    if ( self == NULL )
    {
        return NULL;
    }
    object_init_object (OBJECT (self), finc_token_destroy);

    self->eof = FALSE;
    self->replay = FALSE;
    self->line = 0;
    self->position = 0;
    self->current_token = FinCTokenType_Bad;
    self->line_str = NULL;
    self->last_str = NULL;
    self->last_char = '\0';
    self->last_double = 0;
#ifdef USING_FLOAT
    self->last_float = 0;
#endif
    self->last_long = 0;

    return self;
}

/* Destructor registered with object_init_object: drops the held strings, then frees the environment. */
void finc_token_destroy(Object* self)
{
    FinCTokenEnv* real;

    real = (FinCTokenEnv*)self;
    unref(real->line_str);
    unref(real->last_str);
    mem_destroy(self);
}

/*
 * Return the current token type.
 * When the replay flag is set the previous token is returned again and the
 * flag is cleared; otherwise the next token is scanned first.
 */
FinCTokenType finc_token_token(FinCTokenEnv* self)
{
    if ( self->replay )
    {
        self->replay = FALSE;
    }
    else
    {
        token_run(self);
#ifdef FINC_TOKEN_DEBUG
        finc_token_debug(self);
        kprintf(" ");
#endif
    }

    return self->current_token;
}

/* Return a new reference to the text of the last identifier/keyword/string token. */
String* finc_token_get_token(FinCTokenEnv* self)
{
    return addref(String, self->last_str);
}

/*
 * Read an identifier or keyword starting at the current position.
 * Returns NULL at end of input or when the first character is a digit,
 * and an empty string when the first character is a separator.
 * At most 79 characters are kept; a longer identifier is reported as a
 * lex error and truncated, but still consumed in full.
 */
static String* token_get_string(FinCTokenEnv* self)
{
    char str[80];   /* 79 characters plus the terminating NUL */
    char* p = str;
    char ch;
    int too_long = 0;

    ch = token_next_char(self);
    if ( is_eof(self) )
    {
        return NULL;
    }

    if ( is_digit(ch) ) /* an identifier never starts with a digit */
    {
        token_prev_char(self);
        return NULL;
    }

    while ( !is_separator(ch) && !is_eof(self) )
    {
        if ( p < str + sizeof(str) - 1 )
        {
            *p++ = ch;
        }
        else
        {
            too_long = 1;   /* keep consuming so the tail is not lexed as a new token */
        }
        ch = token_next_char(self);
    }
    *p = '\0';

    if ( too_long )
    {
        kprintf( "Lex Error: Identifier too long.\n");
        finc_context_error_inc(g_finc_context);
    }

    /* End of input right after the identifier is reported on the next call, not now. */
    self->eof = FALSE;
    token_prev_char(self);

    return string_new_str(str);
}

/*
 * Get the next character, loading further lines through read_line as needed.
 * Returns '\0' and sets eof when read_line yields an empty line.
 * Implemented as a loop so a run of blank lines does not grow the stack.
 *
 * NOTE(review): the newline itself is never returned, so two tokens that are
 * separated only by a line break look adjacent to the callers - confirm this
 * is intended.
 */
static char token_next_char(FinCTokenEnv* self)
{
    if ( self->eof )
    {
        return '\0';
    }

    for ( ; ; )
    {
        if ( self->position != string_get_size( self->line_str )
             && string_at(self->line_str, self->position) != '\n' )
        {
            return string_at( self->line_str, self->position++);
        }

        /* Current line is exhausted (end of buffer or newline): fetch the next one. */
        unref(self->line_str);
        self->line_str = string_new_str( (*self->read_line)(self->arg) );
        HERE;
        if ( string_is_empty(self->line_str) )
        {
            HERE;
            self->eof = TRUE;
            self->position = 0;
            return '\0';
        }
        self->line++;
#ifdef FINC_TOKEN_DEBUG
        kprintf("\n");
#endif
        self->position = 0;
    }
}

/* Push back one character; a no-op at end of input or at the start of a line. */
static void token_prev_char(FinCTokenEnv* self)
{
    if ( self->eof )
    {
        return;
    }
    if ( self->position == 0 )
    {
        return;
    }
    self->position--;
}

/*
 * Look ahead one character for a two-character operator.
 * Returns `matched` when the next character equals `second`; otherwise the
 * character is pushed back and `single` is returned.
 */
static FinCTokenType token_select(FinCTokenEnv* self, char second,
                                  FinCTokenType matched, FinCTokenType single)
{
    if ( token_next_char(self) == second )
    {
        return matched;
    }
    token_prev_char(self);
    return single;
}

/* Scan the next token and store its type in self->current_token. */
static void token_run(FinCTokenEnv* self)
{
    char ch;

    token_trim_space(self); /* skip blanks, tabs and comments first */

    unref(self->last_str);
    self->last_str = token_get_string(self); /* identifier or keyword, if any */

    if ( is_eof(self) )
    {
        self->current_token = FinCTokenType_Eof;
        return;
    }

    if ( !string_is_empty(self->last_str) ) /* keyword or identifier */
    {
        if ( !token_match_name(self, string_get_str (self->last_str)) )
        {
            self->current_token = FinCTokenType_Identifier;
        }
        return;
    }

    /* Otherwise it is an operator, a literal or an unknown character. */
    ch = token_next_char(self);
    switch ( ch )
    {
    case '(':
        self->current_token = FinCTokenType_Left_Paren;
        break;

    case ')':
        self->current_token = FinCTokenType_Right_Paren;
        break;

    case '{':
        self->current_token = FinCTokenType_Left_Curly;
        break;

    case '}':
        self->current_token = FinCTokenType_Right_Curly;
        break;

    case '[':
        self->current_token = FinCTokenType_Left_Brace;
        break;

    case ']':
        self->current_token = FinCTokenType_Right_Brace;
        break;

    case ',':
        self->current_token = FinCTokenType_Comma;
        break;

    case ':':
        self->current_token = FinCTokenType_Colon;
        break;

    case ';':
        self->current_token = FinCTokenType_Semicolon;
        break;

    case '?':
        self->current_token = FinCTokenType_Question;
        break;

    case '!':
        self->current_token = token_select(self, '=',
            FinCTokenType_Not_Eqs, FinCTokenType_Not);
        break;

    case '&':
        self->current_token = token_select(self, '&',
            FinCTokenType_Logic_And, FinCTokenType_And);
        break;

    case '*':
        self->current_token = FinCTokenType_Mul;
        break;

    case '+':
        self->current_token = token_select(self, '+',
            FinCTokenType_Inc, FinCTokenType_Add);
        break;

    case '-':
        self->current_token = token_select(self, '-',
            FinCTokenType_Dec, FinCTokenType_Sub);
        break;

    case '/':
        self->current_token = FinCTokenType_Div;
        break;

    case '<':
        ch = token_next_char(self);
        if ( ch == '=' )
        {
            self->current_token = FinCTokenType_Less_Eqs;
        }
        else if ( ch == '<' )
        {
            self->current_token = FinCTokenType_SHL;
        }
        else
        {
            token_prev_char(self);
            self->current_token = FinCTokenType_LT;
        }
        break;

    case '>':
        ch = token_next_char(self);
        if ( ch == '=' )
        {
            self->current_token = FinCTokenType_Greater_Eqs;
        }
        else if ( ch == '>' )
        {
            self->current_token = FinCTokenType_SHR;
        }
        else
        {
            token_prev_char(self);
            self->current_token = FinCTokenType_GT;
        }
        break;

    case '|':
        self->current_token = token_select(self, '|',
            FinCTokenType_Logic_Or, FinCTokenType_Or);
        break;

    case '%':
        self->current_token = FinCTokenType_Mod;
        break;

    case '~':
        self->current_token = FinCTokenType_Bitwise;
        break;

    case '^':
        self->current_token = FinCTokenType_XOR;
        break;

    case '=':
        self->current_token = token_select(self, '=',
            FinCTokenType_Eq, FinCTokenType_Assign);
        break;

    case '\'':
        self->last_char = token_proc_char(self);
        self->current_token = FinCTokenType_Char;
        break;

    case '"':
        unref(self->last_str);
        self->last_str = NULL;
        self->last_str = token_proc_string(self);
        self->current_token = FinCTokenType_String;
        break;

    case '.':
        ch = token_next_char(self);
        if ( is_digit(ch) ) /* a number written as .5 */
        {
            /* NOTE(review): unlike the default branch below, the digit is not
               pushed back before token_proc_number runs - confirm against
               token_proc_number. */
            token_proc_number(self);
            break;
        }
        else
        {
            token_prev_char(self);
            self->current_token = FinCTokenType_Dot;
        }
        break;

    default:
        if ( is_digit(ch) )
        {
            token_prev_char(self);
            token_proc_number(self);
            break;
        }
        kprintf ( "Lex Error: Unknown characector\n");
        finc_context_error_inc(g_finc_context);
        self->current_token = FinCTokenType_Bad;
        break;
    }
}

/*
 * Look `str` up in the reserved-word table.
 * On a hit the matching type is stored in self->current_token and TRUE is
 * returned; otherwise FALSE is returned and current_token is left untouched.
 */
static Bool token_match_name(FinCTokenEnv* self, const char* str)
{
    int i;
    const int count = (int)(sizeof(name_table) / sizeof(name_table[0]));

    for ( i = 0; i < count; i++ )
    {
        if ( strcmp(name_table[i].name, str) == 0 )
        {
            self->current_token = name_table[i].type;
            return TRUE;
        }
    }

    return FALSE;
}

/* Skip the body of a block comment up to and including its closing marker; reports an error at end of input. */
static void token_trim_comment(FinCTokenEnv* self)
{
    char ch;

    while ( !is_eof(self) )
    {
        ch = token_next_char(self);
        if ( ch == '*' )
        {
            ch = token_next_char(self);
            if ( ch == '/' )
            {
                return;
            }
            /* Not the end marker: re-examine this character (it may be another '*'). */
            token_prev_char(self);
        }
    }

    kprintf( "Lex Error: Unexpecting end of comment.\n");
    finc_context_error_inc(g_finc_context);
}

/* Skip a line comment by discarding the rest of the current line and loading the next one. */
static void token_trim_line_comment(FinCTokenEnv* self)
{
    unref(self->line_str);
    self->line_str = string_new_str( (*self->read_line)(self->arg) );
    if ( string_is_empty(self->line_str) )
    {
        self->eof = TRUE;
    }
    self->line++;
    self->position = 0;
}

/* Skip blanks, tabs and comments, leaving the position on the first significant character. */
static void token_trim_space(FinCTokenEnv* self)
{
    char ch;

    while ( (ch = token_next_char(self)) == ' ' || ch == '\t' || ch == '/' )
    {
        if ( ch == '/' )
        {
            ch = token_next_char(self);
            if ( ch == '*' )
            {
                token_trim_comment(self);
            }
            else if ( ch == '/' )
            {
                token_trim_line_comment(self);
            }
            else
            {
                /* A lone '/' is an operator: push back the look-ahead here and
                   the '/' itself after the loop. */
                token_prev_char(self);
                break;
            }
        }
    }

    token_prev_char(self);
}

/*
 * Read the body of a character literal; the opening quote has already been
 * consumed. Returns the character value. A missing closing quote is reported
 * as a lex error and the value read so far is still returned.
 */
static char token_proc_char(FinCTokenEnv* self)
{
    char ch;
    char buf[4], *p;

    p = buf;
    ch = token_next_char(self);
    if ( ch == '\\' )
    {
        ch = token_next_char(self);
        switch ( ch )
        {
        case 'n':  ch = '\n'; break;
        case 't':  ch = '\t'; break;
        case 'v':  ch = '\v'; break;
        case 'b':  ch = '\b'; break;
        case 'r':  ch = '\r'; break;
        case '\\': ch = '\\'; break;
        case '\'': ch = '\''; break;
        default:
            /* Numeric escape such as '\113': collect up to three digits,
               storing the current digit before fetching the next one. */
            while ( is_digit(ch) && p < buf + sizeof(buf) - 1 )
            {
                *p++ = ch;
                ch = token_next_char(self);
            }
            token_prev_char(self);
            *p = '\0';
            ch = atoi(buf); /* decimal, as before; an empty buffer yields 0 */
            break;
        }
    }

    if ( token_next_char(self) != '\'' )
    {
        token_prev_char(self);
        kprintf( "Lex Error: Expecting a '\''.\n");
        finc_context_error_inc(g_finc_context);
        return ch;
    }

    return ch;
}

/*
 * Read the body of a string literal; the opening quote has already been
 * consumed. On success the string is stored in self->last_str and returned.
 * Returns NULL, after reporting a lex error, when input ends first.
 */
static String* token_proc_string(FinCTokenEnv* self)
{
    String* string;

    string = string_new();
    for ( ; ; )
    {
        char ch = token_next_char(self);

        if ( is_eof(self) )
        {
            kprintf( "Lex Error: Unexpecting end in process string.\n");
            finc_context_error_inc(g_finc_context);
            unref(string);
            return NULL;
        }

        if ( ch == '\\' )
        {
            ch = token_proc_escape(self);
        }
        else if ( ch == '"' ) /* end of string */
        {
            unref(self->last_str);
            self->last_str = string;
            return string;
        }

        string_add_char(string, ch);
    }

    return string; /* not reached */
}

/* Decode the escape sequence that follows a backslash inside a string literal. */
static int token_proc_escape(FinCTokenEnv* self)
{
    char ch;
    int result=0;

    ch = token_next_char(self);
    switch (ch)
    {
    case 'n': result = '\n'; break;
    case 't': result = '\t'; break;
    case 'v': result = '\v'; break;
    case 'b': result = '\b'; break;
    case 'r': result = '\r'; break;
    case 'f': result = '\f'; break;
    case 'a': result = '\007'; break;
    case 'x':
        /* NOTE(review): (ch - '0') < 16u accepts ':'..'?' and rejects
           'a'-'f'/'A'-'F' - confirm the intended hex digit set. */
        result = 0;
        ch = token_next_char(self);
        while ( (ch - '0')<16u )
        {
            result = result*16 + ch - '0';
            ch = token_next_char(self);
        }
        token_prev_char(self);
        break;
    default:
        if ( (ch - '0') < 8u)
        {
            result = 0;
            while ( (ch - '0') < 8u )
            {
                result = result*8 + ch - '0';
                ch = token_next_char(self);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?