⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scanner.cpp

📁 C-Talk is an interpreted scripting language with C-like syntax and dynamic type checking. Variables in…
💻 CPP
字号:
#include "compiler.h"
#include "scanner.h"
#include "ctkparser.h"

#define _S     0x01       // space symbol: ' ' \n \t \r ...
#define _I     0x02       // part of identifier: letter or digit
#define _O     0x04       // 0-7
#define _H     0x08       // 0-9 a-f A-F
#define _D     0x10       // 0-9 a-f A-F . + - x X

#define isspace( c )   ( Letter[ byte(c) ] & _S )
#define isident( c )   ( Letter[ byte(c) ] & _I )
#define isdig( c )     ( Letter[ byte(c) ] & _D )
#define isoct( c )     ( Letter[ byte(c) ] & _O )
#define ishex( c )     ( Letter[ byte(c) ] & _H )

static const char Letter [ 256 ] = {
  0,_S,_S,_S, _S,_S,_S,_S, _S,_S,_S,_S, _S,_S,_S,_S,
  _S,_S,_S,_S, _S,_S,_S,_S, _S,_S,_S,_S, _S,_S,_S,_S,
  _S,0,0,0, 0,0,0,0, 0,0,0,_D, 0,_D,_D,0,
  _I|_O|_H|_D,_I|_O|_H|_D,_I|_O|_H|_D,_I|_O|_H|_D,
  _I|_O|_H|_D,_I|_O|_H|_D,_I|_O|_H|_D,_I|_O|_H|_D,
  _I|_H|_D,_I|_H|_D,0,0, 0,0,0,0,
  0,_I|_H|_D,_I|_H|_D,_I|_H|_D, _I|_H|_D,_I|_H|_D,_I|_H|_D,_I, _I,_I,_I,_I, _I,_I,_I,_I,
  _I,_I,_I,_I, _I,_I,_I,_I, _I|_D,_I,_I,0, 0,0,0,_I,
  0,_I|_H|_D,_I|_H|_D,_I|_H|_D, _I|_H|_D,_I|_H|_D,_I|_H|_D,_I, _I,_I,_I,_I, _I,_I,_I,_I,
  _I,_I,_I,_I, _I,_I,_I,_I, _I|_D,_I,_I,0, 0,0,0,0
} ;

// Definitions of the static `instance` members of the scanner and of the
// symbol table. yylex() reads tokens through CtkScanner::instance, and the
// scanner interns identifiers in CtkSymbolTable::instance.
CtkScanner CtkScanner::instance;
CtkSymbolTable CtkSymbolTable::instance;

// Return the token registered under `name`, creating it when it is not in
// the table yet.
//
//   name - NUL-terminated symbol text; a private copy is stored in the token
//   tag  - token code given to a newly created entry; ignored when the name
//          is already present (the existing token is returned unchanged)
//
// A new token receives the next symbol id (++nSymbols) and is linked at the
// head of its hash chain.
CtkToken* CtkSymbolTable::add(char const* name, int tag) {
    // Base-31 polynomial hash over the bytes of the name.
    unsigned hash = 0;
    for (byte* s = (byte*)name; *s != 0; s++) {
        hash = hash*31 + *s;
    }
    int bucket = hash % TOKEN_HASH_TABLE_SIZE;

    // Search the chain; compare the cached hash first to skip most strcmp calls.
    CtkToken* entry = tokenHashTable[bucket];
    while (entry != NULL) {
        if (entry->hash == hash && strcmp(entry->name, name) == 0) {
            return entry;
        }
        entry = entry->next;
    }

    // Not found: build a new token and push it onto the front of the chain.
    entry = new CtkToken();
    entry->hash = hash;
    entry->symId = ++nSymbols;
    entry->tag = tag;
    entry->name = new char[strlen(name) + 1];
    strcpy(entry->name, name);
    entry->next = tokenHashTable[bucket];
    tokenHashTable[bucket] = entry;
    return entry;
}



int yylex()
{
    return CtkScanner::instance.get();
}

// Attach the scanner to input stream `f` and restart scanning at line 1.
// The first line is read into the line buffer right away; the buffer is
// emptied beforehand so that it holds "" if that read delivers nothing.
void CtkScanner::reset(FILE* f) { 
    yyline = 1;
    yyfile = f;
    yyptr = yybuf;
    *yyptr = '\0';
    fgets(yybuf, sizeof yybuf, yyfile);
}

// Scan and return the next token of the input.
//
// Works on the line buffer yybuf: yyptr marks the start of the token being
// scanned and is moved past the token before returning. When the buffer is
// exhausted the next line is read from yyfile with fgets() and yyline is
// incremented.
//
// Returns
//   - the character code for single-character tokens,
//   - the parser token code for multi-character operators,
//   - the tag of the symbol-table entry for identifiers and keywords
//     (yylval.tok is set to the entry),
//   - ILITERAL / RLITERAL / SLITERAL for literals (yylval.ival / rval / sval
//     is set),
//   - EOF when fgets() reports the end of the input, including inside an
//     unterminated /* comment.
// Lexical errors are reported through error().
int CtkScanner::get()
{
    int pos;
    int rpos;
    int tkn;
    long   ival;
    double rval;
    unsigned hexval;
    char ch, quote, *p = yyptr, *q;

    while(true) {
        switch (tkn = *p++) {
          case '\f': case ' ': case '\t': case '\r':
            // Whitespace ('\r' included so that CRLF input is accepted).
            yyptr = p;
            continue;
          case '\0': case '\n':
          readNextLine:
            if (fgets(yybuf, sizeof yybuf, yyfile) == NULL) {
                return EOF;
            }
            yyptr = p = yybuf;
            yyline += 1;
            continue;
          case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
          case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
          case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
          case 's': case 't': case 'u': case 'v': case 'w': case 'x':
          case 'y': case 'z':
          case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
          case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
          case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
          case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
          case 'Y': case 'Z':
          case '_':
            while (isident(*p)) {
                p += 1;
            }
            // Terminate the identifier in place for the lookup, then restore
            // the overwritten character.
            ch = *p;
            *p = '\0';
            yylval.tok = CtkSymbolTable::instance.add(yyptr, IDENT);
            // Names already registered with another tag (keywords) yield that tag.
            tkn = yylval.tok->tag;
            *p = ch;
            break;
          case '0': case '1': case '2': case '3': case '4':
          case '5': case '6': case '7': case '8': case '9':
            // Bound the candidate text by the run of "numeric" characters.
            // That class also contains '+', '-', '.', the hex letters and
            // 'x', so the run can extend past the literal itself (as in
            // "1+2" or "1-b").
            while (isdig(*p)) {
                p += 1;
            }
            ch = *p;
            *p = '\0';
            // Parse the run both as an integer and as a real number and
            // remember how many characters each conversion consumed.
            if (sscanf(yyptr, "%li%n", &ival, &pos) != 1) {
                pos = 0;
            }
            if (sscanf(yyptr, "%lf%n", &rval, &rpos) != 1) {
                rpos = 0;
            }
            *p = ch;
            if (pos == 0 && rpos == 0) {
                error("Invalid numeric constant");
            } else if (rpos > pos) {
                // Only the real conversion covers the longest prefix.
                yylval.rval = (ctk_real)rval;
                tkn = RLITERAL;
                pos = rpos;
            } else {
                // The integer form wins whenever it consumes at least as much.
                yylval.ival = (ctk_integer)ival;
                tkn = ILITERAL;
            }
            if (yyptr + pos != p) {
                // The literal is shorter than the run. A letter or digit
                // directly behind it ("12ab", "0x") is still malformed;
                // otherwise ('+', '-', '.') the token ends here and the rest
                // of the run is scanned as separate tokens.
                if (isident(yyptr[pos])) {
                    error("Invalid numeric constant");
                }
                p = yyptr + pos;
            }
            break;
          case '+':
            if (*p == '+') {
                p += 1;
                tkn = INC;
            } else if (*p == '=') {
                p += 1;
                tkn = SET_ADD;
            }
            break;
          case '-':
            if (*p == '-') {
                p += 1;
                tkn = DEC;
            } else if (*p == '=') {
                p += 1;
                tkn = SET_SUB;
            }
            break;
          case '=':
            if (*p == '=') {
                p += 1;
                tkn = EQ;
            }
            break;
          case '*':
            if (*p == '=') {
                p += 1;
                tkn = SET_MUL;
            }
            break;
          case '/':
            if (*p == '=') {
                p += 1;
                tkn = SET_DIV;
            } else if (*p == '/') {
                // Line comment: drop the rest of the buffer.
                goto readNextLine;
            } else if (*p == '*') {
                // Block comment: advance until "*/", reading further lines
                // as needed. p starts on the '*' of the opener, so "/*/"
                // does not close the comment.
                do {
                    if (*++p == '\0') {
                        yyline += 1;
                        if (fgets(yybuf, sizeof yybuf, yyfile) == NULL) {
                            return EOF;
                        }
                        p = yybuf;
                    }
                } while (p[0] != '*' || p[1] != '/');
                yyptr = p += 2;
                continue;
            }
            break;
          case '%':
            if (*p == '=') {
                p += 1;
                tkn = SET_MOD;
            }
            break;
          case '&':
            if (*p == '&') {
                p += 1;
                tkn = LAND;
            } else if (*p == '=') {
                p += 1;
                tkn = SET_AND;
            }
            break;
          case '|':
            if (*p == '|') {
                p += 1;
                tkn = LOR;
            } else if (*p == '=') {
                p += 1;
                tkn = SET_OR;
            }
            break;
          case '^':
            if (*p == '=') {
                p += 1;
                tkn = SET_XOR;
            }
            break;
          case '!':
            if (*p == '=') {
                p += 1;
                tkn = NE;
            }
            break;
          case '>':
            if (*p == '>') {
                if (*++p == '=') {
                    p += 1;
                    tkn = SET_SHR;
                } else {
                    tkn = SHR;
                }
            } else if (*p == '=') {
                p += 1;
                tkn = GE;
            }
            break;
          case '<':
            if (*p == '<') {
                if (*++p == '=') {
                    p += 1;
                    tkn = SET_SHL;
                } else {
                    tkn = SHL;
                }
            } else if (*p == '=') {
                p += 1;
                tkn = LE;
            }
            break;
          case '$':
          case '.':
          case '?':
          case ':':
          case '~':
          case '(':
          case ')':
          case '{':
          case '}':
          case '[':
          case ']':
          case ';':
          case ',':
            // Single-character tokens: the character code is the token.
            break;
          case '\'':
          case '\"':
            // String literal. It ends at the same quote character that opened
            // it, so the other quote may appear inside unescaped.
            quote = (char)tkn;
            // Decode escapes in place: q trails p and overwrites the source
            // text starting at the opening quote (== yyptr).
            q = p-1;
            while ((ch = *p++) != quote) {
                if (ch == '\0') {
                    error("Unterminated character constant");
                }
                if (ch == '\\') {
                    switch (ch = *p++) {
                      case '\0':
                        // Backslash at the very end of the buffer: stop here
                        // instead of scanning past the terminator.
                        error("Unterminated character constant");
                        break;
                      case 'r':
                        ch = '\r';
                        break;
                      case 'n':
                        ch = '\n';
                        break;
                      case 't':
                        ch = '\t';
                        break;
                      case 'f':
                        ch = '\f';
                        break;
                      case '0':
                      case '1':
                      case '2':
                      case '3':
                      case '4':
                      case '5':
                      case '6':
                      case '7':
                        // Octal escape: consumes every following octal digit.
                        ch -= '0';
                        while (isoct(*p)) {
                            ch = ch*8 + *p++ - '0';
                        }
                        break;
                      case 'x':
                      case 'X':
                        // Hex escape: exactly two hex digits.
                        if (!ishex(p[0]) || !ishex(p[1])) {
                            error("Invalid hexademical constant in string");
                        }
                        // Temporarily cut the buffer after the two digits so
                        // that sscanf cannot read further.
                        ch = p[2];
                        p[2] = '\0';
                        hexval = 0;
                        sscanf(p, "%x", &hexval);
                        p[2] = ch;
                        ch = (char)hexval;
                        p += 2;
                        break;
                      // Any other escaped character stands for itself.
                    }
                }
                *q++ = ch;
            }
            *q++ = '\0';
            yylval.sval = ctkAllocateStringLiteral(yyptr);
            tkn = SLITERAL;
            break;
          default:
            error("Invalid character");
        }
        yyptr = p;
        return tkn;
    }
}
	
// Report a lexical error and terminate the process.
//
// The message is passed to ctkTrace() in the form "path:line:column: msg",
// where path is the path of the module currently being compiled, line is
// yyline and column is the offset of the current token start (yyptr) within
// the line buffer. The function then calls exit(1).
void CtkScanner::error(char const* msg) { 
    // The pointer difference has type ptrdiff_t; convert it explicitly so the
    // argument matches the "%d" conversion.
    ctkTrace("%s:%d:%d: %s", CtkCompiler::instance.currModule->path, 
             yyline, (int)(yyptr - yybuf), msg); 
    exit(1);
}

// Pre-register the language keywords with their parser token codes, so that
// the scanner's identifier lookup returns the keyword token for these names.
// Entries are added in the listed order ("null" and "NULL" both map to
// NULLLITERAL).
CtkSymbolTable::CtkSymbolTable() 
{
    static const struct {
        char const* name;
        int         tag;
    } keywords[] = {
        { "if",           IF },
        { "else",         ELSE },
        { "for",          FOR },
        { "function",     FUNCTION },
        { "do",           DO },
        { "while",        WHILE },
        { "try",          TRY },
        { "catch",        CATCH },
        { "switch",       SWITCH },
        { "case",         CASE },
        { "default",      DEFAULT },
        { "continue",     CONTINUE },
        { "break",        BREAK },
        { "par",          PAR },
        { "return",       RETURN },
        { "throw",        THROW },
        { "import",       IMPORT },
        { "synchronized", SYNCHRONIZED },
        { "null",         NULLLITERAL },
        { "NULL",         NULLLITERAL }
    };

    for (unsigned k = 0; k < sizeof keywords / sizeof keywords[0]; k++) {
        add(keywords[k].name, keywords[k].tag);
    }
}


// Release every token in every hash chain together with its name string
// (both were allocated in add()).
CtkSymbolTable::~CtkSymbolTable()
{
    for (int bucket = 0; bucket < TOKEN_HASH_TABLE_SIZE; bucket++) {
        CtkToken* following;
        for (CtkToken* cur = tokenHashTable[bucket]; cur != NULL; cur = following) {
            // Fetch the successor before the node is destroyed.
            following = cur->next;
            delete[] cur->name;
            delete cur;
        }
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -