⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lexer.c

📁 guide and some example with visualC++
💻 C
字号:
/* lexer.c * *	(C) Copyright Apr 15 1995, Edmond J. Breen. *		   ALL RIGHTS RESERVED. * This code may be copied for personal, non-profit use only. * * * History: *           First written V1.0 1984 (E.J.B) *           Revised       V1.2 1994 (E.J.B) *           Revised       V2.0 1995 (E.J.B) * */ #include <stdio.h>#include <stdlib.h>#include <limits.h>#include <string.h>#include <math.h>#include <float.h>#include "global.h"#include "xalloc.h"#include "error.h"#include "symbol.h"#include "lexer.h"#include "preproc.h"static void retract(char c);static int fail(int ival, int c);static int charliteral(int c);static void EiC_stringliteral(void);static void success(int ival);char nextchar(void);char *nextproline(void);void retract(char c);int fail(int ival, int c);int charliteral(int c);void stringliteral(void);void success(int ival);#define NEWLINE(ch)  (ch  == '\n' ? TRUE: FALSE)#define LETTER(ch)   ((ch >=  'a'  && ch   <=  'z') ? TRUE :\				 ((ch >=  'A'  && ch   <=  'Z') ? TRUE :\				 ((ch ==  '_'   || ch   ==  '$')? TRUE : FALSE)))#define DIGIT(ch)   ((ch  >=  '0'   && ch   <= '9') ? TRUE : FALSE)#define WHITE(ch)   ((ch  == ' '    || ch   == '\t')? TRUE : FALSE)keyword_t cwords[] ={    {"__eiclongjmp",eiclongjmpsym,},    {"__eicsetjmp",eicsetjmpsym,},  {"auto", autosym,},  {"break", breaksym,},  {"case", casesym,},  {"char", charsym,},  {"const", constsym,},  {"continue", continuesym,},  {"default", defaultsym,},  {"do", dosym,},  {"double", doublesym,},  {"else", elsesym,},  {"enum", enumsym,},  {"extern", externsym,},  {"float", floatsym,},  {"for", forsym,},  {"goto",gotosym,},  {"if", ifsym,},  {"int", intsym,},  {"long", longsym,},  {"register", registersym,},  {"return", returnsym,},  {"safe", safesym,},  {"short", shortsym,},  {"signed", signedsym,},  {"sizeof", sizeofsym,},  {"static", staticsym,},  {"struct", structsym,},  {"switch", switchsym,},  {"typedef", typedefsym,},  {"union", unionsym,},  {"unsafe", unsafesym,},  {"unsigned", unsignedsym,},  {"void", voidsym,},  {"volatile", volatilesym,},  {"while", whilesym,},};#define NOTOKEN 0char EiC_LEXEM[BSIZE+1];static token_t TOK;token_t *token = &TOK;#ifdef ILOOKAHEADtoken_t EiC_TokenArray[MAX_TOKENS];int EiC_TokenI = 0, EiC_TokenP = 0, EiC_TokenR=0;#elseunsigned short STOKEN;#endifunsigned lex_lineno, lex_lineprev, lex_linepos, lex_linelen, lex_curpos = 0, lex_lastpos;char *lex_buff;static int state = 0;static unsigned long lexival;static double lexfval;static char Lseen,  /* long seen */            Fseen,  /* floating point seen */            Useen,  /* Unsigned specifier seen */            Hseen;  /* Hex or Octal value seen */#define getoct(x)  (((x)>='0'&&(x)<='7')? (x)-'0':-1)static int gethex(int c){    if (c >= '0' && c <= '9')	return c - '0';    if (c >= 'a' && c <= 'f')	return c - 'a' + 10;    if (c >= 'A' && c <= 'F')	return c - 'A' + 10;    return -1;}void EiC_initlex(char *str){    lex_buff = str;    lex_curpos = 0;    lex_lineprev = 0;    lex_lineno = 1;#ifdef ILOOKAHEAD    EiC_TokenR = EiC_TokenP = EiC_TokenI = 0;    #else    STOKEN = NOTOKEN;#endif    }char EiC_nextchar(){    char *EiC_nextproline();    if (lex_buff[lex_curpos] == EOF) {	return 0;    }    else if (lex_buff[lex_curpos] == '\0') {      	if (lex_curpos != lex_lastpos)	    return 0;	lex_buff = EiC_nextproline();	lex_lastpos = lex_curpos = 0;	lex_lineprev = 0;	lex_lineno = 1;    }    if (lex_lineprev != lex_lineno) {	lex_linepos = lex_curpos;	lex_lineprev = lex_lineno;    }    return (lex_buff[lex_curpos++]);}static void checkExt(int c){				/* check for unsigned and long suffix */    Lseen = Useen = Fseen = 0;    if (c == 'f' || c == 'F')	Fseen = 1;    else if (c == 'u' || c == 'U') {	Useen = 1;	if ((c = EiC_nextchar()) == 'l' || c == 'L')	    Lseen = 1;	else	    retract(c);    } else if (c == 'l' || c == 'L')	Lseen = 1;    else	retract(c);}static void retract(char c){    if (c != '\0') {	lex_curpos--;	if (lex_curpos < lex_lastpos)	    lex_lastpos = lex_curpos;    }}static int fail(int ival, int c){    retract(c);    switch (ival) {      case RELOP: return (10);      case ID:    return (20);      case FLOAT:      case INT: return (100);    }    return 0;}static void setfloatval(void){    if (Lseen || lexfval > FLT_MAX  || !Fseen ) {	token->Val.dval = lexfval;	token->Tok = DOUBLE;    } else {	/* mandatory conversion to float */	float f = lexfval;	token->Val.dval = f;	token->Tok = FLOAT;    }}static void setintval(void){    if (Fseen) {	lexfval = lexival;	setfloatval();	return;    }    if (Useen) {	if (Lseen || lexival > UINT_MAX) {	    token->Tok = ULONG;	    token->Val.ulval = lexival;	} else {	    token->Tok = UINT;	    token->Val.uival = (unsigned) lexival;	}    } else if (Lseen || lexival > UINT_MAX) {	if (lexival > ULONG_MAX) {	    token->Tok = ULONG;	    token->Val.ulval = lexival;	} else if (lexival >= ULONG_MAX) {	    token->Tok = ULONG;	    token->Val.ulval = lexival;	} else {	    token->Val.lval = lexival;	    token->Tok = LONG;	}    } else {	if (lexival <= INT_MAX) {	    token->Val.ival = (int) lexival;	    token->Tok = INT;	} else if(Hseen && lexival <= UINT_MAX) {	    token->Tok = UINT;	    token->Val.uival = (unsigned) lexival;	} else if(lexival <= LONG_MAX) {	    	    token->Val.lval = (long) lexival;	    token->Tok = LONG;	} else {	   token->Tok = ULONG;	   token->Val.ulval = lexival;       }    }}static void success(int ival){    int i, size;    size = (int) (lex_curpos - lex_lastpos);    memcpy(EiC_LEXEM, &lex_buff[lex_lastpos], size);    EiC_LEXEM[size] = '\0';    if (Lseen) size--;    if (Useen) size--;    if (Fseen) size--;    Hseen = 0;    switch (ival) {	case ID:	    if ((token->Tok = EiC_iskeyword(cwords, EiC_LEXEM,					sizeof(cwords) / sizeof(keyword_t))) == 0) {		token->Tok = ID;		/* search for id in various name spaces */		if ((token->Val.sym = EiC_lookup(EiC_work_tab, EiC_LEXEM)) == NULL)		    token->Val.sym = EiC_insertLUT(EiC_work_tab, EiC_LEXEM, ID);		if (token->Val.sym)		    if (token->Val.sym->sclass == c_typedef)			token->Tok = TYPENAME;	    }	    break;	case OCTAL:	    if (Fseen)		EiC_error("Declaration syntax error");	    for (lexival = 0, i = 0; i < size; i++)		lexival = lexival * 8 + getoct(EiC_LEXEM[i]);	    Hseen = 1;	    setintval();	    break;	case HEX:	    for (lexival = 0, i = 2; i < size; i++)		lexival = lexival * 16 + gethex(EiC_LEXEM[i]);	    Hseen = 1;	    setintval();	    break;	case INT:	    for (lexival = 0, i = 0; i < size; i++)		lexival = lexival * 10 + EiC_LEXEM[i] - '0';	    setintval();	    break;	case FLOAT:	    if (Useen)		EiC_error("Declaration syntax error");	    lexfval = atof(EiC_LEXEM);	    setfloatval();	    break;	case RELOP:	case MISC:	    break;    }}static int WASLITERAL;static int charliteral(int c){    if (c == '\\') {	switch ((c = EiC_nextchar())) {	case 'n': c = '\n'; break;     /* newline */	case 't': c = '\t'; break;     /* tabspace */	case 'v': c = '\v'; break;     /* vertical tab */	case 'b': c = '\b'; break;     /* backspace */	case 'r': c = '\r'; break;     /* carriage return */	case 'f': c = '\f'; break;     /* formfeed */	case 'a': c = '\a'; break;     /* bell */	case '\\': c = '\\'; break;    /* backslash */	case '\'': c = '\''; break;    /* single quote */	case '"': c = '\"'; break;     /* double quote */        case '?': c = '\?'; break;     /* question mark */	case 'x':		       /* string of hex characters */	case 'X':{	    int i, val = 0;	    while ((i = gethex((c = EiC_nextchar()))) > -1) {		val = val * 16 + i;	    }	    retract(c);	    if (val > 255)		EiC_error("Illegal character hex value");	    c = val;	}	break;	default:	    if (getoct(c) > -1) {		/* octal characters */		int i, val = 0;		while ((i = getoct(c)) > -1) {		    val = val * 8 + i;		    c = EiC_nextchar();		}		retract(c);		if (val > 255)		    EiC_error("Illegal character octal value");		c = val;	    } else		EiC_error("Illegal character escape sequence `\\%c'", c);	    break;	}	WASLITERAL = 1;    } else	WASLITERAL = 0;    return ((signed char )c);}static void EiC_stringliteral(void){    unsigned size, lastsize = 0, c;    char *p=NULL;    lex_lastpos = lex_curpos;    do {	for (size = 0; ((c = charliteral(EiC_nextchar())) != '\0' || WASLITERAL) &&	     !(c == '"' && !WASLITERAL)  && size < BSIZE; size++) 	    EiC_LEXEM[size] = c;    	if (lastsize)	    p = (char *) xrealloc(p, lastsize + size + 1);	else	    p = (char *) xcalloc(size + 1, sizeof(char));	memcpy(&p[lastsize], EiC_LEXEM, size);	lastsize += size;	if(c != '"' && size == BSIZE) {	    p[lastsize++] = c;	    continue;	}		if (c != '"')	    EiC_error("String literal error");	do {	    c = EiC_nextchar();	    if (c == '\n')		lex_lastpos++, lex_lineno++;	} while (WHITE(c) || c == '\n');	lex_lastpos = lex_curpos;	if (!c)	    do		c = EiC_nextchar();	    while (WHITE(c));    } while (c == '"' || size == BSIZE);    retract(c);    p[lastsize] = '\0';    token->Val.p.sp = token->Val.p.p = p;    token->Val.p.ep = p + lastsize + 1;}extern int EiC_lexan(void){    int t=0, loop; char c=0, EiC_nextchar();#ifdef ILOOKAHEAD    token = &EiC_TokenArray[EiC_TokenP];    if(EiC_TokenR > 0) {	EiC_TokenR--;	EiC_TokenI++;	EiC_TokenP=(EiC_TokenP+1)%MAX_TOKENS;	return token->Tok;    }#else    if (STOKEN != NOTOKEN) {	STOKEN = NOTOKEN;	return token->Tok;    }#endif        loop  = 1;    state = 0;    while (loop) {	switch (state) {	  case 0: lex_lastpos = lex_curpos; c = EiC_nextchar();	    state = (WHITE(c) ? 0 :		    (c == '\n' ? lex_lineno++, 0 :		    (c == '<' ? t = LT, 1 :		    (c == '>' ? t = GT, 2 :		    (c == '+' ? t = '+', 3 :		    (c == '-' ? t = '-', 4 :		    (c == '|' ? t = BOR, 5 :		    (c == '&' ? t = AND, 6 :		    (c == '\''? 7 :		    (c == '"' ? 8 :		    (c == '.' ? 9 :  		    (c == '/' ? t = '/', c = EiC_nextchar(), 50 :		    (c == '%' ? t = '%', c = EiC_nextchar(), 50 :		    (c == '*' ? t = '*', c = EiC_nextchar(), 50 :		    (c == '=' ? t = ASS, c = EiC_nextchar(), 50 :		    (c == '!' ? t = NOT, c = EiC_nextchar(), 50 :		    (c == '^' ? t = XOR, c = EiC_nextchar(), 50 :		     fail(RELOP, c))))))))))))))))));	    break;	  case 1: /* get <,  <= and << */	    if ((c = EiC_nextchar()) == '<') t = LSHT;	    else state = 50;	    break;	  case 2: /* get >, >= and >> */	    if ((c = EiC_nextchar()) == '>') t = RSHT;	    else state = 50;	    break;	  case 3: c = EiC_nextchar();                         /* get +, += or ++ */	    if (c == '+') t = INC, state = 60;	    else state = 50;	    break;	  case 4: c = EiC_nextchar();                            /* get -, -= -- */	    state = 60;	    if (c == '-') t = DEC;	    else if (c == '>') t = RARROW;	    else state = 50;	    break;	  case 5: c = EiC_nextchar();                         /* get |, |= or || */	    if (c == '|') t = LOR, state = 60;	    else state = 50;	    break;	  case 6: c = EiC_nextchar();                         /* get &, &= or && */	    if (c == '&') t = LAND, state = 60;	    else state = 50;	    break;	  case 7:token->Val.ival = charliteral(EiC_nextchar()); /* char_constants */	    t = CHAR;	    if (EiC_nextchar() != '\'')		EiC_error("Missing single quote '");	    state = 60;	    break;	  case 8: EiC_stringliteral();                        /* string literals */	    token->Tok = STR;	    /*return STR;*/ loop = 0; break;	  case 9: c = EiC_nextchar();	    t = '.';	    if(DIGIT(c)) 		state = 22;	    else		state = 60;	    retract(c);	    break;	  case 10: c = EiC_nextchar();              /* identifiers and  keywords */	    state = (LETTER(c) ? 11 :		    (c == '_' ? 11 : fail(ID, c)));	    break;	  case 11: c = EiC_nextchar();	    state = (LETTER(c) ? 11 :		    (DIGIT(c) ? 11 :		    (c == '_' ? 11 : 12)));	    break;	  case 12: retract(c); success(ID); /*return (token->Tok);*/ loop = 0; break;	  case 20: c = EiC_nextchar();                     /* integers and reals */	    state = (c == '0' ? 30 :		    (DIGIT(c) ? 21 : fail(INT, c)));	    break;	  case 21: c = EiC_nextchar();	    state = (DIGIT(c) ? 21 :		    (c == '.' ? 22 :		    (c == 'e' ? 23 :		    (c == 'E' ? 23 : 25))));	    break;	  case 22: c = EiC_nextchar();	    state = (DIGIT(c) ? 22 :		    (c == 'e' ? 23 :		    (c == 'E' ? 23 : 26)));	    break;	  case 23: c = EiC_nextchar();	    state = (c == '+' ? 24 :		    (c == '-' ? 24 :		    (DIGIT(c) ? 24 : fail(FLOAT, c) /* ??? */ )));	    break;	  case 24: c = EiC_nextchar();	    state = (DIGIT(c) ? 24 : 26);	    break;	  case 25: checkExt(c); success(INT); /*return (token->Tok);*/ loop = 0; break;	  case 26: checkExt(c); success(FLOAT); /*return (token->Tok);*/ loop = 0; break;	  case 27: checkExt(c); success(HEX);   /*return (token->Tok);*/ loop = 0; break;	  case 28: checkExt(c); success(OCTAL); /*return (token->Tok);*/ loop = 0; break;	  case 30:			  /* check for octal and hex numbers */	    if ((c = EiC_nextchar()) == 'x' || c == 'X') {		while (gethex((c = EiC_nextchar())) > -1);		state = 27;		break;	    }	    if (c != '.' && c != 'e' && c != 'E') {		while (getoct(c) > -1)		    c = EiC_nextchar();		state = 28;		break;	    }	    retract(c); state = 21; break;	  case 50:                                      /* mix with equal's  */	    if (c == '=')		switch (t) {		  case '+': t = ADDEQ;  break;		/* += */		  case '-': t = SUBEQ;  break;		/* -= */		  case '/': t = DIVEQ;  break;		/* /= */		  case '*': t = MULEQ;  break;		/* *= */		  case '%': t = MODEQ;  break;		/* %= */		  case ASS: t = EQ;     break;		/* == */		  case GT:  t = GE;     break;		/* >= */		  case LT:  t = LE;     break;		/* <= */		  case NOT: t = NE;     break;		/* != */		  case RSHT:t = RSHTEQ; break;		/* >>= */		  case LSHT:t = LSHTEQ; break;		/* <<= */		  case AND: t = ANDEQ;  break;		/* &= */		  case BOR: t = BOREQ;  break;		/* |= */		  case XOR: t = XOREQ;  break;		/* ^= */		  default: retract(c);	    } else retract(c);	    state = 60;	    break;	  case 60: success(MISC); token->Tok = t; /*return (token->Tok);*/ loop = 0; break;	  case 100: token->Tok = EiC_nextchar(); /*return (token->Tok);*/ loop = 0; break;	}    }#ifdef ILOOKAHEAD    if(EiC_TokenI<MAX_TOKENS)	EiC_TokenI++;    EiC_TokenP = (EiC_TokenP +1)%MAX_TOKENS;#endif    return token->Tok;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -