⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cclex.c

📁 KCC , a good c compiler, write by Ken Harrenstien
💻 C
📖 第 1 页 / 共 2 页
字号:
/*	CCLEX.C - New KCC Lexer - Token input
**
**	(c) Copyright Ken Harrenstien 1989
**		All changes after v.150, 8-Apr-1988
**	(c) Copyright Ken Harrenstien, SRI International 1985, 1986
**		All changes after v.43, 8-Aug-1985
**
**	Original version (C) 1981  K. Chen
*/

#include "cc.h"
#include "ccchar.h"
#include "cclex.h"	/* Get stuff shared with CCINP */
#include <stddef.h>	/* ptrdiff_t */

/* Imported functions */
extern SYMBOL *symfind(char *, int);	/* CCSYM */
extern int nextpp(void);		/* CCPP */
extern void pushpp(void);		/* CCPP */

/* Exported functions defined in CCLEX: */
void lexinit(void);		/* Initializes the lexer (CC) */
int nextoken(void);	/* Reads and parses next token(CCDECL,CCERR,CCSTMT) */
void tokpush(int t, SYMBOL *s);	/*Pushes back a token (like ungetc) (",",") */

/* Internal functions */
static int trident(void), trintcon(void), trfltcon(void),
	trstrcon(void), trchrcon(void);
static int spcident(char *, char *, int), cchar(char **);
static int zerotok(void), dzerotok(void), szerotok(void);

/* Globals used */
extern int savelits;	/* Set 0 by CC main parsing loop for each toplevel
			** declaration parse, to indicate that string literal
			** space is free and can be re-used again.
			*/
#if SYS_CSI /* KAR-11/91, usage before initialization: used to signal
	     * whether we are parsing the right-hand side of an assignment
	     * expression.
	     */
extern char ra_expr;
#endif

/* See also stuff in "cclex.h" */

/* Globals set:
 *	int token	Current token code.
 *   If token==T_ICONST, T_CCONST, T_FCONST, T_SCONST
 *	struct {} constant	contains type+value of constant (CCINP,CCSTMT)
 *   If token==Q_IDENT or a reserved-word token,
 *	SYMBOL *csymbol		contains pointer to SYMBOL for this identifier.
**				If it hasn't yet been defined, it will be a
**				global symbol with class SC_UNDEF.
 *					(CCDECL,CCERR,CCSTMT)
 *
 * Note: the "constant" structure is not correct after nextoken() returns
 * a token which was pushed back by tokpush().
 *
 * Note that most routines operate, or begin to operate, on the current
 * token in "token", rather than immediately reading the next token.  When
 * a token is completely processed and is not needed any more, nextoken()
 * must be called in order to get rid of it and set up a new token for
 * whatever will be next looking at the input.  Occasionally "token" is
 * set directly for proper "priming".
 */	

/* Token stack - entries added by tokpush(), removed by nextoken().
**	tokstack is the index of the most recently pushed entry; zero means
** the stack is empty.  tokpush() increments tokstack before storing and
** nextoken() decrements it after reading, so tstack[0] itself is never
** used and at most MAXTSTACK-1 tokens can be pending at once.
*/
static int tokstack;
static struct
    {
    int      ttoken;	/* Saved token code (a former value of "token") */
    SYMBOL  *tsym;	/* Saved symbol (a former value of "csymbol") */
    }
tstack[MAXTSTACK];

/* String literal char pool
**	Characters are deposited with the slc*() macros below.  All deposits
** pre-increment slcptr, so slcptr always addresses the last char stored.
*/
static char *slcptr = NULL;	/* Pointer into slcpool */
static int slcleft;		/* Countdown of # free chars left */
static int slcocnt;		/* Saved slcleft for deriving string len */

/* Macros to handle deposit of chars into string literal char pool (slcpool)*/
#if 0	/* 5/91 Dynamic tables */
/* This branch is compiled out; it sketches a resizable pool. */
static void slcresize();
static unsigned char slcsize = 0;
 /* String literal character pool, non-static since CCSTMT needs csptr as
  * an offset, thus base (slcpool) needed.
  * NOTE(review): the declaration below is in fact static, which
  * contradicts the sentence above; harmless while this branch is disabled.
  */
static char *slcpool = NULL;
 #define slcreset() ((!slcpool? slcresize():0), slcleft =	\
	 slcsize*DYN_SIZE - 1, slcptr=slcpool)
 #define slcput(c) ((--slcleft > 0 ? 0:slcresize()), *++slcptr = (c))
 #define slcend() ((--slcleft > 0 ? 0:slcresize()), *++slcptr = 0, slclen())
#else
static char slcpool[CPOOLSIZE];	/* String literal character pool */
 /* slcreset() - Mark the whole pool free: slcleft restarts at CPOOLSIZE-1
  *	and slcptr at the pool base.  Because stores pre-increment slcptr,
  *	slcpool[0] is not written by slcput()/slcend().
  */
 #define slcreset() (slcleft=CPOOLSIZE-1, slcptr=slcpool)
 /* slcput(c) - Decrement slcleft; if room remains, store c in the next
  *	slot.  Otherwise the char is silently dropped and the expression
  *	just yields c.  slcleft is decremented in either case.
  */
 #define slcput(c) (--slcleft > 0 ? *++slcptr = (c) : (c))
 /* slcend() - Decrement slcleft; if room remains, store a terminating NUL
  *	and yield slclen() (which then counts the NUL too).  Yields -1 if
  *	there was no room for the terminator.
  */
 #define slcend() (--slcleft > 0 ? (*++slcptr = 0, slclen()) : -1)
#endif
 /* slcbeg() - Start a new string: remember the current slcleft in slcocnt
  *	and yield the address where the next deposited char will go.
  */
 #define slcbeg() (slcocnt=slcleft, slcptr+1)
 /* slclen() - Number of slcleft decrements since the last slcbeg(). */
 #define slclen() (slcocnt - slcleft)

/* LEXINIT() - Reset lexer state and prime the token stream
**	Empties the push-back stack and clears "savelits".  Unless "prepf"
** is set, the first token is then read so that "token" is valid on return.
**	The symbol table (initsym) and the preprocessor (initinp) must
** already be set up, or that first nextoken() will not work properly.
*/
void
lexinit(void)
{
    savelits = 0;		/* String literal char pool may be reset */
    tokstack = 0;		/* Nothing pushed back yet */

    /* NOTE(review): prepf is presumably the "preprocess only" flag;
    ** confirm against its definition.  When set, no token is pre-read.
    */
    if (prepf)
	return;

    nextoken();			/* Prime with 1st token */
}

/* TOKPUSH(t, s) - Push back a token (like ungetc)
**	The current token/symbol pair is saved on the token stack, and the
** pair given as arguments becomes the current one.  A later nextoken()
** restores the saved pair.
**	The "constant" structure is neither saved nor modified, so no
** constant token can be pushed; the current token may however be a
** constant as long as the pushed one is not.  unary() in CCSTMT is the
** only place where this has to be taken into account.
**	If the stack is full an internal error is reported and nothing
** is changed.
*/
void
tokpush(int t, SYMBOL *s)
{
    if (tokstack + 1 >= MAXTSTACK)	/* No free slot left? */
	{
	int_error("tokpush: tokstack overflow");
	return;
	}

    ++tokstack;				/* Claim next slot */
    tstack[tokstack].ttoken = token;	/* Save what is current now */
    tstack[tokstack].tsym = csymbol;

    token = t;				/* Install the pushed pair */
    csymbol = s;
}

/* NEXTOKEN() - Advance to the next C language token
**	If a token has been pushed back with tokpush(), it is popped and
** becomes current.  Otherwise PP-tokens are read from CCPP with nextpp()
** until one yields a C token: whitespace and end-of-line are skipped,
** identifiers and constants are transformed by the tr*() routines, and
** tokens that are only legal inside the preprocessor are reported and
** discarded.  Everything else is passed through unchanged.
**	Sets "token" (and "csymbol") and returns the new value of "token".
*/

int
nextoken (void)
{
    int again;			/* Nonzero: discard PP-token, read another */

    if (tokstack == 0)
	{
	csymbol = NULL;		/* No symbol associated with token yet */

	do
	    {
	    again = 0;
	    token = nextpp ();	/* Fetch next preprocessor token */

	    switch (token)
		{
		/* Whitespace of either kind is simply skipped */
		case T_WSP:
		case T_EOL:
		    again = 1;
		    break;

		/* Tokens that must be transformed */
		case T_IDENT:		/* Identifier */
		    token = trident();
		    break;

		case T_ICONST:		/* Integer constant */
		    token = trintcon();
		    break;

		case T_FCONST:		/* Floating constant */
		    token = trfltcon();
		    break;

		case T_CCONST:		/* Char constant */
		    token = trchrcon();
		    break;

		case T_SCONST:		/* String constant */
		    token = trstrcon();
		    break;

		/* PP-only tokens should never get this far.  Higher
		** levels would catch them eventually, but it is more
		** responsible to screen them out here.
		*/
		case T_MACRO:
		case T_MACARG:
		case T_MACINS:
		case T_MACSTR:
		case T_MACCAT:
		    int_error ("nextoken: PP-only token %Q", token);
		    again = 1;
		    break;

		case T_SHARP:
		case T_SHARP2:
		    error ("# or ## can only appear in directives or macros");
		    again = 1;
		    break;

		case T_UNKNWN:
		    error ("Unknown token: \"%s\"", curval.cp);
		    again = 1;
		    break;

		/* Most tokens are returned exactly as CCPP gave them */
		default:
		    break;
		}
	    }
	while (again);
	}
    else
	{
	/* Pop the most recently pushed-back token */
	csymbol = tstack[tokstack].tsym;
	token = tstack[tokstack].ttoken;
	--tokstack;
	}

    /* This single exit is where a lexing pre-compilation scheme would
    ** break the input stream: tokens would be diverted to a file along
    ** with their string literals and constant values, if any, and the
    ** symbol and type tables would be dumped as well.
    */
    return token;
}

/* TRIDENT() - Transform identifier token
**
** Sets "csymbol" to point to the resulting symbol, and then returns the token
** corresponding to the given identifier (i.e. reserved word or Q_IDENT).
**
** Paths through the routine:
**   - curval.cp is null: internal error; returns zerotok() (T_ICONST, 0).
**   - cursym is non-null: a reserved word returns its own token code, any
**     other symbol class returns Q_IDENT (an SC_MACRO symbol is reported as
**     an internal error first).
**   - cursym is null: a quoted identifier (only when clevkcc is set) is
**     extracted with spcident(); anything else is an internal error.  Either
**     way symfind() is then asked to find or create the symbol, and Q_IDENT
**     is returned.  A bad quoted identifier returns zerotok() instead.
** In every case the value returned has also been stored in "token".
*/

static int
trident()
{
    char ident[IDENTSIZE+4];	/* Identifier big enuf to trigger trunc */
    char *cp;

#if SYS_CSI /* KAR-11/91, needed temp storages for v1=v2=v3...; check */
    int t;
    SYMBOL *s;
#endif

    /* curval.cp is expected to hold the text of the identifier */
    if ((cp = curval.cp) == 0)
	{
	int_error("trident: no string");	/* No string for T_IDENT */
	return zerotok();
	}
    /* NOTE(review): cursym is presumably the symbol CCPP already looked up
    ** for this identifier; confirm against CCPP.  Every path through the
    ** switch below returns, so the code after it only runs when cursym
    ** was null.
    */
    if ((csymbol = cursym) != 0)
	switch (csymbol->Sclass)
	    {
	    case SC_RW:		/* Reserved word, use its token */
		return token = (int) csymbol->Stoken;
	    case SC_MACRO:		/* Paranoia check on CCPP */
		int_error("trident: Escaped macro %S", csymbol);
		/* No break here: after the report, control falls through
		** and the symbol is treated like a normal identifier.
		*/
	    default:		/* Normal symbol, just return identifier */
#if SYS_CSI /* KAR-11/91, usage bef. init. code */
	    /* KAR-11/91, added check for v1=v2=v3...; code */
		if (ra_expr == 1)	/* if parsing right side of asgn. expr */
		    switch (csymbol->Sclass)
			{
			case SC_AUTO:
			case SC_RAUTO:
			case SC_ISTATIC:
			    /* Peek at the following token by calling
			    ** nextoken() recursively; t and s keep the
			    ** current token/symbol, which that call
			    ** overwrites.
			    */
			    t = token;
			    s = csymbol;
			    if (nextoken() != Q_ASGN)	/* check for v1=v2=v3...; */
				if ((!s->Sinit) && (s->Stype->Tspec != TS_ARRAY))
				    {
				    warn("Possible usage before initialization: %s",
					    s->Sname);
				    s->Sinit = 1;	/* Warn only once per symbol */
				    }
			    /* tokpush() saves the peeked token on the stack
			    ** and makes (t, s) the current pair again.
			    */
			    tokpush (t, s);	/* push back last token */
			    break;
			default:
			    break;
			}
		else			/* & address op seen before ident */
		/*
		 * KAR-11/91, changed value of ra_expr if an & is seen
		 * to 20, if ra_expr was 0 and 21 if ra_expr was 1 to be
		 * able to restore the old value of ra_expr
		 *
		 * NOTE(review): this branch is taken for every value of
		 * ra_expr other than 1 (including 0), so the symbol is
		 * marked initialized in all of those cases, not only
		 * after an '&'.
		 */
		    {
		    if (ra_expr == 21)
			ra_expr = 1;
		    else if (ra_expr == 20)
			ra_expr = 0;

		    csymbol->Sinit = 1;
		    }
#endif
		return token = Q_IDENT;
	    }

    /* No symbol was supplied.  A quoted identifier is copied (minus its
    ** quotes) into ident[]; sizeof(ident)-1 leaves room for the NUL that
    ** spcident() appends.
    */
    if (*cp == SPC_IDQUOT && clevkcc)
	{
	if (!spcident(ident, cp, sizeof(ident)-1))
	    return zerotok();
	cp = ident;
	}
    else
	int_error("trident: cursym 0 for \"%s\"", cp);

    /* If no symbol already exists for identifier, find or get one.
    ** This will only happen when creating a symbol for a quoted identifier
    ** (which cannot be a macro), or recovering from an internal error.
    ** If a symbol is made, it will have class SC_UNDEF.
    ** symfind() will complain if the identifier was truncated.
    */
    csymbol = symfind(cp, 1);	/* Find sym or make one */
    return token = Q_IDENT;
}

/* SPCIDENT(to, frm, cnt) - Get quoted identifier; special KCC extension.
**	Copies the body of a quoted identifier from "frm" into "to".
**
**	to  - destination buffer.  to[0] is set to SPC_IDQUOT and the
**	      identifier chars follow.  At most cnt-1 identifier chars are
**	      stored, plus a terminating NUL, so the buffer must hold at
**	      least cnt+1 chars.  Excess chars are silently dropped.
**	frm - source string.  Its first char is the opening quote and is
**	      skipped (trident() only calls this when *frm == SPC_IDQUOT).
**	      The string must end immediately after the closing '`'.
**	cnt - storage limit, see "to".
**
**	Backslash escapes are decoded with cchar(), and
**	UNDERSCORE_MAPCHR is mapped to '_'.  A warning is given for each
**	char that is not valid in a PDP10 symbol, but the char is kept.
**
**	Returns 1 on success.  Returns 0 if the string is malformed (no
**	closing quote, or text after it; reported with int_error) or if
**	the identifier is empty (reported with error).
*/
static int
spcident(char *to, char *frm, int cnt)
{
    int c;
    char *s = to;

    *s = SPC_IDQUOT;		/* Start sym with special char */
    for(;;)
	{
	switch (c = *++frm)	/* Loop over input chars */
	    {
	    case '`':		/* Terminator? */
		if (!*++frm)	/* Yes, string must stop now! */
		    break;	/* Won! */
		/* FALLTHROUGH - text after closing quote is an error */
	    case 0:
		/* "to" is printed with %s below, so it has to be
		** NUL-terminated first; up to here only the chars
		** to[0]..*s have been stored.  s[1] is within the
		** buffer for the same reason the final terminator is.
		*/
		s[1] = '\0';
		int_error("spcident: Bad string for %s %Q", to, token);
		return 0;		/* Leave loop */

	    case '\\':
		c = cchar(&frm);	/* Get escaped char */
		--frm;			/* Back up so ++ gets next */
		/* FALLTHROUGH - treat decoded char like any other */
	    default:
		if (c == UNDERSCORE_MAPCHR) /* Check symbol chars */
		    c = '_';
		if (!iscsym(c) && (c != '$') && (c != '%') && (c != '.'))
		    warn("Bad PDP10 symbol char: '%c'", c);
		if (--cnt > 0)		/* Store only while room remains */
		    *++s = c;		/* add to ident. */
		continue;		/* and continue loop */
	    }
	break;				/* Leave loop */
	}

    *++s = '\0';			/* null terminate */
    if (!to[1])				/* Nothing after the quote char? */
	{
	error("Quoted identifier is null");
	return 0;		/* Say no token */
	}
    return 1;
}

static int
zerotok(void)
{
    /* Make the current token an integer constant of type int with
    ** value zero; used as the stand-in token after an error.
    */
    constant.cvalue = 0;
    constant.ctype = inttype;

    token = T_ICONST;
    return token;
}

static int
dzerotok(void)
{
    /* Make the current token a floating constant of type double with
    ** value 0.0.
    */
    constant.Cdouble = 0.0;
    constant.ctype = dbltype;

    token = T_FCONST;
    return token;
}

/* TRINTCON() - Transform PP-number integer constant
*/
/* SIGN - unsigned long mask with only bit TGSIZ_LONG-1 set, i.e. the top
**	bit of a TGSIZ_LONG-bit value.
*/
#define SIGN ((unsigned long)1<<(TGSIZ_LONG-1))
/* MAXPOSLONG - an all-ones unsigned long shifted right one bit, as a long.
**	NOTE(review): this is the largest positive long only if long and
**	unsigned long have the same number of value+sign bits; confirm for
**	the host compiler.
*/
#define MAXPOSLONG ((long)((~(unsigned long)0)>>1))

static int
trintcon()
{
    register char *cp;
    register int c;
    register long v = 0;
    int ovfl = 0;

    if ((cp = curval.cp) == 0)
	{
	int_error("trintcon: no str");
	return zerotok();
	}

    if ((c = *cp) == '0')		/* Octal/Hex prefix? */
	{
	c = *++cp;
	if (c == 'x' || c == 'X')	/* Hex (base 16) */
	    {
	    if (isxdigit(c = *++cp))  /* must have at least one hex digit */
		{
		v = toint((char) c);			// FW KCC-NT
		while (isxdigit(c = *++cp))
		    {
		    if (v & (017 << (TGSIZ_LONG-4)))
			ovfl++;
		    v = ((unsigned long)v << 4) + toint((char) c); // FW KCC-NT
		    }
		}
	    else
		error("Illegal hex const %s", curval.cp);
	    }
	else			/* Octal (base 8) */
	    {
	    while (isodigit(c))
		{
		if (v & (07 << (TGSIZ_LONG-3)))
		    ovfl++;
		v = ((unsigned long)v << 3) + c - '0';
		c = *++cp;
		}
	    if (isdigit(c))		/* Helpful msg for common error */
		{
		error("Octal constant cannot have '8' or '9'");
		return zerotok();
		}
	    }
	constant.ctype = (v&SIGN) ? uinttype : inttype;	/* Set right type */
	}
    else				/* Decimal (base 10) */
	{
	v = c - '0';
	while (isdigit(c = *++cp))
	    {
	    if (v < ((MAXPOSLONG-9)/10))
		v = v*10 + c - '0';	/* Can't overflow, do it fast */
	    else			/* Slow unsigned multiply loop */
		{
		unsigned long pv, uv = v;
		do
		    {
		    pv = uv;			/* Remember prev value */
		    uv = uv*10 + c - '0';
		    if (uv/10 != pv)
			++ovfl;	/* If cannot recover, ovflw */
		    }
		while (isdigit(c = *++cp))
		    ;
		v = uv;
		break;
		}
	    }
	constant.ctype = (v&SIGN) ? ulongtype:inttype;	/* Set right type */
	}

    /* Fix up result by checking suffixes and deciding type to use.
    ** Must use first of the types that can represent the value:
    ** Decimal:	int, long, ulong
    ** Oct/Hex:	int, uint, long, ulong
    **  U     :	uint, ulong
    **	L     : long, ulong
    **  UL    : ulong
    **
    ** Since for the PDP-10 int and long are the same size, this basically
    ** just amounts to deciding whether signed or unsigned is appropriate.
    **	If sign bit set, unsigned type can hold value.
    **	If overflow is set, no type can hold value, use largest.
    *
    *  And now, specifically to please the Plum Hall validation suite:
    *  Recognize and complain when an integer constant is suffixed with
    *  a floating constant suffix (as opposed to a random character).
    *				  MVS, CSI, 6/27/90
    */

    if (ovfl)
	{
	error("Integer constant overflow");
	constant.ctype = ulongtype;		/* Set to biggest type */
	}
    if (c)
	{
	if ((c = toupper((char) c)) == 'L')	// FW KCC-NT
	    {
	    if (!*++cp)
		constant.ctype = (ovfl||(v&SIGN)) ? ulongtype:longtype;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -