📄 cclex.c
字号:
/* CCLEX.C - New KCC Lexer - Token input
**
** (c) Copyright Ken Harrenstien 1989
** All changes after v.150, 8-Apr-1988
** (c) Copyright Ken Harrenstien, SRI International 1985, 1986
** All changes after v.43, 8-Aug-1985
**
** Original version (C) 1981 K. Chen
*/
#include "cc.h"
#include "ccchar.h"
#include "cclex.h" /* Get stuff shared with CCINP */
#include <stddef.h> /* ptrdiff_t */
/* Imported functions (the defining module is named in each trailing comment) */
extern SYMBOL *symfind(char *, int); /* CCSYM - find a symbol, or make one */
extern int nextpp(void); /* CCPP - get next preprocessor token */
extern void pushpp(void); /* CCPP */
/* Exported functions defined in CCLEX: */
void lexinit(void); /* Initializes the lexer (CC) */
int nextoken(void); /* Reads and parses next token (CCDECL,CCERR,CCSTMT) */
void tokpush(int t, SYMBOL *s); /* Pushes back a token (like ungetc) (",",") */
/* Internal functions.
** The tr*() routines each transform one kind of preprocessor token
** (identifier, integer/floating/string/char constant) into a C token.
*/
static int trident(void), trintcon(void), trfltcon(void),
trstrcon(void), trchrcon(void);
/* spcident() parses a quoted identifier; cchar() reads one escaped char */
static int spcident(char *, char *, int), cchar(char **);
/* Substitute-constant helpers (zerotok/dzerotok set "constant" to 0 / 0.0) */
static int zerotok(void), dzerotok(void), szerotok(void);
/* Globals used */
extern int savelits; /* Set 0 by CC main parsing loop for each toplevel
** declaration parse, to indicate that string literal
** space is free and can be re-used again.
** lexinit() also clears it.
*/
#if SYS_CSI /* KAR-11/91, usage before initialization, used to signal if
* parsing the right hand side of an assignment expr.
* Values tested in trident(): 1 = parsing the right side;
* 20 / 21 = an '&' was seen while the value was 0 / 1.
*/
extern char ra_expr;
#endif
/* See also stuff in "cclex.h" */
/* Globals set:
* int token Current token code.
* If token==T_ICONST, T_CCONST, T_FCONST, T_SCONST
* struct {} constant contains type+value of constant (CCINP,CCSTMT)
* If token==Q_IDENT or a reserved-word token,
* SYMBOL *csymbol contains pointer to SYMBOL for this identifier.
** If it hasn't yet been defined, it will be a
** global symbol with class SC_UNDEF.
* (CCDECL,CCERR,CCSTMT)
*
* Note: the "constant" structure is not correct after nextoken() returns
* a token which was pushed back by tokpush().
*
* Note that most routines operate, or begin to operate, on the current
* token in "token", rather than immediately reading the next token. When
* a token is completely processed and is not needed any more, nextoken()
* must be called in order to get rid of it and set up a new token for
* whatever will be next looking at the input. Occasionally "token" is
* set directly for proper "priming".
*/
/* Token push-back stack.
** Entries are added by tokpush() and removed by nextoken().
** "tokstack" is the index of the most recently pushed entry; zero means
** the stack is empty, so slot 0 of tstack[] is never used.
*/
static struct
{
    int ttoken;     /* Saved token code */
    SYMBOL *tsym;   /* Symbol that accompanied the saved token */
}
tstack[MAXTSTACK];
static int tokstack;
/* String literal char pool.
** Characters are deposited with a pre-increment of slcptr, so the first
** element of the pool is never written after slcreset().
*/
static char *slcptr = NULL; /* Pointer into slcpool (last char deposited) */
static int slcleft; /* Countdown of # free chars left */
static int slcocnt; /* Saved slcleft for deriving string len */
/* Macros to handle deposit of chars into string literal char pool (slcpool).
** The "#if 0" branch is a disabled dynamically-resized variant; the
** "#else" branch (fixed-size pool) is the one actually compiled.
*/
#if 0 /* 5/91 Dynamic tables */
static void slcresize();
static unsigned char slcsize = 0;
/* String literal character pool, non-static since CCSTMT needs csptr as
* an offset, thus base (slcpool) needed.
* NOTE(review): the declaration below is nevertheless "static" -- this
* branch is disabled, so it has no effect either way.
*/
static char *slcpool = NULL;
#define slcreset() ((!slcpool? slcresize():0), slcleft = \
slcsize*DYN_SIZE - 1, slcptr=slcpool)
#define slcput(c) ((--slcleft > 0 ? 0:slcresize()), *++slcptr = (c))
#define slcend() ((--slcleft > 0 ? 0:slcresize()), *++slcptr = 0, slclen())
#else
static char slcpool[CPOOLSIZE]; /* String literal character pool */
/* slcreset() - mark the whole pool free and point slcptr at its base */
#define slcreset() (slcleft=CPOOLSIZE-1, slcptr=slcpool)
/* slcput(c) - count one char; store it only while room remains.
** When the pool is full the char is dropped, but slcleft keeps counting
** down, so slclen() still reflects every char offered.
*/
#define slcput(c) (--slcleft > 0 ? *++slcptr = (c) : (c))
/* slcend() - deposit the terminating NUL and yield slclen(),
** or yield -1 if there was no room left for it.
*/
#define slcend() (--slcleft > 0 ? (*++slcptr = 0, slclen()) : -1)
#endif
/* slcbeg() - remember the current free count and yield the address where
** the next deposited char will go (start of a new string).
*/
#define slcbeg() (slcocnt=slcleft, slcptr+1)
/* slclen() - number of deposits counted since the last slcbeg() */
#define slclen() (slcocnt - slcleft)
/* LEXINIT() - Initialize the lexer
**	Empties the token push-back stack, marks the string literal char
** pool as reusable, and (unless "prepf" is set) reads the first token so
** that "token" is primed for the parser.
**	The symbol table must have already been set up (by initsym)
** and the preprocessor initialized (by initinp),
** otherwise the initial nextoken() will not work properly.
*/
void
lexinit(void)
{
    savelits = 0;       /* OK to reset string literal char pool */
    tokstack = 0;       /* Nothing pushed back yet */

    if (prepf)          /* No priming wanted when prepf is set */
        return;
    nextoken();         /* Prime with 1st token */
}
/* TOKPUSH(t, s) - Push a token
**	Saves the current token and its symbol ("token", "csymbol") on the
** push-back stack, then makes (t, s) the current token.  The saved pair is
** what the next call to nextoken() will return.
**	If the stack is full, an internal error is reported and nothing
** is changed.
**
**	Note that the "constant" structure is not pushed or changed.
** It is OK for the current token to be a constant, if the token pushed
** (arg to tokpush) is not a constant.  In fact, no constants can be
** pushed.  The code for unary() in CCSTMT is the only place where this
** sort of thing has to be taken into account.
*/
void
tokpush(int t, SYMBOL *s)
{
    int slot = tokstack + 1;    /* Stack slot this push would occupy */

    if (slot >= MAXTSTACK)      /* Token stack depth exceeded? */
    {
        int_error("tokpush: tokstack overflow");
        return;
    }

    tokstack = slot;
    tstack[slot].ttoken = token;    /* Save what is current now... */
    tstack[slot].tsym = csymbol;
    token = t;                      /* ...and install the pushed token */
    csymbol = s;
}
/* NEXTOKEN() - Get next C language token, by transforming one or more
**	PP-tokens from CCPP.
**
**	If anything is on the push-back stack, the most recently saved
** token/symbol pair is popped and becomes current.  Otherwise PP-tokens
** are read with nextpp() until one yields a C token: whitespace and
** end-of-line are skipped, identifiers and constants are transformed by
** the tr*() routines, and tokens that are only meaningful to the
** preprocessor are reported and discarded.
**
** Returns the new value of "token".
*/
int
nextoken(void)
{
    if (tokstack != 0)          /* Pop token from push-back stack */
    {
        csymbol = tstack[tokstack].tsym;
        token = tstack[tokstack].ttoken;
        --tokstack;
    }
    else
    {
        int again;              /* Set when the PP-token yields nothing */

        csymbol = NULL;         /* Clear sym associated with token */
        do
        {
            again = 0;
            token = nextpp();   /* Get next preproc token */
            switch (token)
            {
            /* Just skip whitespace */
            case T_WSP:
            case T_EOL:
                again = 1;
                break;

            /* Transform things that need transforming */
            case T_IDENT:       /* Identifier */
                token = trident();
                break;
            case T_ICONST:      /* Integer constant */
                token = trintcon();
                break;
            case T_FCONST:      /* Floating constant */
                token = trfltcon();
                break;
            case T_CCONST:      /* Char constant */
                token = trchrcon();
                break;
            case T_SCONST:      /* String constant */
                token = trstrcon();
                break;

            /*
             * Debug checking to catch PP-only stuff.  Higher levels
             * would catch these later, but it is most responsible
             * to screen them here.
             */
            case T_MACRO:
            case T_MACARG:
            case T_MACINS:
            case T_MACSTR:
            case T_MACCAT:
                int_error ("nextoken: PP-only token %Q", token);
                again = 1;
                break;
            case T_SHARP:
            case T_SHARP2:
                error ("# or ## can only appear in directives or macros");
                again = 1;
                break;
            case T_UNKNWN:
                error ("Unknown token: \"%s\"", curval.cp);
                again = 1;
                break;

            default:            /* Most are returned directly! */
                break;
            }
        }
        while (again);
    }

    /*
     * A lexing pre-compilation scheme will break the input stream
     * at this point.  Tokens will be diverted to a file, along with
     * their associated string literals and constant values, if any;
     * the symbol and type tables will also be dumped.
     */
    return token;
}
/* TRIDENT() - Transform identifier token
**
** Sets "csymbol" to point to the resulting symbol, and then returns the token
** corresponding to the given identifier (i.e. reserved word or Q_IDENT).
**
** Inputs are "curval.cp" (the identifier text) and "cursym" (the symbol
** already associated with it, or 0 if none).
** Returns:
**	the reserved word's own token if the symbol has class SC_RW;
**	Q_IDENT for any other symbol, including one found or made here;
**	T_ICONST (via zerotok) if there is no identifier text or a quoted
**	identifier cannot be parsed.
*/
static int
trident()
{
char ident[IDENTSIZE+4]; /* Identifier big enuf to trigger trunc */
char *cp;
#if SYS_CSI /* KAR-11/91, needed temp storages for v1=v2=v3...; check */
int t;
SYMBOL *s;
#endif
if ((cp = curval.cp) == 0)
{
int_error("trident: no string"); /* No string for T_IDENT */
return zerotok();
}
/* A symbol is already attached: every case of this switch returns */
if ((csymbol = cursym) != 0)
switch (csymbol->Sclass)
{
case SC_RW: /* Reserved word, use its token */
return token = (int) csymbol->Stoken;
case SC_MACRO: /* Paranoia check on CCPP */
int_error("trident: Escaped macro %S", csymbol);
/* Drops thru: after the complaint, treat it as a normal symbol */
default: /* Normal symbol, just return identifier */
#if SYS_CSI /* KAR-11/91, usage bef. init. code */
/* KAR-11/91, added check for v1=v2=v3...; code */
if (ra_expr == 1) /* if parsing right side of asgn. expr */
switch (csymbol->Sclass)
{
case SC_AUTO:
case SC_RAUTO:
case SC_ISTATIC:
/* Remember this identifier, then look one token ahead.
** nextoken() overwrites both "token" and "csymbol".
*/
t = token;
s = csymbol;
if (nextoken() != Q_ASGN) /* check for v1=v2=v3...; */
if ((!s->Sinit) && (s->Stype->Tspec != TS_ARRAY))
{
warn("Possible usage before initialization: %s",
s->Sname);
s->Sinit = 1; /* Marked so it is only warned about once */
}
/* tokpush() saves the lookahead token for the next nextoken()
** call and makes (t, s) the current token again.
*/
tokpush (t, s); /* push back last token */
break;
default:
break;
}
else /* & address op seen before ident */
/*
* KAR-11/91, changed value of ra_expr if an & is seen
* to 20, if ra_expr was 0 and 21 if ra_expr was 1 to be
* able to restore the old value of ra_expr
*
* NOTE(review): this branch runs for every ra_expr value other
* than 1 (including 0), so Sinit is set in all of those cases,
* not only after an '&' -- confirm that this is intended.
*/
{
if (ra_expr == 21)
ra_expr = 1;
else if (ra_expr == 20)
ra_expr = 0;
csymbol->Sinit = 1;
}
#endif
return token = Q_IDENT;
}
/* No symbol yet.  Either this is a quoted identifier (only when clevkcc
** is set), which is parsed into ident[], or it is an internal error and
** the raw text is used as-is for recovery.
*/
if (*cp == SPC_IDQUOT && clevkcc)
{
if (!spcident(ident, cp, sizeof(ident)-1))
return zerotok();
cp = ident;
}
else
int_error("trident: cursym 0 for \"%s\"", cp);
/* If no symbol already exists for identifier, find or get one.
** This will only happen when creating a symbol for a quoted identifier
** (which cannot be a macro), or recovering from an internal error.
** If a symbol is made, it will have class SC_UNDEF.
** symfind() will complain if the identifier was truncated.
*/
csymbol = symfind(cp, 1); /* Find sym or make one */
return token = Q_IDENT;
}
/* SPCIDENT(to, frm, cnt) - Get quoted identifier; special KCC extension.
**	First char of "frm" string is '`'.
**
**	to  - output buffer; receives SPC_IDQUOT followed by the identifier
**	      chars and a terminating NUL.  Must have room for cnt+1 chars.
**	frm - NUL-terminated source text: opening '`', identifier chars
**	      (backslash escapes are decoded by cchar), closing '`'.
**	cnt - size limit: at most cnt-1 identifier chars are stored after
**	      the leading SPC_IDQUOT; any excess is silently dropped.
**
** Returns 1 if an identifier was stored, 0 on failure (an internal error
** for a malformed string, or a user error for an empty identifier).
*/
static int
spcident(char *to, char *frm, int cnt)
{
    int c;
    char *s = to;               /* Last char stored in "to" */

    *s = SPC_IDQUOT;            /* Start sym with special char */
    for (;;)
    {
        switch (c = *++frm)     /* Loop over input chars */
        {
        case '`':               /* Terminator? */
            if (!*++frm)        /* Yes, string must stop now! */
                break;          /* Won! */
            /* Drop thru to flag as error */
        case 0:
            /* Terminate what has been collected so far, so that the %s
            ** below prints a proper string rather than running into
            ** whatever follows in the caller's buffer.  s[1] is always
            ** within the cnt+1 chars the caller provides.
            */
            s[1] = '\0';
            int_error("spcident: Bad string for %s %Q", to, token);
            return 0;           /* Leave loop */
        case '\\':
            c = cchar(&frm);    /* Get escaped char */
            --frm;              /* Back up so ++ gets next */
            /* and drop thru to default */
        default:
            if (c == UNDERSCORE_MAPCHR) /* Check symbol chars */
                c = '_';
            if (!iscsym(c) && (c != '$') && (c != '%') && (c != '.'))
                warn("Bad PDP10 symbol char: '%c'", c);
            if (--cnt > 0)
                *++s = c;       /* add to ident. */
            continue;           /* and continue loop */
        }
        break;                  /* Leave loop */
    }
    *++s = '\0';                /* null terminate */
    if (!to[1])
    {
        error("Quoted identifier is null");
        return 0;               /* Say no token */
    }
    return 1;
}
/* ZEROTOK() - Substitute an integer constant of value zero.
**	Sets "constant" to an int-typed 0 and makes T_ICONST the current
** token.  Used as the result when a token cannot be transformed.
** Returns T_ICONST.
*/
static int
zerotok(void)
{
    constant.cvalue = 0;
    constant.ctype = inttype;
    token = T_ICONST;
    return token;
}
/* DZEROTOK() - Substitute a floating constant of value zero.
**	Sets "constant" to a double-typed 0.0 and makes T_FCONST the
** current token.
** Returns T_FCONST.
*/
static int
dzerotok(void)
{
    constant.Cdouble = 0.0;
    constant.ctype = dbltype;
    token = T_FCONST;
    return token;
}
/* TRINTCON() - Transform PP-number integer constant
*/
/* SIGN - mask with only bit (TGSIZ_LONG-1) set; trintcon() tests a value
** against it to decide whether an unsigned type is needed.
** TGSIZ_LONG is defined elsewhere -- presumably the target's long size in
** bits (TODO confirm).
** NOTE(review): if TGSIZ_LONG is larger than the width of unsigned long
** on the compiling machine, this shift is undefined -- confirm.
*/
#define SIGN ((unsigned long)1<<(TGSIZ_LONG-1))
/* MAXPOSLONG - an all-ones unsigned long shifted right by one, as a long:
** the largest positive value of long on the compiling machine.
*/
#define MAXPOSLONG ((long)((~(unsigned long)0)>>1))
static int
trintcon()
{
register char *cp;
register int c;
register long v = 0;
int ovfl = 0;
if ((cp = curval.cp) == 0)
{
int_error("trintcon: no str");
return zerotok();
}
if ((c = *cp) == '0') /* Octal/Hex prefix? */
{
c = *++cp;
if (c == 'x' || c == 'X') /* Hex (base 16) */
{
if (isxdigit(c = *++cp)) /* must have at least one hex digit */
{
v = toint((char) c); // FW KCC-NT
while (isxdigit(c = *++cp))
{
if (v & (017 << (TGSIZ_LONG-4)))
ovfl++;
v = ((unsigned long)v << 4) + toint((char) c); // FW KCC-NT
}
}
else
error("Illegal hex const %s", curval.cp);
}
else /* Octal (base 8) */
{
while (isodigit(c))
{
if (v & (07 << (TGSIZ_LONG-3)))
ovfl++;
v = ((unsigned long)v << 3) + c - '0';
c = *++cp;
}
if (isdigit(c)) /* Helpful msg for common error */
{
error("Octal constant cannot have '8' or '9'");
return zerotok();
}
}
constant.ctype = (v&SIGN) ? uinttype : inttype; /* Set right type */
}
else /* Decimal (base 10) */
{
v = c - '0';
while (isdigit(c = *++cp))
{
if (v < ((MAXPOSLONG-9)/10))
v = v*10 + c - '0'; /* Can't overflow, do it fast */
else /* Slow unsigned multiply loop */
{
unsigned long pv, uv = v;
do
{
pv = uv; /* Remember prev value */
uv = uv*10 + c - '0';
if (uv/10 != pv)
++ovfl; /* If cannot recover, ovflw */
}
while (isdigit(c = *++cp))
;
v = uv;
break;
}
}
constant.ctype = (v&SIGN) ? ulongtype:inttype; /* Set right type */
}
/* Fix up result by checking suffixes and deciding type to use.
** Must use first of the types that can represent the value:
** Decimal: int, long, ulong
** Oct/Hex: int, uint, long, ulong
** U : uint, ulong
** L : long, ulong
** UL : ulong
**
** Since for the PDP-10 int and long are the same size, this basically
** just amounts to deciding whether signed or unsigned is appropriate.
** If sign bit set, unsigned type can hold value.
** If overflow is set, no type can hold value, use largest.
*
* And now, specifically to please the Plum Hall validation suite:
* Recognize and complain when an integer constant is suffixed with
* a floating constant suffix (as opposed to a random character).
* MVS, CSI, 6/27/90
*/
if (ovfl)
{
error("Integer constant overflow");
constant.ctype = ulongtype; /* Set to biggest type */
}
if (c)
{
if ((c = toupper((char) c)) == 'L') // FW KCC-NT
{
if (!*++cp)
constant.ctype = (ovfl||(v&SIGN)) ? ulongtype:longtype;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -