📄 lex.c
字号:
/*
* @(#)lex.c
*
* Copyright 1997-1999, Wes Cherry (mailto:wesc@technosis.com)
* 2000-2001, Aaron Ardiri (mailto:aaron@ardiri.com)
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, please write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Revisions:
* ==========
*
* pre 18-Jun-2000 <numerous developers>
* creation
* 18-Jun-2000 Aaron Ardiri
* GNU GPL documentation additions
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#define NOPILOTINC
#include "RCP2C.h"
/* Numeric base for constants; FParseConst asserts that it is still 10. */
static int wBaseCur = 10;
/* Start of the buffer most recently handed to FInitLexer. */
static char *pchLexBuf;
/*
 * Value of pchLex at the start of the most recent FGetLex call (taken
 * before whitespace is skipped); ParseError records it as the error position.
 */
static char *pchLexPrev;
/* Current read position inside the buffer being lexed. */
static char *pchLex;
/* Position recorded by the last ParseError call; NULL after FInitLexer. */
static char *_pchParseError;
/*
 * Copy of FInitLexer's fMarkErrors argument. The only test of it in
 * ParseError is commented out, so it currently has no visible effect here.
 */
static BOOL _fReportErrors;
/* Nesting depth of the C comment currently being skipped. */
static int commentDepth;
/*
 * FInitLexer
 *
 * Point the lexer at a new input buffer and reset its bookkeeping.
 *
 * pch         - text to tokenize; the lexer stops at a '\000' byte
 * fMarkErrors - stored in _fReportErrors
 *
 * Always returns fTrue.
 */
BOOL
FInitLexer(char *pch,
           BOOL fMarkErrors)
{
    _fReportErrors = fMarkErrors;
    _pchParseError = NULL;

    /* buffer start, read position and "previous token" position coincide */
    pchLexBuf = pchLex = pchLexPrev = pch;

    return fTrue;
}
#if 0
/*
 * PchLexer - return the lexer's current read position.
 * Compiled out (#if 0); kept for reference only.
 */
static char *
PchLexer(void)
{
return pchLex;
}
#endif
/*
 * PchLexerPrev
 *
 * Return the read position that was current when the most recent FGetLex
 * call started, i.e. where the last token (and any whitespace before it)
 * begins.
 */
char *
PchLexerPrev(void)
{
    return (pchLexPrev);
}
VOID
ParseError(char *sz1,
char *sz2)
{
_pchParseError = PchLexerPrev();
/*
* if (_fReportErrors)
*/
ErrorLine2(sz1, sz2);
}
#if 0
/*
 * PchParseError - return the position recorded by the last ParseError call.
 * Compiled out (#if 0); kept for reference only.
 */
static char *
PchParseError(void)
{
return _pchParseError;
}
#endif
/*
 * FSkipWhite
 *
 * Advance pchLex over blanks, tabs, line feeds and carriage returns.
 *
 * Returns fFalse when no buffer is set; otherwise non-zero when a
 * character other than the terminating '\000' is waiting at pchLex.
 */
static BOOL
FSkipWhite(void)
{
    if (pchLex == NULL)
        return fFalse;

    for (;;)
    {
        switch (*pchLex)
        {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            pchLex++;
            continue;           /* keep eating whitespace */
        default:
            break;
        }
        break;                  /* first non-white character reached */
    }

    return (*pchLex != '\000');
}
/*
 * AllowLUAtEndOfConstant
 *
 * Swallow an optional integer suffix after a numeric constant, such as the
 * "LU" in 12345678LU: an 'l'/'L' optionally followed by a 'u'/'U', or a
 * lone 'u'/'U'.
 *
 * ch is the character that ended the constant; pchLex already points one
 * past it.  pchLex is advanced by one for every suffix letter accepted.
 */
static void
AllowLUAtEndOfConstant(int ch)
{
    int chSuffix = ch;

    if (chSuffix == 'l' || chSuffix == 'L')
    {
        /* look at whatever follows the 'L' */
        chSuffix = (BYTE) *pchLex;
        pchLex++;
    }

    switch (chSuffix)
    {
    case 'u':
    case 'U':
        pchLex++;
        break;
    default:
        break;
    }
}
/*
 * FParseHex
 *
 * Parse a hexadecimal constant whose first character is ch; pchLex points
 * one past ch.  A leading "0x"/"0X" is skipped if it is still present.
 * An optional L/U suffix is consumed through AllowLUAtEndOfConstant.
 *
 * On success plex->lt is set to ltConst, plex->val to the value, and fTrue
 * is returned with pchLex one past the character that ended the constant.
 * On failure fFalse is returned and *plex is left untouched.
 *
 * Only lt and val are written: the previous version assigned an
 * uninitialised local LEX to *plex, overwriting the caller's szId with
 * indeterminate bytes.
 */
static BOOL
FParseHex(LEX * plex,
          int ch)
{
    /*
     * <ctype.h> functions require a value representable as unsigned char
     * (or EOF); ch may have been sign-extended from a plain char.
     */
    ch = tolower((unsigned char)ch);
    if ((ch == '0') && ((*pchLex == 'x') || (*pchLex == 'X')))
    {
        pchLex++;
        /* lower-case the first digit too, so that "0xA" is accepted */
        ch = tolower((unsigned char)*pchLex++);
    }

    if (!((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f')))
        return fFalse;

    plex->lt = ltConst;
    plex->val = 0;
    while ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f'))
    {
        plex->val *= 16;
        if (ch >= '0' && ch <= '9')
            plex->val += ch - '0';
        else
            plex->val += ch - 'a' + 10;
        ch = tolower((unsigned char)*pchLex++);
    }
    AllowLUAtEndOfConstant(ch);
    return fTrue;
}
/*
 * FParseConst
 *
 * Parse a numeric or character constant that starts with ch; pchLex points
 * one past ch.
 *
 *   - decimal digits:  value accumulated in plex->val, text in plex->szId
 *   - "0x"/"0X":       handed to FParseHex
 *   - 'c' .. 'cccc':   up to four characters packed high byte first
 *
 * Returns fTrue with plex->lt == ltConst when a constant was recognised,
 * otherwise fFalse with plex->szId set to the empty string.  On success
 * pchLex is left one past the first character that is not part of the
 * constant (the caller steps back by one).
 *
 * Fixes relative to the previous version:
 *   - digit text is no longer written past the end of szId (the text is
 *     silently truncated to cchIdMax - 1 characters; val is unaffected);
 *   - the hex prefix is stored as "0x" and NUL-terminated instead of the
 *     unterminated "x0";
 *   - a character constant that hits the end of the input stops scanning
 *     there instead of reading (and leaving pchLex) beyond the NUL.
 */
static BOOL
FParseConst(LEX * plex,
            int ch)
{
    char *pchStore = plex->szId;
    /* last usable slot; one byte is kept back for the terminator */
    char *pchStoreMax = plex->szId + cchIdMax - 1;

    Assert(wBaseCur == 10);
    if ((ch >= '0' && ch <= '9') || ch == '.')
    {
        plex->lt = ltConst;
        plex->val = 0;
        if ((ch == '0') && ((*pchLex == 'x') || (*pchLex == 'X')))
        {
            *pchStore++ = (char)ch;
            *pchStore++ = *pchLex++;
            *pchStore = 0;
            ch = (BYTE) *pchLex++;
            return FParseHex(plex, ch);
        }
        while (ch >= '0' && ch <= '9')
        {
            plex->val *= 10;
            plex->val += ch - '0';
            if (pchStore < pchStoreMax)
                *pchStore++ = (char)ch;
            ch = *pchLex++;
        }
    }
    else if (ch == '\'')
    {
        char szT[2];
        int cc;

        plex->lt = ltConst;
        plex->val = 0;
        *pchStore++ = (char)ch;
        for (cc = 0; cc < 4; ++cc)
        {
            ch = (BYTE) *pchLex++;
            if (ch == '\000')
                break;          /* ran out of input */
            *pchStore++ = (char)ch;
            if (ch == '\'')
                break;
            if (ch < ' ')
            {
                szT[0] = (char)ch;
                szT[1] = '\000';
                ParseError("Unknown character in '' constant: ", szT);
            }
            plex->val *= 256;
            plex->val += ch;    /* high-byte first as a guess */
        }
        /*** Compensate for when we got a full 4 characters ***/
        if (ch != '\'' && ch != '\000')
            ch = (BYTE) *pchLex++;
        if (ch == '\000')
        {
            /*
             * pchLex is one past the NUL; the caller's step back puts it
             * on the NUL again, so do not advance any further.
             */
            ParseError("Unterminated '' constant", NULL);
        }
        else
        {
            ++pchLex;           /* compensate for later -- by caller */
            if (ch != '\'')
            {
                szT[0] = (char)ch;
                szT[1] = '\000';
                ParseError("Unknown '' constant terminator: ", szT);
            }
        }
    }
    else
    {
        *pchStore = 0;
        return fFalse;
    }

    /*** Note: 'pchLex' is now one past the character that's in 'ch' ***/
    AllowLUAtEndOfConstant(ch);
    *pchStore = 0;
    return fTrue;
}
/*
 * ChParseOctal
 *
 * Evaluate an octal escape of at most three digits.  ch is the first digit
 * and pchLex points one past it.  On return pchLex points at the first
 * character that was not consumed.
 */
static int
ChParseOctal(int ch)
{
    int value = 0;
    int digitsLeft = 3;         /* only three octal chars are accepted */

    while (digitsLeft > 0 && ch >= '0' && ch <= '7')
    {
        digitsLeft--;
        value = value * 8 + (ch - '0');
        ch = *pchLex++;
    }

    /* the loop always reads one character too many; put it back */
    pchLex--;
    return value;
}
#ifndef _within
#define _within(n, l, h) (((n) >= (l)) && ((n) <= (h)))
#endif
/*
 * hexize
 *
 * Map a hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f') to its
 * numeric value 0..15.  Any other input yields -1.
 */
static int
hexize(int c)
{
    int digit = -1;

    if (_within(c, '0', '9'))
        digit = c - '0';
    else if (_within(c, 'A', 'F'))
        digit = 10 + (c - 'A');
    else if (_within(c, 'a', 'f'))
        digit = 10 + (c - 'a');

    return digit;
}
/*
 * ChParseHex
 *
 * Evaluate the hex digits of a \x escape starting at pchLex.  Digits are
 * consumed for as long as the accumulated value stays below 256; pchLex is
 * left on the first character that was not consumed.
 *
 * The incoming ch is not used; every digit is read from pchLex.
 */
static int
ChParseHex(int ch)
{
    int value = 0;

    (void)ch;
    for (;;)
    {
        int digit = hexize(*pchLex);
        int widened;

        if (digit < 0)
            break;              /* not a hex digit */

        widened = value * 16 + digit;
        if (widened > 255)
            break;              /* would no longer fit in one byte */

        value = widened;
        pchLex++;
    }
    return value;
}
/*
 * FParseId
 *
 * Parse an identifier ([A-Za-z_][A-Za-z0-9_]*) whose first character is
 * ch; pchLex points one past ch.
 *
 * On success plex->lt is set to ltId, plex->szId receives the
 * NUL-terminated name and fTrue is returned, with pchLex one past the
 * first character that was not stored.  If ch cannot start an identifier,
 * fFalse is returned and *plex is left untouched.
 *
 * At most cchIdMax - 1 characters are stored.  "Identifier too long" is
 * reported only when identifier characters remain after the buffer is
 * full; the previous version also reported it for a name that fit
 * exactly.  The result is written straight into *plex rather than through
 * a partially initialised local LEX.
 */
static BOOL
FParseId(LEX * plex,
         int ch)
{
    int cch;
    int fMore;

    if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_'))
        return fFalse;

    plex->lt = ltId;
    cch = 0;
    do
    {
        plex->szId[cch] = (char)ch;     /* gratuitous cast - Unicode alert!!!! */
        cch++;
        ch = *pchLex++;
        fMore = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
            || (ch >= '0' && ch <= '9') || ch == '_';
    }
    while (fMore && cch < cchIdMax - 1);
    plex->szId[cch] = '\000';

    /* stopped because the buffer is full, not because the name ended */
    if (fMore)
        ParseError("Identifier too long", NULL);

    return fTrue;
}
/*
 * FSkipToEndOfCComment
 *
 * Scan forward from pchLex inside a C comment.
 *
 * Returns fTrue only when the comment terminator that brings commentDepth
 * back to zero has been consumed.  Returns fFalse when
 *   - a nested comment opener was found (a warning is issued, the depth
 *     grows and the opener is consumed),
 *   - a terminator closed a nested comment only, or
 *   - the input ran out before any of the above.
 */
static BOOL
FSkipToEndOfCComment(void)
{
    for (; *pchLex != '\0'; pchLex++)
    {
        if (pchLex[0] == '/' && pchLex[1] == '*')
        {
            WarningLine("nested comment");
            commentDepth++;
            pchLex += 2;
            return (fFalse);
        }

        if (pchLex[0] == '*' && pchLex[1] == '/')
        {
            commentDepth--;
            pchLex += 2;
            /* depth zero: true end of comment; otherwise a nested one ended */
            return (commentDepth == 0) ? (fTrue) : (fFalse);
        }
    }

    return (fFalse);
}
/*
 * SLT - expand to a switch case that maps the single character 'ch' to
 * the token type 'ltArg' in FGetLex's local 'lex'.
 */
#define SLT(ch, ltArg) case ch: lex.lt = ltArg; break;

/*
 * FGetLex
 *
 * Fetch the next token from the buffer set up by FInitLexer.
 *
 * plex       - receives the token (type in lt; text in szId and/or value
 *              in val, depending on the token)
 * fInComment - non-zero when the caller is still inside a C comment; the
 *              lexer then only looks for the end of that comment and
 *              yields ltCComment (not finished) or ltEndCComment
 *
 * Returns non-zero when a token was produced.  Returns fFalse at the end
 * of the input, and also after an unknown character (the token type stays
 * ltNil; pchLex is left on that character).
 *
 * pchLexPrev is set to the read position on entry, before whitespace is
 * skipped.
 *
 * String-literal fixes relative to the previous version:
 *   - a multi-byte character is copied only if it fits, and the length
 *     check uses >=, so szId cannot be overrun;
 *   - the end of the input is detected before each character is consumed,
 *     including directly after the opening quote or after a backslash, so
 *     the scan never walks past the NUL;
 *   - the closing quote is skipped only when it is really there, so
 *     pchLex is not pushed past the NUL after "Unterminated string".
 */
BOOL
FGetLex(LEX * plex,
        BOOL fInComment)
{
    int ch;
    char szT[2];
    LEX lex;
    char *pchStore;

    lex.lt = plex->lt = ltNil;
    pchLexPrev = pchLex;
    if (!FSkipWhite())
        return fFalse;

    if (fInComment)
    {
        if (!FSkipToEndOfCComment())
            lex.lt = ltCComment;        /* keep going with it */
        else
            lex.lt = ltEndCComment;     /* ok, the comment is over */
    }
    else
    {
        /* the token text starts out as the single character just read */
        pchStore = lex.szId;
        ch = *pchStore++ = *pchLex++;
        *pchStore = 0;
        switch (ch)
        {
            /*
             * BUG! could use a lookup table...
             */
            /*
             * TODO logical operators
             */
            SLT('.', ltPoint)
            SLT('+', ltPlus)
            SLT('-', ltMinus)
            SLT('*', ltMult)
            SLT('%', ltMod)
            SLT('(', ltLParen)
            SLT(')', ltRParen)
            SLT('[', ltLBracket)
            SLT(']', ltRBracket)
            SLT('{', ltLBrace)
            SLT('}', ltRBrace)
            SLT(',', ltComma)
            SLT('?', ltQuestion)
            SLT(':', ltColon)
            SLT('^', ltCaret)
            SLT('\\', ltBSlash)
            SLT('#', ltPound)
            SLT('@', ltAt)
            SLT(';', ltSemi)
            SLT('|', ltPipe)

        case '/':
            if (*pchLex == '/')
            {
                *pchStore++ = *pchLex++;
                *pchStore = 0;
                lex.lt = ltDoubleSlash;
            }
            else if (*pchLex == '*')
            {
                commentDepth = 1;
                pchLex++;
                if (!FSkipToEndOfCComment())
                    lex.lt = ltCComment;
                else
                    lex.lt = ltEndCComment;     /* return place holder token */
            }
            else
                lex.lt = ltDiv;
            break;

        case '<':
            if (*pchLex == '=')
            {
                *pchStore++ = *pchLex++;
                *pchStore = 0;
                lex.lt = ltLTE;
            }
            else if (*pchLex == '>')
            {
                *pchStore++ = *pchLex++;
                *pchStore = 0;
                lex.lt = ltNE;
            }
            else
                lex.lt = ltLT;
            break;

        case '>':
            if (*pchLex == '=')
            {
                *pchStore++ = *pchLex++;
                *pchStore = 0;
                lex.lt = ltGTE;
            }
            else
                lex.lt = ltGT;
            break;

        case '=':
            if (*pchLex == '=')
            {
                *pchStore++ = *pchLex++;
                *pchStore = 0;
                lex.lt = ltEQ;
            }
            else
                lex.lt = ltAssign;
            break;

        case '"':
            lex.lt = ltStr;
            pchStore = lex.szId;        /* the quote itself is not stored */
            while (*pchLex != '"')
            {
                int n, tmp;

                if (*pchLex == '\0')
                {
                    ParseError("Unterminated string", NULL);
                    break;
                }

                /*
                 * NOTE(review): pfnChkCode presumably returns the byte
                 * length of a multi-byte character at pchLex and a value
                 * below 1 otherwise -- confirm against its definition.
                 */
                n = (*pfnChkCode) (pchLex, &tmp);
                if (n >= 1)
                {
                    /* copy the whole character or nothing at all */
                    if (n > (lex.szId + cchIdMax - 1) - pchStore)
                    {
                        ParseError("String too long", NULL);
                        break;
                    }
                    while (n-- > 0 && *pchLex != '\0')
                        *pchStore++ = *pchLex++;
                }
                else if (*pchLex == '\\')
                {
                    int chEsc;

                    pchLex++;
                    chEsc = *pchLex++;
                    switch (chEsc)
                    {
                    case 'a':
                        chEsc = '\a';
                        break;
                    case 'b':
                        chEsc = '\b';
                        break;
                    case 'f':
                        chEsc = '\f';
                        break;
                    case 'n':
                        chEsc = '\n';
                        break;
                    case 'r':
                        chEsc = '\r';
                        break;
                    case 't':
                        chEsc = '\t';
                        break;
                    case 'v':
                        chEsc = '\v';
                        break;
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                        chEsc = ChParseOctal(chEsc);
                        break;
                    case 'x':
                    case 'X':
                        chEsc = ChParseHex(chEsc);
                        break;
                    case '\0':          /* handle slash at the end of the line */
                        chEsc = '\\';
                        pchLex--;       /* stay on the terminator */
                        break;
                    case 'e':           /* ESC, thank you Richard Brinkerhoff */
                        chEsc = '\033';
                        break;
                    case 'z':           /* special control z'er */
                        chEsc = 'z' & 0x1f;
                        break;
                    case '_':           /* special ignore - turns to nothing */
                        chEsc = '\0';
                        break;
                    default:            /* unknown escape: keep the character */
                        break;
                    }
                    /*** This program does not handle nulls in strings ***/
                    if (chEsc)
                        *pchStore++ = (char)chEsc;      /* gratuitous cast - Unicode alert!!!! */
                }
                else
                    *pchStore++ = *pchLex++;

                if (pchStore - lex.szId >= cchIdMax - 1)
                {
                    ParseError("String too long", NULL);
                    break;
                }
            }
            /* skip the closing quote, but only if the loop really found one */
            if (*pchLex == '"')
                pchLex++;
            *pchStore = 0;
            break;

        default:
            if (FParseConst(&lex, ch) || FParseId(&lex, ch))
            {
                /*
                 * do nuthin...code is easier to read this way
                 */
            }
            else
            {
                szT[0] = (char)ch;
                szT[1] = '\000';
                ParseError("Unknown character: ", szT);
            }
            /*
             * The constant/identifier parsers stop one past their
             * lookahead character; step back onto it.
             */
            pchLex--;
            break;
        }
    }

    *plex = lex;
    return lex.lt != ltNil;
}
/*
 * SPLT - expand to a switch case that prints the fixed spelling 'sz' of
 * token type 'lt'.  The text is written with fputs, not used as a printf
 * format: "%" (ltMod) is not a valid format string.
 */
#define SPLT(lt, sz) case lt: fputs(sz, stdout); break;

/*
 * PrintLex
 *
 * Debugging aid: write a token to stdout.  The body is only compiled when
 * FOO is defined; otherwise the function does nothing.
 */
VOID
PrintLex(LEX * plex)
{
    (void)plex;                 /* unused unless FOO is defined */
#ifdef FOO
    switch (plex->lt)
    {
    case ltConst:
        printf("%d ", plex->val);
        break;
    case ltId:
        printf("%s ", plex->szId);
        break;
        SPLT(ltPoint, ".");
        SPLT(ltPlus, "+");
        SPLT(ltMinus, "-");
        SPLT(ltMult, "*");
        SPLT(ltDiv, "/");
        SPLT(ltMod, "%");
        SPLT(ltLParen, "(");
        SPLT(ltRParen, ")");
        SPLT(ltLBracket, "[");
        SPLT(ltRBracket, "]");
        SPLT(ltLBrace, "{");
        SPLT(ltRBrace, "}");
        SPLT(ltComma, ",");
        SPLT(ltLT, "<");
        SPLT(ltGT, ">");
        SPLT(ltLTE, "<=");
        SPLT(ltGTE, ">=");
        SPLT(ltNE, "<>");
        SPLT(ltEQ, "==");
        SPLT(ltAssign, "=");
        SPLT(ltQuestion, "?");
        SPLT(ltColon, ":");
        SPLT(ltCaret, "^");
    default:
        break;
    }
#endif
}
/*
* eof
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -