📄 lex.c

📁 关于rcp2c_v3.rar协议在LINUX下的实现程序
💻 C
字号:

/*
 * @(#)lex.c
 *
 * Copyright 1997-1999, Wes Cherry   (mailto:wesc@technosis.com)
 *           2000-2001, Aaron Ardiri (mailto:aaron@ardiri.com)
 * All rights reserved.
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation;  either version 2, or (at your option)
 * any version.
 *
 * This program is distributed in the hope that it will be useful,  but
 * WITHOUT  ANY  WARRANTY;   without  even   the  implied  warranty  of 
 * MERCHANTABILITY  or FITNESS FOR A  PARTICULAR  PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You  should have  received a  copy of the GNU General Public License
 * along with this program;  if not,  please write to the Free Software 
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Revisions:
 * ==========
 *
 * pre 18-Jun-2000 <numerous developers>
 *                 creation
 *     18-Jun-2000 Aaron Ardiri
 *                 GNU GPL documentation additions
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>

#define NOPILOTINC
#include "RCP2C.h"

static int wBaseCur = 10;

static char *pchLexBuf;
static char *pchLexPrev;
static char *pchLex;

static char *_pchParseError;
static BOOL _fReportErrors;

static int commentDepth;

BOOL
FInitLexer(char *pch,
           BOOL fMarkErrors)
{
  pchLexBuf = pch;
  pchLex = pch;
  pchLexPrev = pch;
  _pchParseError = NULL;
  _fReportErrors = fMarkErrors;
  return fTrue;
}

#if 0
static char *
PchLexer(void)
{
  return pchLex;
}
#endif

char *
PchLexerPrev(void)
{
  return pchLexPrev;
}

VOID
ParseError(char *sz1,
           char *sz2)
{
  _pchParseError = PchLexerPrev();

  /*
   * if (_fReportErrors) 
   */
  ErrorLine2(sz1, sz2);
}

#if 0
static char *
PchParseError(void)
{
  return _pchParseError;
}
#endif

static BOOL
FSkipWhite(void)
{
  if (pchLex == NULL)
    return fFalse;
  while (*pchLex == ' ' || *pchLex == '\t' || *pchLex == '\n'
         || *pchLex == '\r')
    pchLex++;
  return (*pchLex != '\000');
}

/***    Allow 12345678LU, for instance (should be only in .h or .hpp?!?) ***/
static void
AllowLUAtEndOfConstant(int ch)
{
  if ((ch == 'l') || (ch == 'L'))
    ch = (BYTE) * pchLex++;
  if ((ch == 'u') || (ch == 'U'))
    pchLex++;
}

static BOOL
FParseHex(LEX * plex,
          int ch)
{
  LEX lex;

  ch = tolower(ch);
  if ((ch == '0') && ((*(pchLex) == 'x') || (*(pchLex) == 'X')))
  {
    pchLex++;
    ch = *pchLex++;
  }
  if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f'))
  {
    lex.lt = ltConst;
    lex.val = 0;
    while ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f'))
    {
      lex.val *= 16;

      if ((ch >= '0' && ch <= '9'))
        lex.val += ch - '0';
      else
        lex.val += ch - 'a' + 10;
      ch = *pchLex++;
      ch = tolower(ch);
    }
    AllowLUAtEndOfConstant(ch);
    *plex = lex;
    return fTrue;
  }
  return fFalse;
}

static BOOL
FParseConst(LEX * plex,
            int ch)
{
  char *pchStore;

  pchStore = plex->szId;
  Assert(wBaseCur == 10);
  if ((ch >= '0' && ch <= '9') || ch == '.')
  {
    plex->lt = ltConst;
    plex->val = 0;
    if ((ch == '0') && ((*(pchLex) == 'x') || (*(pchLex) == 'X')))
    {
      *pchStore++ = *pchLex++;
      *pchStore = (char)ch;
      ch = *pchLex++;
      return FParseHex(plex, ch);
    }

    while (ch >= '0' && ch <= '9')
    {
      plex->val *= 10;
      plex->val += ch - '0';
      *pchStore++ = (char)ch;
      ch = *pchLex++;
    }
  }
  else if (ch == '\'')
  {
    char szT[2];
    int cc;

    plex->lt = ltConst;
    plex->val = 0;
    *pchStore++ = (char)ch;
    for (cc = 0; cc < 4; ++cc)
    {
      ch = (BYTE) * pchLex++;
      *pchStore++ = (char)ch;

            /***    printf("char=[%c]\n", ch); ***/
      if (ch == '\'')
        break;

      if (ch < ' ')
      {
        szT[0] = (char)ch;
        szT[1] = '\000';
        ParseError("Unknown character in '' constant: ", szT);
      }

      plex->val *= 256;
      plex->val += ch;                           /* high-byte first as a guess */
    }

        /***    Compensate for when we got a full 4 characters ***/
    if (ch != '\'')
      ch = (BYTE) * pchLex++;

    ++pchLex;                                    /* compensate for later -- by caller */
    if (ch != '\'')
    {
      szT[0] = (char)ch;
      szT[1] = '\000';
      ParseError("Unknown '' constant terminator: ", szT);
    }
  }
  else
  {
    *pchStore = 0;
    return fFalse;
  }

    /***    Note: 'pchLex' is now one past the character that's in 'ch' - the next character to parse ***/
  AllowLUAtEndOfConstant(ch);

  *pchStore = 0;
  return fTrue;
}

static int
ChParseOctal(int ch)
{
  int chVal, cnt;

  chVal = 0;

        /***    Functionality change: 2.01b     bar: only three octal chars! ***/
  for (cnt = 3; cnt-- && ((ch >= '0') && (ch <= '7'));)
  {
    chVal *= 8;
    chVal += ch - '0';
    ch = *pchLex++;
  }
  pchLex--;                                      /* back off to the non-octal digit */
  return chVal;
}

#ifndef _within
#define _within(n, l, h)    (((n) >= (l)) && ((n) <= (h)))
#endif

static int
hexize(int c)
{

/***   convert hex digit to binary                                                 ***/
  if (_within(c, '0', '9'))
    return (c - '0');
  if (_within(c, 'A', 'F'))
    return (c - ('A' - 10));
  if (_within(c, 'a', 'f'))
    return (c - ('a' - 10));

  return (-1);
}

static int
ChParseHex(int ch)
{
  int chVal;

  chVal = 0;
  for (;;)
  {
    int n;

    ch = *pchLex;
    n = hexize(ch);
    if (n < 0)
      break;
    n += (chVal * 16);
    if (n >= 256)
      break;                                     /* Java's \Uxxxx might take more digits (Unicode alert!!!!) */
    ++pchLex;
    chVal = n;
  }

  return chVal;
}

static BOOL
FParseId(LEX * plex,
         int ch)
{
  LEX lex;

  if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_')
  {
    /*
     * Identifier 
     */
    int cch;

    lex.lt = ltId;
    cch = 0;
    do
    {

      /*
       * if (ch != '"') 
       */
      {
        lex.szId[cch] = (char)ch;                /* gratuitous cast - Unicode alert!!!! */
        cch++;
      }
      ch = *pchLex++;
      if (cch == cchIdMax - 1)
      {
        ParseError("Identifier too long", NULL);
        break;
      }
    }
    while ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
           || (ch >= '0' && ch <= '9') || ch == '_');
    lex.szId[cch] = '\000';
    *plex = lex;
    return fTrue;
  }
  return fFalse;
}

/***    Skip to end of C comment, if there is an end - else eat the input ***/
static BOOL
FSkipToEndOfCComment(void)
{
  while (*pchLex)
  {
    if ((pchLex[0] == '/') && (pchLex[1] == '*'))
    {
      WarningLine("nested comment");
      commentDepth++;
      pchLex += 2;
      return (fFalse);
    }

    if ((pchLex[0] == '*') && (pchLex[1] == '/'))
    {
      commentDepth--;
      pchLex += 2;
      if (commentDepth)
        return (fFalse);                         /* end of nested comment */
      else
        return (fTrue);                          /* true end of comment */
    }
    pchLex++;
  }

  return (fFalse);
}

#define SLT(ch, ltArg) case ch: lex.lt = ltArg; break;

BOOL
FGetLex(LEX * plex,
        BOOL fInComment)
{
  int ch;
  char szT[2];
  LEX lex;
  char *pchStore;

  lex.lt = plex->lt = ltNil;
  pchLexPrev = pchLex;
  if (!FSkipWhite())
    return fFalse;

  if (fInComment)
  {
    if (!FSkipToEndOfCComment())
      lex.lt = ltCComment;                       /* keep going with it */
    else
      lex.lt = ltEndCComment;                    /* ok, the comment is over */
  }
  else
  {
    pchStore = lex.szId;
    ch = *pchStore++ = *pchLex++;

    *pchStore = 0;
    switch (ch)
    {
        /*
         * BUG! could use a lookup table... 
         */
        /*
         * TODO logical operators 
         */
      SLT('.', ltPoint) SLT('+', ltPlus) SLT('-', ltMinus) SLT('*', ltMult) SLT('%', ltMod) SLT('(', ltLParen) SLT(')', ltRParen) SLT('[', ltLBracket) SLT(']', ltRBracket) SLT('{', ltLBrace) SLT('}', ltRBrace) SLT(',', ltComma) SLT('?', ltQuestion) SLT(':', ltColon) SLT('^', ltCaret) SLT('\\', ltBSlash) SLT('#', ltPound) SLT('@', ltAt) SLT(';', ltSemi) SLT('|', ltPipe) case '/':
        if (*pchLex == '/')
        {
          *pchStore++ = *pchLex++;
          *pchStore = 0;
          lex.lt = ltDoubleSlash;
        }
        else if (*pchLex == '*')
        {
          commentDepth = 1;
          pchLex++;
          if (!FSkipToEndOfCComment())
            lex.lt = ltCComment;
          else
            lex.lt = ltEndCComment;              /* return place holder token */
        }
        else
          lex.lt = ltDiv;
        break;
      case '<':
        if (*pchLex == '=')
        {
          *pchStore++ = *pchLex++;
          *pchStore = 0;
          lex.lt = ltLTE;
        }
        else if (*pchLex == '>')
        {
          *pchStore++ = *pchLex++;
          *pchStore = 0;
          lex.lt = ltNE;
        }
        else
          lex.lt = ltLT;
        break;
      case '>':
        if (*pchLex == '=')
        {
          *pchStore++ = *pchLex++;
          *pchStore = 0;
          lex.lt = ltGTE;
        }
        else
          lex.lt = ltGT;
        break;
      case '=':
        if (*pchLex == '=')
        {
          *pchStore++ = *pchLex++;
          *pchStore = 0;
          lex.lt = ltEQ;
        }
        else
          lex.lt = ltAssign;
        break;
      case '"':
        lex.lt = ltStr;
        pchStore = lex.szId;
        while (*pchLex != '"')
        {
          int n, tmp;

          n = (*pfnChkCode) (pchLex, &tmp);
          if (n >= 1)
          {
            while (n-- > 0)
              *pchStore++ = *pchLex++;
          }
          else if (*pchLex == '\\')
          {
            int ch;

            pchLex++;
            ch = *pchLex++;
            switch (ch)
            {
              case 'a':
                ch = '\a';
                break;

              case 'b':
                ch = '\b';
                break;

              case 'f':
                ch = '\f';
                break;

              case 'n':
                ch = '\n';
                break;

              case 'r':
                ch = '\r';
                break;

              case 't':
                ch = '\t';
                break;

              case 'v':
                ch = '\v';
                break;

              case '0':
              case '1':
              case '2':
              case '3':
              case '4':
              case '5':
              case '6':
              case '7':
                ch = ChParseOctal(ch);
                break;

              case 'x':
              case 'X':
                ch = ChParseHex(ch);
                break;

              case '\0':                        /* handle slash at the end of the line  */
                ch = '\\';
                break;

              case 'e':                         /* ESC, thank you Richard Brinkerhoff   */
                ch = '\033';
                break;

              case 'z':                         /* special control z'er                 */
                ch = 'z' & 0x1f;
                break;

              case '_':                         /* special ignore - turns to nothing    */
                ch = '\0';
                break;
            }

                                                /***    This program does not handle nulls in strings ***/
            if (ch)
              *pchStore++ = (char)ch;            /* gratuitous cast - Unicode alert!!!! */
          }
          else
            *pchStore++ = *pchLex++;
          if (pchStore - lex.szId == cchIdMax - 1)
          {
            ParseError("String too long", NULL);
            break;
          }
          if (*pchLex == 0)
          {
            ParseError("Unterminated string", NULL);
            break;
          }
        }
        pchLex++;
        *pchStore = 0;
        break;
      default:
        if (FParseConst(&lex, ch) || FParseId(&lex, ch))
        {
          /*
           * do nuthin...code is easier to read this way 
           */
        }
        else
        {
          szT[0] = (char)ch;
          szT[1] = '\000';
          ParseError("Unknown character: ", szT);
        }
        pchLex--;
        break;
    }
  }

  *plex = lex;
  return lex.lt != ltNil;
}

#define SPLT(lt, sz) case lt: printf(sz); break;

VOID
PrintLex(LEX * plex)
{
  plex = plex;
#ifdef FOO
  switch (plex->lt)
  {
    case ltConst:
      printf("%d ", plex->val);
      break;
    case ltId:
      printf("%s ", plex->szId);
      break;
      SPLT(ltPoint, ".");
      SPLT(ltPlus, "+");
      SPLT(ltMinus, "-");
      SPLT(ltMult, "*");
      SPLT(ltDiv, "/");
      SPLT(ltMod, "%");
      SPLT(ltLParen, "(");
      SPLT(ltRParen, ")");
      SPLT(ltLBracket, "[");
      SPLT(ltRBracket, "]");
      SPLT(ltLBrace, "{");
      SPLT(ltRBrace, "}");
      SPLT(ltComma, ",");
      SPLT(ltLT, "<");
      SPLT(ltGT, ">");
      SPLT(ltLTE, "<=");
      SPLT(ltGTE, ">=");
      SPLT(ltNE, "<>");
      SPLT(ltEQ, "==");
      SPLT(ltAssign, "=");
      SPLT(ltQuestion, "?");
      SPLT(ltColon, ":");
      SPLT(ltCaret, "^");
  }
#endif
}

/*
 * eof 
 */
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -