aparser.cpp

来自「SRI international 发布的OAA框架软件」· C++ 代码 · 共 872 行 · 第 1/2 页

CPP
872
字号
/* ANTLRParser.C
 *
 * SOFTWARE RIGHTS
 *
 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
 * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
 * company may do whatever they wish with source code distributed with
 * PCCTS or the code generated by PCCTS, including the incorporation of
 * PCCTS, or its output, into commerical software.
 *
 * We encourage users to develop software with PCCTS.  However, we do ask
 * that credit is given to us for developing PCCTS.  By "credit",
 * we mean that if you incorporate our source code into one of your
 * programs (commercial product, research project, or otherwise) that you
 * acknowledge this fact somewhere in the documentation, research report,
 * etc...  If you like PCCTS and have developed a nice tool with the
 * output, please mention that you developed it using PCCTS.  In
 * addition, we ask that this header remain intact in our source code.
 * As long as these guidelines are kept, we expect to continue enhancing
 * this system and expect to make other tools available as they are
 * completed.
 *
 * ANTLR 1.33
 * Terence Parr
 * Parr Research Corporation
 * with Purdue University and AHPCRC, University of Minnesota
 * 1989-2000
 */

#include "pcctscfg.h"

#include "pccts_stdlib.h"
#include "pccts_stdarg.h"
#include "pccts_string.h"
#include "pccts_stdio.h"

PCCTS_NAMESPACE_STD

/* I have to put this here due to C++ limitation
 * that you can't have a 'forward' decl for enums.
 * I hate C++!!!!!!!!!!!!!!!
 * Of course, if I could use real templates, this would go away.
 */
// MR1
// MR1  10-Apr-97  133MR1  Prevent use of varying sizes for the
// MR1  			ANTLRTokenType enum: this placeholder definition
// MR1  			fixes the enumerator range at 0..9999.
// MR1

enum ANTLRTokenType {
	TER_HATES_CPP       = 0,
	ITS_TOO_COMPLICATED = 9999
};	    // MR1

#define ANTLR_SUPPORT_CODE

#include ATOKEN_H
#include ATOKENBUFFER_H
#include APARSER_H

/* MR14: file-local size constants.  Neither one is referenced in the part
 * of the file visible here.
 * NOTE(review): presumably the initial size and the growth increment of the
 * infinite-lookahead token buffer - confirm against the code that uses them.
 */
static const int zzINF_DEF_TOKEN_BUFFER_SIZE = 2000;    /* MR14 */
static const int zzINF_BUFFER_TOKEN_CHUNK_SIZE = 1000;  /* MR14 */

                 /* L o o k a h e a d  M a c r o s */

/* Table of single-bit masks, lowest bit first: bitmask[n] == (1 << n).
 * The array has one slot per bit of a SetWordType (8 bits per byte is
 * assumed, at most 32 bits per word), but only the first eight slots are
 * given a value; any further slots are zero-initialized.
 */
SetWordType ANTLRParser::bitmask[sizeof(SetWordType)*8] = {
	0x01, 0x02, 0x04, 0x08,
	0x10, 0x20, 0x40, 0x80
};

/* Definition of the class-static character buffer: 500 chars, starting out
 * as the empty string.
 * NOTE(review): presumably scratch space for building error messages; the
 * code that fills it is not visible here.
 */
char ANTLRParser::eMsgBuffer[500] = "";

/* Destructor: frees the two arrays held by the parser.
 * token_type is the lookahead type cache allocated with new[] in the
 * constructor.  zzFAILtext starts out NULL in the constructor, so this
 * delete[] is a no-op unless something has allocated it in the meantime.
 */
ANTLRParser::~ANTLRParser()
{
	delete [] token_type;
	delete [] zzFAILtext;       // MR16 Manfred Kogler
}

/* Construct a parser reading from _inputTokens.
 *
 *   _inputTokens  token buffer to read from; it is told to keep at least k
 *                 tokens and is handed a pointer back to this parser
 *   k             lookahead depth; stored in LLk and used as the length of
 *                 the token_type[] cache
 *   use_inf_look  stored in can_use_inf_look
 *   dlook         demand-lookahead request; anything other than 0 is
 *                 reported through panic() and then ignored
 *   ssize         stored in bsetsize
 *
 * The lookahead cache is allocated here but not filled; init() does that
 * by calling prime_lookahead().
 */
ANTLRParser::ANTLRParser(ANTLRTokenBuffer *_inputTokens,
                         int k,
                         int use_inf_look,
                         int dlook,
                         int ssize)
{
	LLk = k;
	can_use_inf_look = use_inf_look;

	/* MR14: demand lookahead is rejected in C++ mode */
	if ( dlook != 0 ) {
		panic("ANTLRParser::ANTLRParser - Demand lookahead not supported in C++ mode");
	}
	demand_look = 0;            /* was: demand_look = dlook; */

	bsetsize  = ssize;
	guessing  = 0;
	token_tbl = NULL;
	eofToken  = static_cast<ANTLRTokenType>(1);

	/* lookahead cache: one token type per lookahead position */
	token_type = new ANTLRTokenType[LLk];
	lap    = 0;
	labase = 0;
#ifdef ZZDEFER_FETCH
	stillToFetch = 0;           // MR19
#endif
	dirty      = 0;
	inf_labase = 0;             // MR7
	inf_last   = 0;             // MR7

	/* connect parser and token buffer to each other */
	this->inputTokens = _inputTokens;
	this->inputTokens->setMinTokens(k);
	_inputTokens->setParser(this);          // MR1

	resynchConsumed = 1;        // MR8
	zzFAILtext      = NULL;     // MR9

	/* traceReset() copies traceOptionValueDefault, so set it first */
	traceOptionValueDefault = 0;            // MR10
	traceReset();                           // MR10

	zzGuessSeq     = 0;         // MR10
	syntaxErrCount = 0;         // MR11
}

/* Prepare the parser for a parse: fill the lookahead buffer, mark the
 * resynch state as "token consumed", and reset the trace settings.
 */
void ANTLRParser::init()
{
	prime_lookahead();

	resynchConsumed = 1;        // MR8

	traceReset();               // MR10
}

/* Put the trace bookkeeping back into its starting state: no current rule,
 * depth zero, guess tracing set to 1, and the trace option taken from
 * traceOptionValueDefault.
 */
void ANTLRParser::traceReset()
{
	traceCurrentRuleName  = NULL;
	traceDepth            = 0;
	traceGuessOptionValue = 1;
	traceOptionValue      = traceOptionValueDefault;
}


#ifdef _MSC_VER  // MR23
//Turn off warning:
//interaction between '_setjmp' and C++ object destruction is non-portable
#pragma warning(disable : 4611)
#endif
/* Begin a guess (speculative parse).
 *
 * Saves the current parser state into *st, turns the guessing flag on, and
 * records a jump target in guess_start.state.
 *
 * Returns the value of setjmp(): 0 when the jump target has just been
 * recorded, non-zero when control arrives here through a longjmp().
 *
 * NOTE(review): the C and C++ standards leave longjmp() undefined once the
 * function that called setjmp() has returned, and this function returns
 * right after the setjmp() call.  Confirm how callers use guess() before
 * relying on a later longjmp() to guess_start.state.
 */
int ANTLRParser::
guess(ANTLRParserState *st)
{
	saveState(st);
	guessing = 1;
	return setjmp(guess_start.state);
}
#ifdef _MSC_VER  // MR23
/* put warning 4611 back to its default setting */
#pragma warning(default: 4611)
#endif

/* Copy the parser fields that a guess may disturb into *buf so that
 * restoreState() can put them back later.
 */
void ANTLRParser::
saveState(ANTLRParserState *buf)
{
	ANTLRParserState &snapshot = *buf;

	/* guess bookkeeping */
	snapshot.guess_start = guess_start;
	snapshot.guessing    = guessing;

	/* lookahead bookkeeping */
	snapshot.inf_labase = inf_labase;
	snapshot.inf_last   = inf_last;
	snapshot.dirty      = dirty;

	/* MR10: trace bookkeeping */
	snapshot.traceOptionValue      = traceOptionValue;
	snapshot.traceGuessOptionValue = traceGuessOptionValue;
	snapshot.traceCurrentRuleName  = traceCurrentRuleName;
	snapshot.traceDepth            = traceDepth;
}

/* Put back the parser fields recorded in *buf by saveState().
 *
 * Besides copying the saved fields, this refills token_type[] from the
 * LLk tokens at offsets (1-LLk)..0 of the token buffer, resets lap and
 * labase to 0, reports on stderr when tracing switches between enabled and
 * disabled as a result of the restore, and finally calls
 * traceGuessDone(buf).
 */
void ANTLRParser::
restoreState(ANTLRParserState *buf)
{
	guess_start = buf->guess_start;
	guessing    = buf->guessing;
	inf_labase  = buf->inf_labase;
	inf_last    = buf->inf_last;
	dirty       = buf->dirty;

	// Rebuild the type cache from the k tokens ending at the restored
	// TokenBuffer position.  (Original note: with demand_look the parser
	// would presumably not look backwards for these tokens.)
	for (int slot = 0; slot < LLk; slot++) {
		token_type[slot] = inputTokens->bufferedToken(slot + 1 - LLk)->getType();
	}
	lap    = 0;
	labase = 0;

	/* MR10 */

	const bool wasTracing = (traceOptionValue > 0);
	traceOptionValue = buf->traceOptionValue;
	const bool nowTracing = (traceOptionValue > 0);

	/* Report only a real on/off change, and only while inside a rule (MR21).
	 * The rule name and depth printed are the ones from before the restore.
	 */
	if ( wasTracing != nowTracing && traceCurrentRuleName != NULL ) {
		if ( nowTracing ) {
			/* MR23 */ printMessage(stderr,
				"trace enable restored in rule %s depth %d\n",
				traceCurrentRuleName,
				traceDepth);
		} else {
			/* MR23 */ printMessage(stderr,
				"trace disable restored in rule %s depth %d\n",
				traceCurrentRuleName, /* MR21 */
				traceDepth);
		}
	}

	traceGuessOptionValue = buf->traceGuessOptionValue;
	traceCurrentRuleName  = buf->traceCurrentRuleName;
	traceDepth            = buf->traceDepth;
	traceGuessDone(buf);
}

/* Advance the input by one token.
 *
 * Without ZZDEFER_FETCH: fetch the next token from inputTokens, store its
 * type through NLA, decrement dirty, and step lap to the next slot.
 * NLA is defined outside this file section; per the original note it is
 * the next place where a lookahead token should go.
 *
 * With ZZDEFER_FETCH: nothing is fetched here; stillToFetch is incremented
 * and the fetch is left for later (see the MR19 note below).
 *
 * If ZZDEBUG_CONSUME_ACTION is defined, zzdebug_consume_action() is called
 * first in either mode.
 */
void ANTLRParser::
consume()
{

#ifdef ZZDEBUG_CONSUME_ACTION
    zzdebug_consume_action();
#endif

// MR19 V.H. Simonis
//      Defer Fetch feature
//      Moves action of consume() into LA() function

#ifdef ZZDEFER_FETCH
      stillToFetch++;
#else
      NLA = inputTokens->getToken()->getType();
      dirty--;
      // The &(LLk-1) mask wraps like a modulo only when LLk is a power of two.
      // NOTE(review): confirm that LLk is always a power of two.
      lap = (lap+1)&(LLk-1);
#endif

}

/* Return the i-th lookahead token, i.e. the token at offset (i - LLk) in
 * the token buffer.
 *
 * With ZZDEFER_FETCH any deferred fetches are carried out first (MR19:
 * the action of consume() is moved into the lookahead functions).
 * With DEBUG_TOKENBUFFER the request is checked against the token buffer
 * and panic() is called with an explanatory message if it cannot be met.
 */
_ANTLRTokenPtr ANTLRParser::
LT(int i)
{
#ifdef ZZDEFER_FETCH
	undeferFetch();
#endif

#ifdef DEBUG_TOKENBUFFER
	/* MR20: second comparison was "<=" */
	if ( i >= inputTokens->bufferSize() || inputTokens->minTokens() < LLk )
	{
		char message[2000];             /* MR20: was "static" */
		sprintf(message, "The minimum number of tokens you requested that the\nANTLRTokenBuffer buffer is not enough to satisfy your\nLT(%d) request; increase 'k' argument to constructor for ANTLRTokenBuffer\n", i);
		panic(message);
	}
#endif

	return inputTokens->bufferedToken(i - LLk);
}

/* Call consume() k - (LLk - dirty) times; does nothing when that count is
 * zero or negative.  The count is fixed before the first consume().
 */
void
ANTLRParser::
look(int k)
{
	const int alreadyThere = LLk - dirty;

	for (int missing = k - alreadyThere; missing > 0; --missing) {
		consume();
	}
}

/* Load the lookahead buffer with LLk symbols by calling consume() LLk
 * times (even if DEMAND_LOOK), then clear dirty and line labase up
 * with lap.
 */
void
ANTLRParser::
prime_lookahead()
{
	int fetched = 0;
	while ( fetched < LLk ) {
		consume();
		++fetched;
	}

	dirty = 0;

	// MR14 Sinan Karasu (sinan.karasu@boeing.com):
	// lap and labase used to be reset to 0 here; now labase follows lap.
	labase = lap;   // MR14
}

/* Test whether the current input symbol, LA(1), is '_t'.
 *
 * If dirty has reached LLk one token is consumed before the test.  The
 * original notes say that outside demand-lookahead mode that extra
 * consume is never needed, so one routine serves both modes.
 *
 * Returns 1 on a match, after incrementing dirty and advancing labase.
 * Returns 0 on a mismatch, after filling in the error outputs:
 *   *MissText = NULL, *MissTok = _t, *BadTok = LT(1), *MissSet = NULL.
 */
int ANTLRParser::
_match(ANTLRTokenType _t, ANTLRChar **MissText,
	   ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok,
	   SetWordType **MissSet)
{
	if ( dirty==LLk ) consume();

	if ( LA(1)==_t ) {
		++dirty;
		labase = (labase+1)&(LLk-1);	// labase maintained even if !demand look
		return 1;
	}

	/* mismatch: describe what was expected and what was found */
	*MissText = NULL;
	*MissTok  = _t;
	*BadTok   = LT(1);
	*MissSet  = NULL;
	return 0;
}

/* Test whether the current input symbol, LA(1), is '_t'; the variant used
 * during exception handling, with no error outputs.
 *
 * Returns 1 on a match (dirty incremented, labase advanced), 0 otherwise.
 */
int ANTLRParser::
_match_wsig(ANTLRTokenType _t)
{
	if ( dirty==LLk ) consume();

	if ( LA(1)==_t ) {
		++dirty;
		labase = (labase+1)&(LLk-1);	// labase maintained even if !demand look
		return 1;
	}
	return 0;
}

/* Test whether the current input symbol, LA(1), is a member of the token
 * set 'tset'.
 *
 * If dirty has reached LLk one token is consumed before the test.  The
 * original notes say that outside demand-lookahead mode that extra
 * consume is never needed, so one routine serves both modes.
 *
 * Returns 1 on a match, after incrementing dirty and advancing labase.
 * Returns 0 on a mismatch, after filling in the error outputs (MR23):
 *   *MissText = NULL, *MissTok = 0, *BadTok = LT(1),
 *   *MissSet = tokclassErrset.
 */
int ANTLRParser::
_setmatch(SetWordType *tset, ANTLRChar **MissText,
	   ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok,
	   SetWordType **MissSet, SetWordType *tokclassErrset)
{
	if ( dirty==LLk ) consume();

	if ( set_el(LA(1), tset) ) {
		++dirty;
		labase = (labase+1)&(LLk-1);	// labase maintained even if !demand look
		return 1;
	}

	/* mismatch: describe what was expected and what was found (MR23) */
	*MissText = NULL;
	*MissTok  = static_cast<ANTLRTokenType>(0);
	*BadTok   = LT(1);
	*MissSet  = tokclassErrset;
	return 0;
}

/* Test whether the current input symbol, LA(1), is a member of the token
 * set 'tset'; the variant with no error outputs.
 *
 * Returns 1 on a match (dirty incremented, labase advanced), 0 otherwise.
 */
int ANTLRParser::
_setmatch_wsig(SetWordType *tset)
{
	if ( dirty==LLk ) consume();

	if ( set_el(LA(1), tset) ) {
		++dirty;
		labase = (labase+1)&(LLk-1);	// labase maintained even if !demand look
		return 1;
	}
	return 0;
}

                   /* Exception handling routines */
//
//  7-Apr-97 133MR1
//   	     Change suggested by Eli Sternheim (eli@interhdl.com)
//
// Skip input until LA(1) is a member of the token set 'st' or is the
// end-of-file token; the token that stops the scan is not consumed.
//
// End of file is recognised through the eofToken member, the same test
// resynch() uses, rather than through a hard-coded token type of 1.  With
// the constructor default (eofToken == 1) the behaviour is unchanged; if
// eofToken holds a different value, the old test never saw end of file and
// the loop could keep consuming forever.
//
void ANTLRParser::
consumeUntil(SetWordType *st)
{
	ANTLRTokenType		tmp;	                        				// MR1
	while ( !set_el( (tmp=LA(1)), st) && tmp!=eofToken ) { consume(); } // MR1
}

//
//  7-Apr-97 133MR1
//   	     Change suggested by Eli Sternheim (eli@interhdl.com)
//
void ANTLRParser::
consumeUntilToken(int t)
{
	int	tmp;                                                            // MR1
	const	int Eof=1;                                                  // MR1
	while ( (tmp=LA(1)) !=t && tmp!=Eof) { consume(); }                 // MR1
}


                        /* Old error stuff */

/* Old-style error recovery: skip input until LA(1) is in the resynch set
 * or is the end-of-file token.  A token type tt is in the set when
 * (wd[tt] & mask) is non-zero.
 *
 * MR8 (S.Bochnak@microtool.com.pl): the "consumed" flag used to be a
 * file-scope static and is now the instance variable resynchConsumed.
 */
void ANTLRParser::
resynch(SetWordType *wd,SetWordType mask)
{
	/* Entered again without a token having been consumed since the last
	 * resynch: force one consumption and stop there.
	 */
	if ( !resynchConsumed ) {                                   /* MR8 */
		consume();
		resynchConsumed = 1;
		return;
	}

	/* Already sitting on a resynch token (or on end of file): nothing is
	 * consumed, and the flag records that.
	 */
	if ( (wd[LA(1)] & mask) != 0 || LA(1) == eofToken ) {       /* MR8 */
		resynchConsumed = 0;
		return;
	}

	/* Otherwise scan forward to the first resynch token or end of file. */
	for (;;) {
		if ( (wd[LA(1)] & mask) != 0 ) break;
		if ( LA(1) == eofToken ) break;
		consume();
	}

	resynchConsumed = 1;                                        /* MR8 */
}

/* standard error reporting function that assumes DLG-based scanners;
 * you should redefine in subclass to change it or if you use your
 * own scanner.
 */

/* MR23 THM There appears to be a parameter "badText" passed to syn()
            which is not present in the parameter list.  This may be
            because in C mode there is no attribute function which
            returns the text, so the text representation of the token
            must be passed explicitly.  I think.
*/
           
void ANTLRParser::
syn(_ANTLRTokenPtr /*tok MR23*/, ANTLRChar *egroup, SetWordType *eset,
	ANTLRTokenType etok, int k)
{
	int line;

	line = LT(1)->getLine();

    syntaxErrCount++;                                   /* MR11 */

    /* MR23  If the token is not an EOF token, then use the ->getText() value.

             If the token is the EOF token the text returned by ->getText() 
             may be garbage.  If the text from the token table is "@" use

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?