misc.c

来自「SRI international 发布的OAA框架软件」· C语言 代码 · 共 1,865 行 · 第 1/4 页

C
1,865
字号
/*
 * misc.c
 *
 * Manage tokens, regular expressions.
 * Print methods for debugging
 * Compute follow lists onto tail ends of rules.
 *
 * The following functions are visible:
 *
 *		int		addTname(char *);		Add token name
 *		int		addTexpr(char *);		Add token expression
 *		int		Tnum(char *);			Get number of expr/token
 *		void	Tklink(char *, char *);	Link a name with an expression
 *		int		hasAction(expr);		Does expr already have action assigned?
 *		void	setHasAction(expr, action);	Record the action assigned to expr
 *		Entry	*newEntry(char *,int);	Create new table entry with certain size
 *		void	list_add(ListNode **list, char *e)
 *      void    list_free(ListNode **list, int freeData);   *** MR10 ***
 *		void	list_apply(ListNode *list, void (*f)())
 *		void	lexclass(char *m);		switch to new/old lexical class
 *		void	lexmode(int i);			switch to old lexical class i
 *
 * SOFTWARE RIGHTS
 *
 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
 * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
 * company may do whatever they wish with source code distributed with
 * PCCTS or the code generated by PCCTS, including the incorporation of
 * PCCTS, or its output, into commerical software.
 *
 * We encourage users to develop software with PCCTS.  However, we do ask
 * that credit is given to us for developing PCCTS.  By "credit",
 * we mean that if you incorporate our source code into one of your
 * programs (commercial product, research project, or otherwise) that you
 * acknowledge this fact somewhere in the documentation, research report,
 * etc...  If you like PCCTS and have developed a nice tool with the
 * output, please mention that you developed it using PCCTS.  In
 * addition, we ask that this header remain intact in our source code.
 * As long as these guidelines are kept, we expect to continue enhancing
 * this system and expect to make other tools available as they are
 * completed.
 *
 * ANTLR 1.33
 * Terence Parr
 * Parr Research Corporation
 * with Purdue University and AHPCRC, University of Minnesota
 * 1989-2001
 */

#include <stdio.h>
#include "pcctscfg.h"
#include "set.h"
#include "syn.h"
#include "hash.h"
#include "generic.h"
#include "dlgdef.h"
#include <ctype.h>

/* Slot count used when sizing the token string tables: lexclass() allocates
 * each new exprs array with this many entries, and Ttrack() raises it (and
 * reallocates TokenStr and every lclass[].exprs) once TokenNum reaches it.
 */
static int tsize=TSChunk;		/* size of token str arrays */

/* Forward declaration: the function is defined after RemapForcedTokens(),
 * which calls it.  The prototype form is used only when __USE_PROTOS is set.
 */
static void
#ifdef __USE_PROTOS
RemapForcedTokensInSyntaxDiagram(Node *);
#else
RemapForcedTokensInSyntaxDiagram();
#endif

				/* T o k e n  M a n i p u l a t i o n */

/*
 * Record string 't' in the slot for the current token number (TokenNum).
 * A string beginning with a double quote is treated as an expression and
 * goes into ExprStr; anything else is a token name and goes into TokenStr.
 *
 * When TokenNum has reached the table size, TokenStr and the exprs array of
 * every lexical class are first enlarged by a whole number of TSChunk units
 * and the freshly added slots are set to NULL.  All of these arrays are kept
 * at the same size.  Because realloc may move the arrays, ExprStr is
 * re-derived afterwards through lexmode().
 */
static void
#ifdef __USE_PROTOS
Ttrack( char *t )
#else
Ttrack( t )
char *t;
#endif
{
	if ( TokenNum >= tsize )	/* tables full: grow them */
	{
		int grow, first_new, cls, slot;

		grow = TSChunk * (1 + ((TokenNum-tsize) / TSChunk));
		first_new = tsize;		/* index of the first slot being added */
		tsize += grow;

		TokenStr = (char **) realloc((char *)TokenStr, tsize*sizeof(char *));
		require(TokenStr != NULL, "Ttrack: can't extend TokenStr");

		for (cls=0; cls<NumLexClasses; cls++)
		{
			lclass[cls].exprs = (char **)
				realloc((char *)lclass[cls].exprs, tsize*sizeof(char *));
			require(lclass[cls].exprs != NULL, "Ttrack: can't extend ExprStr");
			for (slot=first_new; slot<tsize; slot++) lclass[cls].exprs[slot] = NULL;
		}
		for (slot=first_new; slot<tsize; slot++) TokenStr[slot] = NULL;

		/* the current class's array may have moved; refresh ExprStr */
		lexmode( CurrentLexClass );
	}

	/* TokenInd doesn't exist yet, so the real arrays are indexed directly */
	if ( *t != '"' ) TokenStr[TokenNum] = t;
	else ExprStr[TokenNum] = t;
}

/*
 * Allocate a zero-filled Expr node that records expression string 'e'
 * (stored by pointer, not copied) together with the lexical class that is
 * current at the time of the call.  The allocation is checked with require().
 */
static Expr *
#ifdef __USE_PROTOS
newExpr( char *e )
#else
newExpr( e )
char *e;
#endif
{
	Expr *node;

	node = (Expr *) calloc(1, sizeof(Expr));
	require(node!=NULL, "newExpr: cannot alloc Expr node");

	node->lclass = CurrentLexClass;
	node->expr = e;
	return node;
}

/*
 * Select lexical class (mode) 'm', creating it if it is not known yet.
 *
 * A warning is issued when 'm' is also present in the Tname table.  If the
 * class already exists this is simply lexmode() on its index.  Otherwise a
 * new lclass[] entry is set up with its own expression array (tsize slots)
 * and its own hash table, ExprStr/Texpr are pointed at them, and the EOF
 * expression "@" is registered for the new class under token EofToken.
 *
 * Each class needs its own ExprStr array because several automata may use
 * the same token label with different expressions.
 */
void
#ifdef __USE_PROTOS
lexclass( char *m )
#else
lexclass( m )
char *m;
#endif
{
	static char EOFSTR[] = "\"@\"";
	TermEntry *eof_entry;
	int existing;

	if ( hash_get(Tname, m) != NULL )
	{
		warn(eMsg1("lexclass name conflicts with token/errclass label '%s'",m));
	}

	/* reuse the class if it has been seen before */
	existing = LexClassIndex(m);
	if ( existing != -1 )
	{
		lexmode(existing);
		return;
	}

	/* otherwise append a new class and make it current */
	CurrentLexClass = NumLexClasses;
	NumLexClasses++;
	require(NumLexClasses<=MaxLexClasses, "number of allowable lexclasses exceeded\nIncrease MaxLexClasses in generic.h and recompile all C files");

	lclass[CurrentLexClass].classnum = m;
	lclass[CurrentLexClass].exprs = (char **) calloc(tsize, sizeof(char *));
	require(lclass[CurrentLexClass].exprs!=NULL,
			"lexclass: cannot allocate ExprStr");
	lclass[CurrentLexClass].htable = newHashTable();

	ExprStr = lclass[CurrentLexClass].exprs;
	Texpr = lclass[CurrentLexClass].htable;

	/* every automaton gets its own EOF definition */
	eof_entry = newTermEntry( EOFSTR );
	eof_entry->token = EofToken;	/* tokens cannot have been remapped yet */
	hash_add(Texpr, EOFSTR, (Entry *)eof_entry);
	list_add(&ExprOrder, (void *)newExpr(EOFSTR));

	/* TokenInd doesn't exist yet, so the real ExprStr array is used */
	ExprStr[EofToken] = EOFSTR;
}

/*
 * Make the existing lexical class with index 'i' the current one: ExprStr
 * and Texpr are pointed at that class's expression array and hash table and
 * CurrentLexClass is set to 'i'.
 *
 * 'i' must lie in 0..NumLexClasses-1.  Both bounds are verified with
 * require() before lclass[] is indexed, so a negative value (for instance
 * the -1 that LexClassIndex() returns for an unknown class) is reported
 * instead of reading outside the array.
 */
void
#ifdef __USE_PROTOS
lexmode( int i )
#else
lexmode( i )
int i;
#endif
{
	require(i>=0 && i<NumLexClasses, "lexmode: invalid mode");
	ExprStr = lclass[i].exprs;
	Texpr = lclass[i].htable;
	CurrentLexClass = i;
}

/*
 * Look up lexical class name 'cl' among the NumLexClasses entries of
 * lclass[], comparing names with strcmp().  Returns the index of the first
 * matching entry, or -1 when no class has that name.
 */
int
#ifdef __USE_PROTOS
LexClassIndex( char *cl )
#else
LexClassIndex( cl )
char *cl;
#endif
{
	int idx = 0;

	while ( idx < NumLexClasses )
	{
		if ( strcmp(lclass[idx].classnum, cl) == 0 ) return idx;
		idx++;
	}
	return -1;
}

/*
 * Report whether expression 'expr' already has an action attached.
 * The expression is looked up in the current Texpr table; both a NULL
 * argument and an unknown expression are rejected through require().
 * Returns non-zero when the entry's action pointer is set, 0 otherwise.
 */
int
#ifdef __USE_PROTOS
hasAction( char *expr )
#else
hasAction( expr )
char *expr;
#endif
{
	TermEntry *entry;

	require(expr!=NULL, "hasAction: invalid expr");

	entry = (TermEntry *) hash_get(Texpr, expr);
	require(entry!=NULL, eMsg1("hasAction: expr '%s' doesn't exist",expr));

	return ( entry->action != NULL );
}

/*
 * Attach 'action' to expression 'expr' in the current Texpr table.
 * The action pointer is stored as given (no copy is made).  A NULL 'expr'
 * or an expression that is not in the table is rejected through require().
 */
void
#ifdef __USE_PROTOS
setHasAction( char *expr, char *action )
#else
setHasAction( expr, action )
char *expr;
char *action;
#endif
{
	TermEntry *entry;

	require(expr!=NULL, "setHasAction: invalid expr");

	entry = (TermEntry *) hash_get(Texpr, expr);
	require(entry!=NULL, eMsg1("setHasAction: expr '%s' doesn't exist",expr));

	entry->action = action;
}

/*
 * Allocate a zero-filled ForcedToken record pairing token name 'token'
 * (stored by pointer) with the token number 'tnum' it is to be forced to.
 * The allocation is checked with require().
 */
ForcedToken *
#ifdef __USE_PROTOS
newForcedToken(char *token, int tnum)
#else
newForcedToken(token, tnum)
char *token;
int tnum;
#endif
{
	ForcedToken *forced;

	forced = (ForcedToken *) calloc(1, sizeof(ForcedToken));
	require(forced!=NULL, "out of memory");

	forced->tnum = tnum;
	forced->token = token;
	return forced;
}

/*
 * Make a token indirection array that remaps token numbers and then walk
 * the appropriate symbol tables and SynDiag to change token numbers.
 *
 * Does nothing when the ForcedTokens list is NULL.  Otherwise:
 *   1. finds the largest forced token number in the list,
 *   2. allocates TokenInd with (max+1) entries, saves the old TokenNum in
 *      LastTokenCounted and sets TokenNum to max+1,
 *   3. for every forced token, swaps the TokenInd entries of its old
 *      position and its target position, and updates the token numbers
 *      stored in the Tname entry and in the per-lexclass expression entries
 *      found at those two positions,
 *   4. rewrites the token numbers held in the syntax diagram.
 * Progress is traced on stderr throughout.
 */
void
#ifdef __USE_PROTOS
RemapForcedTokens(void)
#else
RemapForcedTokens()
#endif
{
	ListNode *p;
	ForcedToken *q;
	int max_token_number=0;     /* MR9 23-Sep-97 Removed "unsigned" */
	int i;

	if ( ForcedTokens == NULL ) return;

	/* find max token num */
	/* iteration starts at ->next; presumably the first node is a list header -- confirm in list_add */
	for (p = ForcedTokens->next; p!=NULL; p=p->next)
	{
		q = (ForcedToken *) p->elem;
		if ( q->tnum > max_token_number ) max_token_number = q->tnum;
	}
	fprintf(stderr, "max token number is %d\n", max_token_number);

	/* make token indirection array */
	TokenInd = (int *) calloc(max_token_number+1, sizeof(int));
	LastTokenCounted = TokenNum;
	TokenNum = max_token_number+1;
	/* the allocation is only checked here, but still before TokenInd is first used */
	require(TokenInd!=NULL, "RemapForcedTokens: cannot allocate TokenInd");

	/* fill token indirection array and change token id htable ; swap token indices */
	/* identity mapping; entry 0 is left at the 0 that calloc provided */
	for (i=1; i<TokenNum; i++) TokenInd[i] = i;
	for (p = ForcedTokens->next; p!=NULL; p=p->next)
	{
		TermEntry *te;
		int old_pos, t;

		q = (ForcedToken *) p->elem;
		fprintf(stderr, "%s forced to %d\n", q->token, q->tnum);
		te = (TermEntry *) hash_get(Tname, q->token);
		require(te!=NULL, "RemapForcedTokens: token not in hash table");
		old_pos = te->token;
		/* NOTE(review): TokenInd has max_token_number+1 entries and old_pos is
		 * not compared against that size here; if a token's current number can
		 * exceed the largest forced number this indexes past the array -- confirm.
		 */
		fprintf(stderr, "Before: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]);
		fprintf(stderr, "Before: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]);
		q = (ForcedToken *) p->elem;	/* same value as assigned at the top of the loop body */
		/* swap the indirection entries of the old and the target positions */
		t = TokenInd[old_pos];
		TokenInd[old_pos] = q->tnum;
		TokenInd[q->tnum] = t;
		te->token = q->tnum;		/* update token type id symbol table */
		fprintf(stderr, "After: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]);
		fprintf(stderr, "After: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]);

		/* Change the token number in the sym tab entry for the exprs
		 * at the old position of the token id and the target position
		 */
		/* update expr at target (if any) of forced token id */
		/* TokenNum was set to max_token_number+1 above, so this test holds for
		 * every list entry.
		 * NOTE(review): the exprs arrays are allocated with tsize entries
		 * (see lexclass/Ttrack); q->tnum is not checked against tsize -- confirm
		 * forced numbers cannot exceed it.
		 */
		if ( q->tnum < TokenNum )	/* is it a valid position? */
		{
			for (i=0; i<NumLexClasses; i++)
			{
				if ( lclass[i].exprs[q->tnum]!=NULL )
				{
					/* update the symbol table for this expr */
					TermEntry *e = (TermEntry *) hash_get(lclass[i].htable, lclass[i].exprs[q->tnum]);
					require(e!=NULL, "RemapForcedTokens: expr not in hash table");
					e->token = old_pos;
					fprintf(stderr, "found expr '%s' at target %d in lclass[%d]; changed to %d\n",
							lclass[i].exprs[q->tnum], q->tnum, i, old_pos);
				}
			}
		}
		/* update expr at old position (if any) of forced token id */
		for (i=0; i<NumLexClasses; i++)
		{
			if ( lclass[i].exprs[old_pos]!=NULL )
			{
				/* update the symbol table for this expr */
				TermEntry *e = (TermEntry *) hash_get(lclass[i].htable, lclass[i].exprs[old_pos]);
				require(e!=NULL, "RemapForcedTokens: expr not in hash table");
				e->token = q->tnum;
				fprintf(stderr, "found expr '%s' for id %s in lclass[%d]; changed to %d\n",
						lclass[i].exprs[old_pos], q->token, i, q->tnum);
			}
		}
	}

	/* Update SynDiag */
	RemapForcedTokensInSyntaxDiagram((Node *)SynDiag);
}

/*
 * Recursively walk the syntax diagram starting at node 'p' and replace the
 * token number of every token node by its TokenInd[] mapping.
 *
 *  - junctions: both successors (p1, p2) are followed; the 'visited' flag is
 *    set for the duration of the visit so that cycles terminate, and EndRule
 *    junctions are not followed at all;
 *  - rule references and actions: only 'next' is followed;
 *  - tokens: remapped once (guarded by the 'remapped' flag), traced on
 *    stderr, then 'next' is followed.
 * A NULL node ends the walk; any other node type is a fatal internal error.
 */
static void
#ifdef __USE_PROTOS
RemapForcedTokensInSyntaxDiagram(Node *p)
#else
RemapForcedTokensInSyntaxDiagram(p)
Node *p;
#endif
{
	if ( p==NULL ) return;
	require(p->ntype>=1 && p->ntype<=NumNodeTypes,	"Remap...: invalid diagram node");

	if ( p->ntype == nJunction )
	{
		Junction *junc = (Junction *) p;

		if ( junc->visited || junc->jtype == EndRule ) return;
		junc->visited = TRUE;
		RemapForcedTokensInSyntaxDiagram( junc->p1 );
		RemapForcedTokensInSyntaxDiagram( junc->p2 );
		junc->visited = FALSE;
	}
	else if ( p->ntype == nRuleRef )
	{
		RemapForcedTokensInSyntaxDiagram( ((RuleRefNode *)p)->next );
	}
	else if ( p->ntype == nToken )
	{
		TokNode *tok = (TokNode *) p;

		if ( tok->remapped ) return;	/* already handled on an earlier visit */
		tok->remapped = 1;
		fprintf(stderr, "remapping %d to %d\n", tok->token, TokenInd[tok->token]);
		tok->token = TokenInd[tok->token];
		RemapForcedTokensInSyntaxDiagram( tok->next );
	}
	else if ( p->ntype == nAction )
	{
		RemapForcedTokensInSyntaxDiagram( ((ActionNode *)p)->next );
	}
	else
	{
		fatal_internal("invalid node type");
	}
}

/*
 * Add a token name.  Return the token number associated with it.  If it already
 * exists, then return the token number assigned to it.
 *
 * Track the order in which tokens are found so that the DLG output maintains
 * that order.  It also lets us map token numbers to strings.
 */
int
#ifdef __USE_PROTOS
addTname( char *token )
#else
addTname( token )
char *token;
#endif
{
	TermEntry *p;
	require(token!=NULL, "addTname: invalid token name");

	if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token;
	p = newTermEntry( token );
	Ttrack( p->str );
	p->token = TokenNum++;
	hash_add(Tname, token, (Entry *)p);
	return p->token;
}

/*
 * Same as addTname() except that a newly created entry receives the
 * caller-supplied number 'tnum' instead of the next free TokenNum, and
 * TokenNum itself is not advanced here.  If 'token' is already in the Tname
 * table its existing number is returned and 'tnum' is ignored.
 *
 * We don't have to use the Forced token stuff as no tokens will have
 * been defined with #tokens when this is called.  This is only called
 * when a #tokdefs meta-op is used.
 *
 * NOTE(review): Ttrack() stores the string at index TokenNum, not at 'tnum';
 * presumably the caller arranges for the two to agree -- confirm at call sites.
 */
int
#ifdef __USE_PROTOS
addForcedTname( char *token, int tnum )
#else
addForcedTname( token, tnum )
char *token;
int tnum;
#endif
{
	TermEntry *p;
	/* diagnostic names this function (it previously said "addTname") */
	require(token!=NULL, "addForcedTname: invalid token name");

	if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token;
	p = newTermEntry( token );
	Ttrack( p->str );
	p->token = tnum;
	hash_add(Tname, token, (Entry *)p);
	return p->token;
}

/*
 * Add a token expr.  Return the token number associated with it.  If it already
 * exists, then return the token number assigned to it.
 */
int
#ifdef __USE_PROTOS
addTexpr( char *expr )
#else
addTexpr( expr )
char *expr;
#endif
{
	TermEntry *p;
	require(expr!=NULL, "addTexpr: invalid regular expression");

	if ( (p=(TermEntry *)hash_get(Texpr, expr)) != NULL ) return p->token;
	p = newTermEntry( expr );
	Ttrack( p->str );
	/* track the order in which they occur */
	list_add(&ExprOrder, (void *)newExpr(p->str));
	p->token = TokenNum++;
	hash_add(Texpr, expr, (Entry *)p);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?