bits.c

来自「SRI international 发布的OAA框架软件」· C语言 代码 · 共 1,026 行 · 第 1/2 页

C
1,026
字号
/* bits.c -- manage creation and output of bit sets used by the parser.
 *
 * SOFTWARE RIGHTS
 *
 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
 * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
 * company may do whatever they wish with source code distributed with
 * PCCTS or the code generated by PCCTS, including the incorporation of
 * PCCTS, or its output, into commerical software.
 *
 * We encourage users to develop software with PCCTS.  However, we do ask
 * that credit is given to us for developing PCCTS.  By "credit",
 * we mean that if you incorporate our source code into one of your
 * programs (commercial product, research project, or otherwise) that you
 * acknowledge this fact somewhere in the documentation, research report,
 * etc...  If you like PCCTS and have developed a nice tool with the
 * output, please mention that you developed it using PCCTS.  In
 * addition, we ask that this header remain intact in our source code.
 * As long as these guidelines are kept, we expect to continue enhancing
 * this system and expect to make other tools available as they are
 * completed.
 *
 * ANTLR 1.33
 * Terence Parr
 * Parr Research Corporation
 * with Purdue University and AHPCRC, University of Minnesota
 * 1989-2001
 */

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <assert.h>
#include "pcctscfg.h"
#include "set.h"
#include "syn.h"
#include "hash.h"
#include "generic.h"
#include "dlgdef.h"

/* char is only thing that is pretty much always known == 8 bits
 * This allows output of antlr (set stuff, anyway) to be androgynous (portable)
 */
typedef unsigned char SetWordType;
#define BitsPerByte		8
/* Fully parenthesized so the expansion stays a single operand wherever it is
 * used (e.g. after a cast, or next to '/' or '%').
 */
#define BitsPerWord		(BitsPerByte*sizeof(SetWordType))

/* Set-word array currently being filled; allocated lazily by NewSetWd(). */
static SetWordType *setwd = NULL;
/* Bit position inside each SetWordType used by the current set (see FillSet());
 * starts at -1 so that the first NewSet() moves it to 0.
 */
int setnum = -1;
/* Incremented by NewSetWd(); used as the numeric suffix of the emitted
 * "setwd%d" arrays.
 */
int wordnum = 0;

/* NOTE(review): not referenced in this part of the file -- presumably a
 * counter for emitted error sets; confirm against the rest of bits.c.
 */
int esetnum = 0;

/* Used to convert native wordsize, which ANTLR uses (via set.c) to manipulate sets,
 * to bytes that are most portable size-wise.
 *
 * Writes the bytes of 'wd' to 'f', least significant byte first, separated
 * by commas (no trailing comma).
 *
 *   f      - output stream
 *   format - fprintf format applied to each byte; it is handed exactly one
 *            'unsigned' argument, so it must contain a single conversion
 *            that consumes an unsigned int (such as %x or %u)
 *   wd     - the native word to split
 *
 * Each byte is extracted by shift-and-mask in 'unsigned' arithmetic, so the
 * value passed to fprintf always has the type the format expects and the
 * routine works for any sizeof(unsigned).
 */
void
#ifdef __USE_PROTOS
DumpIntAsChars( FILE *f, char *format, unsigned wd )
#else
DumpIntAsChars( f, format, wd )
FILE *f;
char *format;
unsigned wd;
#endif
{
	unsigned i;

	/* for each byte in the word */
	for (i=0; i<sizeof(unsigned); i++)
	{
		/* shift the ith byte down to the first 8 bits and mask off the rest */
		unsigned byte = (wd >> (i*BitsPerByte)) & 0xFFu;

		fprintf(f, format, byte);
		if ( i+1 < sizeof(unsigned) ) fprintf(f, ",");
	}
}

/* Start a fresh setwd (ignoring [Ep] token on end).
 *
 * The TokenNum-entry array is allocated on first use; on every call all of
 * its entries are reset to zero and wordnum is advanced by one.
 */
void
#ifdef __USE_PROTOS
NewSetWd( void )
#else
NewSetWd( )
#endif
{
	int slot;

	if ( setwd == NULL )
	{
		setwd = (SetWordType *) calloc(TokenNum, sizeof(SetWordType));
		require(setwd!=NULL, "NewSetWd: cannot alloc set wd\n");
	}

	/* wipe every entry so the new word starts with no bits set */
	for (slot = 0; slot < TokenNum; slot++)
	{
		setwd[slot] = 0;
	}

	wordnum++;
}

/* Emit the current setwd through the C++ dumper when GenCC is set,
 * otherwise through the C dumper.
 */
void
#ifdef __USE_PROTOS
DumpSetWd( void )
#else
DumpSetWd( )
#endif
{
	if ( !GenCC )
	{
		DumpSetWdForC();
		return;
	}
	DumpSetWdForCC();
}

/* Dump the current setwd to ErrFile. 0..MaxTokenVal
 *
 * An extern declaration goes to DefFile and the initialized array (the
 * first TokenNum-1 entries of setwd, in hex) goes to ErrFile.  Does nothing
 * if no setwd has been allocated yet.
 */
void
#ifdef __USE_PROTOS
DumpSetWdForC( void )
#else
DumpSetWdForC( )
#endif
{
	int idx;
	int col = 1;	/* counts entries since the last line break */

	if ( setwd==NULL ) return;

	fprintf(DefFile, "extern SetWordType setwd%d[];\n", wordnum);
	fprintf(ErrFile,
			"SetWordType setwd%d[%d] = {", wordnum, TokenNum-1);

	idx = 0;
	while ( idx<TokenNum-1 )
	{
		DAWDLE;
		if ( idx>0 ) fprintf(ErrFile, ",");
		/* wrap the output line each time the counter reaches 8 */
		if ( col != 8 )
		{
			col++;
		}
		else
		{
			fprintf(ErrFile, "\n\t");
			col = 1;
		}
		fprintf(ErrFile, "0x%x", setwd[idx]);
		idx++;
	}
	fprintf(ErrFile, "};\n");
}

/* Dump the current setwd to Parser.C file. 0..MaxTokenVal;
 * Only used if -CC on.
 *
 * The static member declaration goes to Parser_h and the initialized
 * definition, qualified with CurrentClassName, goes to Parser_c.  Does
 * nothing if no setwd has been allocated yet.
 */
void
#ifdef __USE_PROTOS
DumpSetWdForCC( void )
#else
DumpSetWdForCC( )
#endif
{
	int idx;
	int col = 1;	/* counts entries since the last line break */

	if ( setwd==NULL ) return;

	fprintf(Parser_h, "\tstatic SetWordType setwd%d[%d];\n", wordnum, TokenNum-1);
	fprintf(Parser_c,
			"SetWordType %s::setwd%d[%d] = {", CurrentClassName, wordnum,
			TokenNum-1);

	idx = 0;
	while ( idx<TokenNum-1 )
	{
		DAWDLE;
		if ( idx>0 ) fprintf(Parser_c, ",");
		/* wrap the output line each time the counter reaches 8 */
		if ( col != 8 )
		{
			col++;
		}
		else
		{
			fprintf(Parser_c, "\n\t");
			col = 1;
		}
		fprintf(Parser_c, "0x%x", setwd[idx]);
		idx++;
	}
	fprintf(Parser_c, "};\n");
}

/* Advance to the next set (bit position) in the current setwd.  Once every
 * bit of a SetWordType has been used, the full setwd is dumped, a fresh one
 * is started and numbering restarts at bit 0.
 */
void
#ifdef __USE_PROTOS
NewSet( void )
#else
NewSet( )
#endif
{
	++setnum;

	/* still room in the current setwd? then nothing more to do */
	if ( setnum != BitsPerWord ) return;

	DumpSetWd();
	NewSetWd();
	setnum = 0;
}

/* s is a set of tokens.  For every token in s, switch on bit number 'setnum'
 * in that token's setwd entry.  Elements are pulled out of s one at a time
 * until set_nil() reports that none are left.
 */
void
#ifdef __USE_PROTOS
FillSet( set s )
#else
FillSet( s )
set s;
#endif
{
	SetWordType bit=(((unsigned)1)<<setnum);
	unsigned int tok;

	for (;;)
	{
		if ( set_nil(s) ) break;
		tok = set_int(s);		/* pick an element ... */
		set_rm(tok, s);			/* ... and take it out of s */
		setwd[tok] |= bit;
	}
}

					/* E r r o r  C l a s s  S t u f f */

/* compute the FIRST of a rule for the error class stuff
 *
 *   rule   - name of the rule, looked up in the Rname hash table
 *   eclass - error class being built; only used to name the class in the
 *            warning issued for an unknown rule
 *
 * Returns the set produced by REACH for the rule with k=1, or 'empty'
 * (after a warning) when the rule is not defined.
 */
static set
#ifdef __USE_PROTOS
Efirst( char *rule, ECnode *eclass )
#else
Efirst( rule, eclass )
char *rule;
ECnode *eclass;
#endif
{
	set rk, a;
	Junction *r;
	RuleEntry *q = (RuleEntry *) hash_get(Rname, rule);

	if ( q == NULL )
	{
		warnNoFL(eMsg2("undefined rule '%s' referenced in errclass '%s'; ignored",
						rule, TokenString(eclass->tok)));
		return empty;
	}
	r = RulePtr[q->rulenum];
	/* the halt flag on the rule's end node is raised only for the duration
	 * of the REACH call and lowered again right after it */
	r->end->halt = TRUE;		/* don't let reach fall off end of rule here */
	rk = empty;
	/* NOTE(review): REACH is a macro defined elsewhere; it is assumed to
	 * store its result in 'a' -- rk is passed by address and not looked at
	 * again here; confirm whether it needs to be released */
	REACH(r, 1, &rk, a);
	r->end->halt = FALSE;
	return a;
}

/*
 * scan the list of tokens/eclasses/nonterminals filling the new eclass
 * with the set described by the list.  Note that an eclass can be
 * quoted to allow spaces etc... However, an eclass must not conflict
 * with a reg expr found elsewhere.  The reg expr will be taken over
 * the eclass name.
 */
static void
#ifdef __USE_PROTOS
doEclass( char *eclass )
#else
doEclass( eclass )
char *eclass;
#endif
{
	TermEntry *q;
	ECnode *p;
	TCnode *tcnode;
	ListNode *e;
	unsigned int t;
	unsigned deg=0;
	set a;
	require(eclass!=NULL, "doEclass: NULL eset");
	
	p = (ECnode *) eclass;
	lexmode(p->lexclass);	/* switch to lexclass where errclass is defined */
	p->eset = empty;
	for (e = (p->elist)->next; e!=NULL; e=e->next)
	{
		q = NULL;								/* MR23 */

		if ( islower( *((char *)e->elem) ) )	/* is it a rule ref? (alias FIRST request) */
		{
			a = Efirst((char *)e->elem, p);
			set_orin(&p->eset, a);
			deg += set_deg(a);
			set_free( a );
			continue;
		}
		else if ( *((char *)e->elem)=='"' )
		{
			t = 0;
			q = (TermEntry *) hash_get(Texpr, (char *) e->elem);
			if ( q == NULL )
			{
				/* if quoted and not an expr look for eclass name */
				q = (TermEntry *) hash_get(Tname, *((char **)&(e->elem))=StripQuotes((char *)e->elem));
				if ( q != NULL ) t = q->token;
			}
			else t = q->token;
		}
		else	/* labelled token/eclass/tokclass */
		{
			q = (TermEntry *) hash_get(Tname, (char *)e->elem);
			if ( q != NULL )
			{
				if ( strcmp((char *)e->elem, TokenString(p->tok))==0 )
				{
					warnNoFL(eMsg1("self-referential error class '%s'; ignored",
								   (char *)e->elem));
					continue;
				}
				else
					t = q->token;
			}
			else t=0;
		}
		if ( t!=0 )
		{
			if (isTermEntryTokClass(q))  {			/* MR23 */
			    tcnode = q->tclass;					/* MR23 */
				set_orin(&p->eset, tcnode->tset);	/* MR23 */
				deg = set_deg(p->eset);				/* MR23 */
			}										/* MR23 */
			else {
				set_orel(t, &p->eset);
				deg++;
			}
		}
		else warnNoFL(eMsg2("undefined token '%s' referenced in errclass '%s'; ignored",
							(char *)e->elem, TokenString(p->tok)));
	}
	p->setdeg = deg;
}

/* Run doEclass over every entry of the 'eclasses' list.  When prototypes
 * are in effect (C++ or __USE_PROTOS) the callback is cast to the
 * void (*)(void *) type used for the list_apply call.
 */
void
#ifdef __USE_PROTOS
ComputeErrorSets( void )
#else
ComputeErrorSets( )
#endif
{
#if defined(__cplusplus) || defined(__USE_PROTOS)
    list_apply(eclasses, (void (*)(void *)) doEclass);
#else
    list_apply(eclasses, doEclass);
#endif
}

/* Build the token set (tset) of every entry on the 'tclasses' list.
 *
 * Pass 1 turns each class's member list into a set: quoted members are
 * looked up in Texpr, a member starting with '.' introduces a range whose
 * two end points are the next two list elements, and anything else is
 * looked up in Tname.  The wild card class simply gets a copy of all_tokens.
 *
 * Pass 2 repeatedly replaces references to other token classes found in a
 * tset by the tokens of the referenced class, until a complete sweep over
 * the list makes no replacement.
 */
void
#ifdef __USE_PROTOS
ComputeTokSets( void )
#else
ComputeTokSets( )
#endif
{
	ListNode *cls, *item = NULL, *lo_node, *hi_node;
	int changed;
	int tokval, first, last;
	TCnode *tc;
	TermEntry *entry, *lo_entry, *hi_entry;

	if ( tclasses == NULL ) return;

	/* turn lists of token/tokclass references into sets */
	for (cls = tclasses->next; cls!=NULL; cls=cls->next)
	{
		tc = (TCnode *) cls->elem;

		/* if wild card, then won't have entries in tclass, assume all_tokens */
		if ( tc->tok == WildCardToken )
		{
			tc->tset = set_dup(all_tokens);
			continue;
		}

		lexmode(tc->lexclass);	/* switch to lexclass where tokclass is defined */
		tc->tset = empty;

		/* instantiate all tokens/token_classes into the tset */
		item = (tc->tlist)->next;
		while ( item!=NULL )
		{
			char *name = (char *)item->elem;

			if ( name[0] == '"' )
			{
				entry = (TermEntry *) hash_get(Texpr, name);
				require(entry!=NULL, "ComputeTokSets: no token def");
				set_orel(entry->token, &tc->tset);
			}
			else if ( name[0] == '.' )
			{
				/* range: the two following elements name its end points */
				lo_node = item->next;
				hi_node = lo_node->next;
				item = hi_node;
				lo_entry = (TermEntry *) hash_get(Tname, (char *)lo_node->elem);
				require(lo_entry!=NULL, "ComputeTokSets: no token def");
				hi_entry = (TermEntry *) hash_get(Tname, (char *)hi_node->elem);
				require(hi_entry!=NULL, "ComputeTokSets: no token def");

				if ( set_el(lo_entry->token,imag_tokens) )
				{
					errNoFL(eMsg2("can't define #tokclass %s using #tokclass or #errclass %s",
							TokenString(tc->tok),(char *)lo_node->elem) );
				}
				if ( set_el(hi_entry->token,imag_tokens) )
				{
					errNoFL(eMsg2("can't define #tokclass %s using #tokclass or #errclass %s",
							TokenString(tc->tok),(char *)hi_node->elem) );
				}

				first = lo_entry->token;
				last = hi_entry->token;
				if ( lo_entry->token > hi_entry->token )
				{
					errNoFL(eMsg3("for #tokclass %s %s..%s - first token number > second token number",
							TokenString(tc->tok),(char *)lo_node->elem,(char *)hi_node->elem) );
					/* reversed end points: still add the tokens in between */
					first = hi_entry->token;
					last = lo_entry->token;
				}
				for (tokval=first; tokval<=last; tokval++)
				{
					set_orel(tokval, &tc->tset);
				}
			}
			else
			{
				entry = (TermEntry *) hash_get(Tname, name);
				require(entry!=NULL, "ComputeTokSets: no token def");
				set_orel(entry->token, &tc->tset);
			}

			item = item->next;
		}
	}

	/* Go thru list of tokclasses again looking for tokclasses in sets */
	do
	{
		changed = 0;
		for (cls = tclasses->next; cls!=NULL; cls=cls->next)
		{
			set refs;
			int ref;

			tc = (TCnode *) cls->elem;
			refs = set_and(tc->tset, tokclasses);

			/* replace refs to tokclasses with the associated set of tokens */
			while ( !set_nil(refs) )
			{
				changed = 1;
				ref = set_int(refs);		/* grab one of the tok class refs */
				set_rm(ref, refs);
				if ( tc->tok != ref )		/* tokclass ref to yourself? */
				{
					entry = (TermEntry *) hash_get(Tname, TokenString(ref));
					require(entry!=NULL, "#tokclass not in hash table");
					set_orin(&tc->tset, entry->tclass->tset);
				}
				set_rm(ref, tc->tset);	/* remove ref that we replaced */
			}
			set_free(refs);
		}
	} while ( changed );
}

void
#ifdef __USE_PROTOS
DumpRemainingTokSets(void)
#else
DumpRemainingTokSets()
#endif
{
	TCnode *p;
	ListNode *t;

	/* Go thru tclasses (for the last time) and dump the sets not dumped
	 * during code gen; yes, this is a bogus way to do this, but ComputeTokSets()
	 * can't dump the defs as the error file and tok file has not been created
	 * yet etc...
	 */
	if ( tclasses==NULL ) return;
	for (t = tclasses->next; t!=NULL; t=t->next)
	{
		unsigned e;
		p = (TCnode *) t->elem;
		if ( p->dumped ) continue;
		e = DefErrSet(&(p->tset), 0, TokenString(p->tok));
		p->dumped = 1;
		p->setnum = e;
	}
}


/* replace a subset of an error set with an error class name if a subset is found
 * repeat process until no replacements made
 */
void
#ifdef __USE_PROTOS
SubstErrorClass( set *f )
#else
SubstErrorClass( f )
set *f;
#endif
{
	int max, done = 0;
	ListNode *p;
	ECnode *ec, *maxclass = NULL;
	set a;
	require(f!=NULL, "SubstErrorClass: NULL eset");

	if ( eclasses == NULL ) return;
	while ( !done )
	{
		max = 0;
		maxclass = NULL;
		for (p=eclasses->next; p!=NULL; p=p->next)	/* chk all error classes */
		{
			ec = (ECnode *) p->elem;
			if ( ec->setdeg > max )
			{
				if ( set_sub(ec->eset, *f) || set_equ(ec->eset, *f) )
					{maxclass = ec; max=ec->setdeg;}
			}
		}
		if ( maxclass != NULL )	/* if subset found, replace with token */
		{
			a = set_dif(*f, maxclass->eset);
			set_orel((unsigned)maxclass->tok, &a);
			set_free(*f);
			*f = a;
		}
		else done = 1;
	}
}

int
#ifdef __USE_PROTOS
DefErrSet1(int nilOK, set *f, int subst, char *name )
#else
DefErrSet1(nilOK, f, subst, name )
int nilOK;
set *f;
int subst;			/* should be substitute error classes? */
char *name;
#endif

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?