misc.c
来自「SRI international 发布的OAA框架软件」· C语言 代码 · 共 1,865 行 · 第 1/4 页
C
1,865 行
/*
* misc.c
*
* Manage tokens, regular expressions.
* Print methods for debugging
* Compute follow lists onto tail ends of rules.
*
* The following functions are visible:
*
* int addTname(char *); Add token name
* int addTexpr(char *); Add token expression
* int Tnum(char *); Get number of expr/token
* void Tklink(char *, char *); Link a name with an expression
* int hasAction(expr); Does expr already have action assigned?
* void setHasAction(expr, action); Record 'action' as the action assigned to expr
* Entry *newEntry(char *,int); Create new table entry with certain size
* void list_add(ListNode **list, char *e)
* void list_free(ListNode **list, int freeData); *** MR10 ***
* void list_apply(ListNode *list, void (*f)())
* void lexclass(char *m); switch to new/old lexical class
* void lexmode(int i); switch to old lexical class i
*
* SOFTWARE RIGHTS
*
* We reserve no LEGAL rights to the Purdue Compiler Construction Tool
* Set (PCCTS) -- PCCTS is in the public domain. An individual or
* company may do whatever they wish with source code distributed with
* PCCTS or the code generated by PCCTS, including the incorporation of
* PCCTS, or its output, into commerical software.
*
* We encourage users to develop software with PCCTS. However, we do ask
* that credit is given to us for developing PCCTS. By "credit",
* we mean that if you incorporate our source code into one of your
* programs (commercial product, research project, or otherwise) that you
* acknowledge this fact somewhere in the documentation, research report,
* etc... If you like PCCTS and have developed a nice tool with the
* output, please mention that you developed it using PCCTS. In
* addition, we ask that this header remain intact in our source code.
* As long as these guidelines are kept, we expect to continue enhancing
* this system and expect to make other tools available as they are
* completed.
*
* ANTLR 1.33
* Terence Parr
* Parr Research Corporation
* with Purdue University and AHPCRC, University of Minnesota
* 1989-2001
*/
#include <stdio.h>
#include "pcctscfg.h"
#include "set.h"
#include "syn.h"
#include "hash.h"
#include "generic.h"
#include "dlgdef.h"
#include <ctype.h>
/* Number of slots currently allocated in each token string array.
 * It starts at TSChunk and is enlarged by Ttrack() once TokenNum reaches it.
 */
static int tsize=TSChunk; /* size of token str arrays */
/* Forward declaration, in prototype or K&R form depending on __USE_PROTOS.
 * The definition follows RemapForcedTokens(), which calls it.
 */
static void
#ifdef __USE_PROTOS
RemapForcedTokensInSyntaxDiagram(Node *);
#else
RemapForcedTokensInSyntaxDiagram();
#endif
/* T o k e n M a n i p u l a t i o n */
/*
* add token 't' to the TokenStr/Expr array. Make more room if necessary.
* 't' is either an expression or a token name.
*
* There is only one TokenStr array, but multiple ExprStr's. Therefore,
* for each lex class (element of lclass) we must extend the ExprStr array.
* ExprStr's and TokenStr are always all the same size.
*
* Also, there is a Texpr hash table for each automaton.
*/
static void
#ifdef __USE_PROTOS
Ttrack( char *t )
#else
Ttrack( t )
char *t;
#endif
{
	/* Store 't' in slot TokenNum.  When TokenNum no longer fits in the
	 * tsize slots allocated so far, TokenStr and the expression array of
	 * every lex class are enlarged by a whole number of TSChunk units and
	 * the freshly added slots are set to NULL.
	 */
	if ( TokenNum >= tsize )	/* terminal table overflow? */
	{
		int cls, slot;
		int first_new = tsize;	/* index of the first slot being added */
		int grow = TSChunk * (1 + ((TokenNum-tsize) / TSChunk));

		tsize = first_new + grow;
		TokenStr = (char **) realloc((char *)TokenStr, tsize*sizeof(char *));
		require(TokenStr != NULL, "Ttrack: can't extend TokenStr");
		for (cls=0; cls<NumLexClasses; cls++)
		{
			lclass[cls].exprs = (char **)
				realloc((char *)lclass[cls].exprs, tsize*sizeof(char *));
			require(lclass[cls].exprs != NULL, "Ttrack: can't extend ExprStr");
			for (slot=first_new; slot<tsize; slot++)
			{
				lclass[cls].exprs[slot] = NULL;
			}
		}
		for (slot=first_new; slot<tsize; slot++)
		{
			TokenStr[slot] = NULL;
		}
		/* realloc may have moved the current class's array; re-point ExprStr */
		lexmode( CurrentLexClass );
	}

	/* The real ExprStr/TokenStr arrays are indexed directly because
	 * TokenInd doesn't exist yet.  A leading double quote marks an
	 * expression; everything else is a token name.
	 */
	if ( *t != '"' )
	{
		TokenStr[TokenNum] = t;
	}
	else
	{
		ExprStr[TokenNum] = t;
	}
}
static Expr *
#ifdef __USE_PROTOS
newExpr( char *e )
#else
newExpr( e )
char *e;
#endif
{
	/* Allocate a zero-filled Expr node that refers to expression string
	 * 'e' (the pointer is stored, not copied) and is tagged with the
	 * lexical class current at the time of the call.
	 */
	Expr *node;

	node = (Expr *) calloc(1, sizeof(Expr));
	require(node!=NULL, "newExpr: cannot alloc Expr node");
	node->lclass = CurrentLexClass;
	node->expr = e;
	return node;
}
/* switch to lexical class/mode m. This amounts to creating a new
* lex mode if one does not already exist and making ExprStr point
* to the correct char string array. We must also switch Texpr tables.
*
* BTW, we need multiple ExprStr arrays because more than one automaton
* may have the same label for a token, but with different expressions.
* We need to track an expr for each automaton. If we disallowed this
* feature, only one ExprStr would be required.
*/
void
#ifdef __USE_PROTOS
lexclass( char *m )
#else
lexclass( m )
char *m;
#endif
{
	static char EOFSTR[] = "\"@\"";
	TermEntry *eof_entry;
	int existing;

	/* a lexclass named like a token/errclass label only earns a warning */
	if ( hash_get(Tname, m) != NULL )
	{
		warn(eMsg1("lexclass name conflicts with token/errclass label '%s'",m));
	}

	/* a class that is already known just becomes the current mode */
	existing = LexClassIndex(m);
	if ( existing != -1 )
	{
		lexmode(existing);
		return;
	}

	/* otherwise append a new class at the end of lclass and select it */
	CurrentLexClass = NumLexClasses;
	NumLexClasses++;
	require(NumLexClasses<=MaxLexClasses, "number of allowable lexclasses exceeded\nIncrease MaxLexClasses in generic.h and recompile all C files");

	lclass[CurrentLexClass].classnum = m;	/* pointer is kept, not copied */
	lclass[CurrentLexClass].exprs = (char **) calloc(tsize, sizeof(char *));
	require(lclass[CurrentLexClass].exprs!=NULL,
			"lexclass: cannot allocate ExprStr");
	lclass[CurrentLexClass].htable = newHashTable();

	/* point the "current" globals at the new class's tables */
	ExprStr = lclass[CurrentLexClass].exprs;
	Texpr = lclass[CurrentLexClass].htable;

	/* every automaton gets its own EOF expression entry;
	 * tokens can't have been remapped yet, so EofToken is used as is
	 */
	eof_entry = newTermEntry( EOFSTR );
	eof_entry->token = EofToken;
	hash_add(Texpr, EOFSTR, (Entry *)eof_entry);
	list_add(&ExprOrder, (void *)newExpr(EOFSTR));

	/* the real ExprStr array is indexed directly: TokenInd doesn't exist yet */
	ExprStr[EofToken] = EOFSTR;
}
void
#ifdef __USE_PROTOS
lexmode( int i )
#else
lexmode( i )
int i;
#endif
{
	/* Make lexical class 'i' the current one: ExprStr and Texpr are
	 * pointed at that class's expression array and hash table, and
	 * CurrentLexClass is set to 'i'.
	 *
	 * 'i' must index an existing class.  The lower bound is checked as
	 * well as the upper one so that a negative index (for instance the -1
	 * LexClassIndex() returns for an unknown class) is reported through
	 * require() instead of indexing in front of the lclass array.
	 */
	require(i>=0 && i<NumLexClasses, "lexmode: invalid mode");
	ExprStr = lclass[i].exprs;
	Texpr = lclass[i].htable;
	CurrentLexClass = i;
}
/* return index into lclass array of lexical class. return -1 if nonexistent */
int
#ifdef __USE_PROTOS
LexClassIndex( char *cl )
#else
LexClassIndex( cl )
char *cl;
#endif
{
	/* Linear search of the first NumLexClasses entries of lclass for a
	 * class whose name compares equal to 'cl'; the first match wins.
	 * Returns its index, or -1 when no class has that name.
	 */
	int idx = 0;

	while ( idx < NumLexClasses )
	{
		if ( strcmp(lclass[idx].classnum, cl) == 0 )
		{
			return idx;
		}
		idx++;
	}
	return -1;
}
int
#ifdef __USE_PROTOS
hasAction( char *expr )
#else
hasAction( expr )
char *expr;
#endif
{
	/* Look 'expr' up in the current expression table (Texpr) and report
	 * whether its entry carries an action: 1 if so, 0 otherwise.
	 * A NULL 'expr' or an expression missing from the table fails require().
	 */
	TermEntry *entry;

	require(expr!=NULL, "hasAction: invalid expr");
	entry = (TermEntry *) hash_get(Texpr, expr);
	require(entry!=NULL, eMsg1("hasAction: expr '%s' doesn't exist",expr));
	if ( entry->action == NULL )
	{
		return 0;
	}
	return 1;
}
void
#ifdef __USE_PROTOS
setHasAction( char *expr, char *action )
#else
setHasAction( expr, action )
char *expr;
char *action;
#endif
{
	/* Attach 'action' to the entry for 'expr' in the current expression
	 * table (Texpr).  The pointer is stored as given, replacing whatever
	 * action the entry had.  A NULL 'expr' or an expression missing from
	 * the table fails require().
	 */
	TermEntry *entry;

	require(expr!=NULL, "setHasAction: invalid expr");

	entry = (TermEntry *) hash_get(Texpr, expr);
	require(entry!=NULL, eMsg1("setHasAction: expr '%s' doesn't exist",expr));

	entry->action = action;
}
ForcedToken *
#ifdef __USE_PROTOS
newForcedToken(char *token, int tnum)
#else
newForcedToken(token, tnum)
char *token;
int tnum;
#endif
{
	/* Allocate a zero-filled ForcedToken recording that the token named
	 * 'token' (pointer stored, not copied) is to receive number 'tnum'.
	 */
	ForcedToken *forced;

	forced = (ForcedToken *) calloc(1, sizeof(ForcedToken));
	require(forced!=NULL, "out of memory");
	forced->tnum = tnum;
	forced->token = token;
	return forced;
}
/*
* Make a token indirection array that remaps token numbers and then walk
* the appropriate symbol tables and SynDiag to change token numbers
*/
void
#ifdef __USE_PROTOS
RemapForcedTokens(void)
#else
RemapForcedTokens()
#endif
{
/* Overview:
 *   1. Scan the ForcedTokens list for the largest requested token number.
 *   2. Allocate TokenInd with max+1 entries, save the old TokenNum in
 *      LastTokenCounted and set TokenNum to max+1.
 *   3. Initialise TokenInd to the identity for indices 1..TokenNum-1
 *      (index 0 keeps the zero calloc() gave it), then for every forced
 *      token swap the TokenInd entries at its old and its forced position
 *      and patch the token numbers stored in the Tname entry and in the
 *      per-lexclass expression entries found at those two positions.
 *   4. Walk SynDiag so token nodes pick up their remapped numbers.
 *
 * Does nothing when ForcedTokens is NULL.  Progress is traced to stderr
 * unconditionally.
 *
 * The list walks start at ForcedTokens->next, i.e. the first list node is
 * skipped -- presumably a header node; confirm against list_add().
 */
ListNode *p;
ForcedToken *q;
int max_token_number=0; /* MR9 23-Sep-97 Removed "unsigned" */
int i;
if ( ForcedTokens == NULL ) return;
/* find max token num */
for (p = ForcedTokens->next; p!=NULL; p=p->next)
{
q = (ForcedToken *) p->elem;
if ( q->tnum > max_token_number ) max_token_number = q->tnum;
}
fprintf(stderr, "max token number is %d\n", max_token_number);
/* make token indirection array */
TokenInd = (int *) calloc(max_token_number+1, sizeof(int));
LastTokenCounted = TokenNum;
TokenNum = max_token_number+1;
/* the allocation is only checked here, after the globals were updated,
 * but still before TokenInd is first dereferenced
 */
require(TokenInd!=NULL, "RemapForcedTokens: cannot allocate TokenInd");
/* fill token indirection array and change token id htable ; swap token indices */
for (i=1; i<TokenNum; i++) TokenInd[i] = i;
for (p = ForcedTokens->next; p!=NULL; p=p->next)
{
TermEntry *te;
int old_pos, t;
q = (ForcedToken *) p->elem;
fprintf(stderr, "%s forced to %d\n", q->token, q->tnum);
te = (TermEntry *) hash_get(Tname, q->token);
require(te!=NULL, "RemapForcedTokens: token not in hash table");
/* old_pos is the number the token had before forcing.
 * NOTE(review): TokenInd holds max_token_number+1 entries; nothing in
 * this function establishes that old_pos is within that range -- confirm
 * that callers guarantee it.
 */
old_pos = te->token;
fprintf(stderr, "Before: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]);
fprintf(stderr, "Before: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]);
/* q is fetched again here; it already holds p->elem from the top of the loop */
q = (ForcedToken *) p->elem;
/* swap: the old slot now maps to the forced number, and the forced slot
 * takes over whatever the old slot mapped to
 */
t = TokenInd[old_pos];
TokenInd[old_pos] = q->tnum;
TokenInd[q->tnum] = t;
te->token = q->tnum; /* update token type id symbol table */
fprintf(stderr, "After: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]);
fprintf(stderr, "After: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]);
/* Change the token number in the sym tab entry for the exprs
 * at the old position of the token id and the target position
 */
/* update expr at target (if any) of forced token id */
/* NOTE(review): TokenNum was set to max_token_number+1 above and every
 * q->tnum is <= max_token_number, so this test always succeeds here.
 * The exprs arrays are sized by the file-level tsize, not by TokenNum;
 * whether q->tnum < tsize holds is not visible in this function -- confirm.
 */
if ( q->tnum < TokenNum ) /* is it a valid position? */
{
for (i=0; i<NumLexClasses; i++)
{
if ( lclass[i].exprs[q->tnum]!=NULL )
{
/* update the symbol table for this expr */
TermEntry *e = (TermEntry *) hash_get(lclass[i].htable, lclass[i].exprs[q->tnum]);
require(e!=NULL, "RemapForcedTokens: expr not in hash table");
/* the expression that sat at the forced position moves to the old one */
e->token = old_pos;
fprintf(stderr, "found expr '%s' at target %d in lclass[%d]; changed to %d\n",
lclass[i].exprs[q->tnum], q->tnum, i, old_pos);
}
}
}
/* update expr at old position (if any) of forced token id */
for (i=0; i<NumLexClasses; i++)
{
if ( lclass[i].exprs[old_pos]!=NULL )
{
/* update the symbol table for this expr */
TermEntry *e = (TermEntry *) hash_get(lclass[i].htable, lclass[i].exprs[old_pos]);
require(e!=NULL, "RemapForcedTokens: expr not in hash table");
/* the expression at the token's old position follows it to the forced number */
e->token = q->tnum;
fprintf(stderr, "found expr '%s' for id %s in lclass[%d]; changed to %d\n",
lclass[i].exprs[old_pos], q->token, i, q->tnum);
}
}
}
/* Update SynDiag */
RemapForcedTokensInSyntaxDiagram((Node *)SynDiag);
}
static void
#ifdef __USE_PROTOS
RemapForcedTokensInSyntaxDiagram(Node *p)
#else
RemapForcedTokensInSyntaxDiagram(p)
Node *p;
#endif
{
	/* Recursively walk the syntax diagram reachable from 'p' and replace
	 * the number of every token node by TokenInd[number].  Each token node
	 * is rewritten once (guarded by its 'remapped' flag); junctions use
	 * their 'visited' flag to stop the walk from looping and are not
	 * followed past an EndRule junction.  Every remap is traced to stderr.
	 */
	if ( p==NULL ) return;
	require(p->ntype>=1 && p->ntype<=NumNodeTypes, "Remap...: invalid diagram node");

	if ( p->ntype == nJunction )
	{
		Junction *junc = (Junction *) p;

		if ( junc->visited || junc->jtype == EndRule ) return;
		junc->visited = TRUE;		/* mark while both branches are explored */
		RemapForcedTokensInSyntaxDiagram( junc->p1 );
		RemapForcedTokensInSyntaxDiagram( junc->p2 );
		junc->visited = FALSE;
	}
	else if ( p->ntype == nRuleRef )
	{
		RuleRefNode *ref = (RuleRefNode *) p;

		RemapForcedTokensInSyntaxDiagram( ref->next );
	}
	else if ( p->ntype == nToken )
	{
		TokNode *tok = (TokNode *) p;

		if ( tok->remapped ) return;	/* we've been here before */
		tok->remapped = 1;
		fprintf(stderr, "remapping %d to %d\n", tok->token, TokenInd[tok->token]);
		tok->token = TokenInd[tok->token];
		RemapForcedTokensInSyntaxDiagram( tok->next );
	}
	else if ( p->ntype == nAction )
	{
		RemapForcedTokensInSyntaxDiagram( ((ActionNode *)p)->next );
	}
	else
	{
		fatal_internal("invalid node type");
	}
}
/*
* Add a token name. Return the token number associated with it. If it already
* exists, then return the token number assigned to it.
*
* Track the order in which tokens are found so that the DLG output maintains
* that order. It also lets us map token numbers to strings.
*/
int
#ifdef __USE_PROTOS
addTname( char *token )
#else
addTname( token )
char *token;
#endif
{
TermEntry *p;
require(token!=NULL, "addTname: invalid token name");
if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token;
p = newTermEntry( token );
Ttrack( p->str );
p->token = TokenNum++;
hash_add(Tname, token, (Entry *)p);
return p->token;
}
/* This is the same as addTname except we force the TokenNum to be tnum.
* We don't have to use the Forced token stuff as no tokens will have
* been defined with #tokens when this is called. This is only called
* when a #tokdefs meta-op is used.
*/
int
#ifdef __USE_PROTOS
addForcedTname( char *token, int tnum )
#else
addForcedTname( token, tnum )
char *token;
int tnum;
#endif
{
	/* Like addTname(), except that a new token name is given the
	 * caller-supplied number 'tnum' and TokenNum is not advanced.
	 * A name already present in Tname keeps its existing number, which is
	 * returned unchanged (it may differ from 'tnum').
	 *
	 * Returns the token number associated with 'token'.
	 */
	TermEntry *p;

	/* the diagnostic names this function, not addTname, so a failure
	 * points at the right call site
	 */
	require(token!=NULL, "addForcedTname: invalid token name");
	if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token;
	p = newTermEntry( token );
	/* NOTE(review): Ttrack() files the name under the current TokenNum,
	 * not under 'tnum' -- presumably the caller sets TokenNum to match
	 * beforehand; confirm at the #tokdefs call site.
	 */
	Ttrack( p->str );
	p->token = tnum;
	hash_add(Tname, token, (Entry *)p);
	return p->token;
}
/*
* Add a token expr. Return the token number associated with it. If it already
* exists, then return the token number assigned to it.
*/
int
#ifdef __USE_PROTOS
addTexpr( char *expr )
#else
addTexpr( expr )
char *expr;
#endif
{
TermEntry *p;
require(expr!=NULL, "addTexpr: invalid regular expression");
if ( (p=(TermEntry *)hash_get(Texpr, expr)) != NULL ) return p->token;
p = newTermEntry( expr );
Ttrack( p->str );
/* track the order in which they occur */
list_add(&ExprOrder, (void *)newExpr(p->str));
p->token = TokenNum++;
hash_add(Texpr, expr, (Entry *)p);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?