📄 look.c
字号:
/*
* look.c
*
* Compute LL(1) lookahead sets for SORCERER code-generator
*
* SOFTWARE RIGHTS
*
* We reserve no LEGAL rights to SORCERER -- SORCERER is in the public
* domain. An individual or company may do whatever they wish with
* source code distributed with SORCERER or the code generated by
* SORCERER, including the incorporation of SORCERER, or its output, into
* commerical software.
*
* We encourage users to develop software with SORCERER. However, we do
* ask that credit is given to us for developing SORCERER. By "credit",
* we mean that if you incorporate our source code into one of your
* programs (commercial product, research project, or otherwise) that you
* acknowledge this fact somewhere in the documentation, research report,
* etc... If you like SORCERER and have developed a nice tool with the
* output, please mention that you developed it using SORCERER. In
* addition, we ask that this header remain intact in our source code.
* As long as these guidelines are kept, we expect to continue enhancing
* this system and expect to make other tools available as they are
* completed.
*
* SORCERER 1.00B
* Terence Parr
* AHPCRC, University of Minnesota
* 1992-2001
*/
#include "stdpccts.h"
#include "sym.h"
#include "proto.h"
static GLA *end_node;
static GLA *
#ifdef __USE_PROTOS
newGLANode(void);
#else
newGLANode();
#endif
set
#ifdef __USE_PROTOS
Lookahead( GLA *p )
#else
Lookahead( p )
GLA *p;
#endif
{
set r, rv;
if ( p == NULL ) return empty;
r = rv = empty;
/* have we been here before for this k? Detect cycles */
if ( p->visited )
{
return empty;
}
p->visited = 1;
/* Does edge 1 have a token label? */
if ( p->label1 <= last_valid_token )
{
rv = set_of(p->label1);
if ( p->upper_range!=0 )
{
int i;
/* put all elements of range into lookahead set */
for (i=p->label1+1; i<=p->upper_range; i++)
{
set_orel(i, &rv);
}
}
}
else if ( p->label1 == wild_card || p->label1 == end_of_input )
{
rv = set_of(p->label1);
}
else /* must be an epsilon transfer */
{
int trouble = 0;
if ( p->p1!=NULL )
{
/* check for infinite-recursion */
/* o->o->o->o-A->o.. Rule def (p->p1 points to far left node)
* | for 'a : A | B;'
* o->o-B->o..
*
* the visited flag is set on the start of the alts.
*/
if ( p->is_rule_ref && p->p1->is_rule_entry )
{
require(p->p1->p1!=NULL, "Lookahead: invalid GLA");
require(p->p1->p1->p1!=NULL, "Lookahead: invalid GLA");
if ( p->p1->p1->p1->visited ) {
errNoFL(eMsg2("infinite recursion from rule %s to rule %s",
p->in_rule,p->p1->in_rule));
rv = set_of(wild_card);
trouble = 1;
}
}
if ( !trouble )
{
r = Lookahead(p->p1);
rv = set_or(r, rv);
set_free(r);
}
}
}
/* handle edge 2 */
r = Lookahead(p->p2); /* tokens can't be on e2 edges */
set_orin(&rv, r);
set_free(r);
/* this node is no longer visited */
p->visited = 0;
return rv;
}
/*
* Build a big, interwined, lookahead GLA from the lookahead GLA created
* for each rule. From this, lookahead computation is trivial.
*
* [1] Build the GLA start states for each nonterminal. Each nonterminal
* looks like "o-->o" where the 2nd 'o' is the start of the GLA list
* of alts for the grammar block. Alternatives are connected like this:
*
* o-->o-->alt1 -->o
* | ^
* o-->alt2 ---|
* ... |
* | |
* o-->altn ---|
*
* The BLOCK building function actually makes the ALT list. This
* function only builds the first 'o' node (upper left).
*
* [2] Make GLA for block of each rule.
*
* [3] Make a set of links to the nodes following all references to
* this rule.
*/
void
#ifdef __USE_PROTOS
build_GLA( AST *rules )
#else
build_GLA( rules )
AST *rules;
#endif
{
SymEntry *s;
AST *r;
GLA *end_of_rule, *blk_start;
require(rules!=NULL, "build_GLA: NULL rule list");
for (r=rules; r!=NULL; r=r->right)
{
s = (SymEntry *) hash_get(symbols, r->text);
require(s!=NULL, "build_GLA: sym tab broken");
CurRule = s->str;
s->start_state = newGLANode();
s->start_state->is_rule_entry = 1;
}
end_node = newGLANode();
for (r=rules; r!=NULL; r=r->right)
{
s = (SymEntry *) hash_get(symbols, r->text);
require(s!=NULL, "build_GLA: sym tab broken");
CurRule = s->str;
blk_start = newGLANode();
blk_start->p1 = build_GLA_for_block(r->down, &end_of_rule);
blk_start->label1 = epsilon;
s->start_state->p1 = blk_start;
s->start_state->label1 = epsilon;
s->end_rule = end_of_rule;
/* make this BLOCK correspond to the GLA for this rule */
r->down->start_state = s->start_state->p1;
}
for (r=rules; r!=NULL; r=r->right)
{
s = (SymEntry *) hash_get(symbols, r->text);
require(s!=NULL, "build_GLA: sym tab broken");
build_follow_links(s->end_rule, s->refs);
}
}
/*
* Tree looks like ( BLOCK ( ALT alpha ) ... ( AST beta ) ).
*
* For each ALT of BLOCK, build a new alternative into the GLA.
*
* Build a block that looks like:
*
* o-->alt1 -->o
* | ^
* o-->alt2 ---|
* ... |
* | |
* o-->altn ---|
*/
GLA *
#ifdef __USE_PROTOS
build_GLA_for_block( AST *block, GLA **tail )
#else
build_GLA_for_block( block, tail )
AST *block;
GLA **tail;
#endif
{
GLA *p, *alt_tail, *blk_tail, *prev=NULL, *first=NULL;
AST *alt;
require(block!=NULL, "build_GLA_for_block: NULL tree pointer");
blk_tail = newGLANode();
for (alt=block->down; alt!=NULL; alt=alt->right)
{
p = newGLANode();
p->p1 = build_GLA_for_ALT(alt, &alt_tail);
p->label1 = epsilon;
/* connect new alt into downward link */
if ( prev!=NULL )
{
prev->p2 = p;
prev->label2 = epsilon;
}
else
{
first = p;
}
prev = p;
/* link alt to block tail node */
alt_tail->p1 = blk_tail;
alt_tail->label1 = epsilon;
}
*tail = blk_tail;
return first;
}
/*
* Tree looks like ( ALT elem1 ... elemn ). Generate a GLA for each
* element with build_GLA_for_element; make a tail node and return
* pointer to GLA element built for first element.
*
* Tree patterns such as #(A b C) must be included because they may
* invoke other rules or have subrules in them for which lookahead
* must be computed. The '#(' and ')' are basically ignored and #(A b C)
* turns into 'A b C' for GLA construction purposes. Note that this would
* NOT work for LL(k>1).
*/
GLA *
#ifdef __USE_PROTOS
build_GLA_for_ALT( AST *alt, GLA **alt_tail )
#else
build_GLA_for_ALT( alt, alt_tail )
AST *alt;
GLA **alt_tail;
#endif
{
GLA *elem_tail, *first;
require(alt!=NULL, "build_GLA_for_alt: NULL tree pointer");
first = build_GLA_for_tree(alt->down, &elem_tail);
*alt_tail = elem_tail;
return first;
}
GLA *
#ifdef __USE_PROTOS
build_GLA_for_tree( AST *q, GLA **tree_tail )
#else
build_GLA_for_tree( q, tree_tail )
AST *q;
GLA **tree_tail;
#endif
{
AST *t;
GLA *elem_tail, *alt_tail=NULL, *start=NULL, *elem;
for (t=q; t!=NULL; t = t->right)
{
elem = build_GLA_for_element(t, &elem_tail);
if ( elem!=NULL )
{
if ( start==NULL ) { start = elem; alt_tail = elem_tail; }
else {
alt_tail->p1 = elem;
alt_tail->label1 = epsilon;
alt_tail = elem_tail;
}
}
if ( t->down != NULL && (t->token == Token||t->token == WILD) )
{
elem = build_GLA_for_tree(t->down, &elem_tail);
if ( elem==NULL ) continue;
alt_tail->p1 = elem;
alt_tail->label1 = epsilon;
/* put an end-of-input node at the end of each sibling list so
* that lookahead computation don't take '#(A b) D' to be same
* as 'A b D'; i.e., 'D' CANNOT follow a reference to a 'b' in
* this context as the 'b' is at a lower level. Only a NULL ptr
* can follow a ref to 'b'. For example,
*
* a : #(A b) D;
* b : E
* |
* ;
*
* The lookahead for 'b' is {E} for alt1 and {end_of_input} of
* alt2 because for the second alt to match, the input subtree
* must be NULL.
*/
elem_tail->p1 = newGLANode();
elem_tail->label1 = end_of_input;
elem_tail = elem_tail->p1;
alt_tail = elem_tail;
}
}
if ( start==NULL )
{
start = newGLANode();
*tree_tail = start;
}
else *tree_tail = alt_tail;
return start;
}
GLA *
#ifdef __USE_PROTOS
build_GLA_for_element( AST *elem, GLA **elem_tail )
#else
build_GLA_for_element( elem, elem_tail )
AST *elem;
GLA **elem_tail;
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -