📄 sor.g
字号:
/*
* SORCERER Version 1.00B
*
* Terence Parr
* U of MN, AHPCRC
* April 1995
*/
#header <<
/* 23-Sep-97 thm Accommodate user who needs to redefine ZZLEXBUFSIZE */
#ifndef ZZLEXBUFSIZE
#define ZZLEXBUFSIZE 8000
#endif
#include "pcctscfg.h" /* MR20 G. Hobbelt __USE_PROTOS #define */
#include "charbuf.h"
#include "hash.h"
#include "set.h"
#include "sor.h"
/* Fields this grammar adds to each AST node.  `token', `text' and `label'
 * are the ones read by lisp() when dumping a tree; the remaining fields are
 * described by the notes on the individual continuation lines. */
#define AST_FIELDS \
int token; char text[MaxAtom+1], label[MaxRuleName+1]; \
char *action; /* if action node, here is ptr to it */ \
char in,out; \
char init_action; /* set if Action and 1st action of alt */ \
int file; int line; /* set for BLOCK, ALT, nonterm nodes */ \
int upper_range; /* only if T1..T2 found */ \
GLA *start_state; /* ptr into GLA for this block */ \
int no_copy; /* copy input ptr to output ptr? */ \
ListNode *refvars; /* any ref vars defined for this rule */ \
unsigned char is_root; /* this token is a root #( A ... ) */
/* Fill in a node from a token: store the token type and copy at most
 * MaxAtom characters of its text.  strncpy() writes no terminator when the
 * source is MaxAtom characters or longer, so the last byte of the
 * MaxAtom+1 sized `text' field is set explicitly.  `cur' is unused. */
#define zzcr_ast(node, cur, _tok, _text) \
{(node)->token=_tok; strncpy((node)->text, _text,MaxAtom); \
(node)->text[MaxAtom]='\0';}
#define USER_ZZSYN
#define zzAST_DOUBLE
extern int define_num;
>>
<<
/* MR20 G. Hobbelt Fix for Borland C++ 4.x & 5.x compiling with ALL warnings enabled */
#if defined(__TURBOC__)
#pragma warn -aus /* unused assignment of 'xxx' */
#endif
#include "sym.h"
#include "proto.h"
>>
#lexaction <<
#include "sym.h"
#include "proto.h"
int define_num = 0;
char *
#ifdef __USE_PROTOS
scarf_to_end_of_func_call(void)
#else
scarf_to_end_of_func_call()
#endif
{
static char func_call_str[MaxAtom+1];
char *p;
p = &func_call_str[0];
more:
if ( zzchar==')' ) { *p++ = zzchar; *p++ = '\0'; zzadvance(); return func_call_str; }
if ( zzchar=='"' )
{
*p++ = zzchar; zzadvance();
while ( zzchar!='"' )
{
if ( zzchar=='\\' ) { *p++ = zzchar; zzadvance(); }
*p++ = zzchar; zzadvance();
}
}
*p++ = zzchar; zzadvance();
goto more;
}
>>
<<
/*
 * Print `tree' and all of its right siblings to `output' in a LISP-like
 * form.  A node with children is wrapped in " (" ... " )".  A node with text
 * is printed quoted, prefixed by "label:" when it has a label; the operator
 * nodes OPT, POS_CLOSURE, CLOSURE and PRED_OP show only their text, every
 * other node shows tokenname[text].  A node without text shows only its
 * token name.
 */
void /* MR9 23-Sep-97 Eliminate complaint about no return value */
#ifdef __USE_PROTOS
lisp( AST *tree, FILE *output )
#else
lisp( tree, output )
AST *tree;
FILE *output;
#endif
{
	AST *t;

	for (t = tree; t != NULL; t = t->right)
	{
		int has_children = (t->down != NULL);

		if ( has_children ) fputs(" (", output);

		if ( t->text[0]=='\0' ) {
			fprintf(output, " %s", zztokens[t->token]);
		}
		else {
			fputs(" \"", output);
			if ( t->label[0]!='\0' ) fprintf(output, "%s:", t->label);
			if ( t->token==OPT || t->token==POS_CLOSURE ||
				 t->token==CLOSURE || t->token==PRED_OP )
				fprintf(output, "%s", t->text);
			else
				fprintf(output, "%s[%s]", zztokens[t->token], t->text);
			fputs("\"", output);
		}

		lisp(t->down, output);	/* children; a NULL child list prints nothing */

		if ( has_children ) fputs(" )", output);
	}
}
/*
 * Set the token type of an existing node and return that same node.
 * No other field of the node is touched.
 */
AST *
#ifdef __USE_PROTOS
zzmk_ast(AST *node, int token)
#else
zzmk_ast(node, token)
AST *node;
int token;
#endif
{
node->token = token;
return node;
}
/*
 * Parse a SORCERER description from stream `f' and return the resulting
 * tree, or NULL when found_error is set after parsing.
 *
 * After a successful parse this also:
 *   - dumps the tree to stderr when print_guts is set,
 *   - reserves three token types following the current token_type for end
 *     of input, epsilon and the wild card, and names them "$", "[Ep]", ".",
 *   - double-links the tree, stores it in the global `rules',
 *   - builds the lookahead automaton (only when the tree is non-NULL) and
 *     dumps it when print_guts is set.
 */
AST *
#ifdef __USE_PROTOS
read_sor_desc(FILE *f)
#else
read_sor_desc(f)
FILE *f;
#endif
{
AST *root = NULL;
zzline = 1;
/* run the parser on f, starting at rule sordesc; the tree comes back in root */
ANTLR(sordesc(&root), f);
if ( found_error ) return NULL;
if ( print_guts ) {
fprintf(stderr, "Internal Represenation of Tree Grammar:\n");
lisp(root, stderr);
fprintf(stderr, "\n");
}
/* the order of the three post-increments below fixes the numbering */
last_valid_token = token_type;
end_of_input = token_type++;/* end of input token type is 1 + last real token */
epsilon = token_type++; /* epsilon token type is 2 + last real token */
wild_card = token_type++; /* wild_card_token is 3 + last real token */
token_association(end_of_input, "$");
token_association(epsilon, "[Ep]");
token_association(wild_card, ".");
zzdouble_link(root, NULL, NULL);
rules = root;
if ( root!=NULL ) build_GLA(root);
/* NOTE(review): unlike build_GLA, dump_GLAs is not guarded against a NULL
 * root -- confirm it tolerates one */
if ( print_guts ) {
fprintf(stderr, "Internal Represenation of Grammar Lookahead Automaton:\n");
dump_GLAs(root);
fprintf(stderr, "\n");
}
return root;
}
>>
/* Lexer mode for a double-quoted string outside of actions; START switches
 * here on an opening double quote. */
#lexclass STRINGS
/* closing quote: the token is returned as RExpr and scanning resumes in START */
#token RExpr "\"" << zzmode(START); >>
/* a line break inside the string is counted, warned about and skipped */
#token "\n|\r|\r\n" << /* MR16a */
zzline++;
warn("eoln found in string");
zzskip();
>>
/* backslash followed by any character */
#token "\\~[]" << zzmore(); >>
/* run of ordinary string characters */
#token "~[\n\r\"\\]+" << zzmore(); >> /* MR16a */
/* Lexer mode for a double-quoted string inside an action; ACTIONS switches
 * here on a double quote. */
#lexclass ACTION_STRINGS
/* closing quote: back to ACTIONS, still extending the same token */
#token "\"" << zzmode(ACTIONS); zzmore(); >>
/* a line break inside the string is counted, warned about and skipped */
#token "\n|\r|\r\n" << /* MR16a */
zzline++;
warn("eoln found in string (in user action)");
zzskip();
>>
/* backslash followed by any character */
#token "\\~[]" << zzmore(); >>
/* run of ordinary string characters */
#token "~[\n\r\"\\]+" << zzmore(); >> /* MR16a */
/* Lexer mode for a single-quoted character literal inside an action;
 * ACTIONS switches here on a single quote. */
#lexclass ACTION_CHARS
/* closing quote: back to ACTIONS, still extending the same token */
#token "'" << zzmode(ACTIONS); zzmore(); >>
/* a line break inside the literal is counted, warned about and skipped */
#token "\n|\r|\r\n" << /* MR16a */
zzline++;
warn("eoln found in char literal (in user action)");
zzskip();
>>
/* backslash followed by any character */
#token "\\~[]" << zzmore(); >>
/* run of ordinary characters */
#token "~[\n\r'\\]+" << zzmore(); >> /* MR16a */
/* Lexer mode for a C comment inside an action.  Every rule uses zzmore(),
 * so the comment text stays part of the action; line breaks are counted. */
#lexclass ACTION_COMMENTS
#token "\*/" << zzmode(ACTIONS); zzmore(); >>
#token "\*" << zzmore(); >>
#token "\n|\r|\r\n" << zzline++; zzmore(); >> /* MR16a */
#token "~[\n\r\*]+" << zzmore(); >> /* MR16a */
/* Lexer mode for a "//" comment inside an action: the comment is kept in
 * the action text and the mode ends at the first line break. */
#lexclass ACTION_CPP_COMMENTS
#token "\n|\r|\r\n" << zzline++; zzmode(ACTIONS); zzmore(); >> /* MR16a */
#token "~[\n\r]+" << zzmore(); >> /* MR16a */
/* Lexer mode for a "//" comment outside of actions: everything is skipped
 * and the first line break returns to START. */
#lexclass CPP_COMMENTS
#token "\n|\r|\r\n" << zzline++; zzmode(START); zzskip(); >> /* MR16a */
#token "~[\n\r]+" << zzskip(); >> /* MR16a */
/* Lexer mode for a C comment outside of actions: everything is skipped,
 * line breaks are counted, and the comment terminator returns to START. */
#lexclass COMMENTS
#token "\*/" << zzmode(START); zzskip(); >>
#token "\*" << zzskip(); >>
#token "\n|\r|\r\n" << zzline++; zzskip(); >> /* MR16a */
#token "~[\n\r\*]+" << zzskip(); >> /* MR16a */
#lexclass REFVAR_SCARF /* everything until a ')' */
/* ACTIONS switches to this mode after seeing '@' followed by '('.  The
 * single rule matches the text up to and including the closing ')'. */
#token "~[\)]+ \)" <<{
RefVarRec *rf;
zzskip();
/* chop the trailing ')' off the matched text */
zzbegexpr[strlen(zzbegexpr)-1] = '\0';
/* build a record from the remaining text and add it to both lists */
rf=refVarRec(zzbegexpr);
list_add(&AllRefVars, rf);
list_add(&RefVars, rf);
/* back to ACTIONS; the matched text is replaced by the empty string */
zzmode(ACTIONS); zzmore(); zzreplstr("");
}>>
/*
* This lexical class accepts actions of type [..] and <<..>>
*
* It translates the following special items:
*
* #[args] --> "ast_node(args)" add "classname::" for C++, however.
* #[] --> "ast_empty_node()"
* #( root, child1, ..., childn )
--> "ast_make(root, child1, ...., childn, NULL)"
* #() --> "NULL"
*
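* Things for reference variables are also recognized:
* @(...) --> text up to the ')' is recorded as a reference variable and
*            removed from the action
* @ --> "_parser->" in C mode; left unchanged when generating C++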
*
* To escape,
*
* \] --> ]
* \) --> )
* \$ --> $
* \# --> #
*
* A stack is used to nest action terminators because they can be nested
* like crazy: << #[#[..],..] >>
*/
#lexclass ACTIONS
/* Two '>' in a row end the action and return it as an Action token. */
#token Action "\>\>" << /* these do not nest */
zzmode(START);
/* blank the first two characters of the token text, both in the lookahead
 * text and in the current expression (presumably the opening delimiter) */
NLATEXT[0] = ' ';
NLATEXT[1] = ' ';
zzbegexpr[0] = ' ';
zzbegexpr[1] = ' ';
if ( zzbufovf ) {
found_error = 1;
err( eMsgd("action buffer overflow; size %d",ZZLEXBUFSIZE));
}
>>
/* A ']' is interpreted according to the terminator on top of the stack:
 *   ']'  pops; if the stack is now empty the [..] action ends (PassAction),
 *        otherwise this closes a nested #[..] and becomes ")"
 *   '|'  pops; end of a plain [...] inside the action
 *   else the character is just part of the action text */
#token PassAction "\]" << if ( topint() == ']' ) {
popint();
if ( istackempty() ) /* terminate action */
{
zzmode(START);
/* blank the first character of the token text (presumably the opening '[') */
NLATEXT[0] = ' ';
zzbegexpr[0] = ' ';
if ( zzbufovf ) {
found_error = 1;
err( eMsgd("parameter buffer overflow; size %d",ZZLEXBUFSIZE));
}
}
else {
/* terminate #[..] */
zzreplstr(")");
zzmore();
}
}
else if ( topint() == '|' ) { /* end of simple [...] */
popint();
zzmore();
}
else zzmore();
>>
/* line breaks inside an action are counted and kept in the action text */
#token "\n|\r|\r\n" << zzline++; zzmore(); >> /* MR16a */
/* a single '>' is ordinary action text */
#token "\>" << zzmore(); >>
/* '#' followed by an identifier inside an action.
 *
 * C preprocessor directives (#ifdef, #else, #endif, #ifndef, #if, #define,
 * #pragma, #undef, #import, #line, #include, #error) are passed through
 * untouched.  For anything else:
 *   - nontransform mode: warn and drop the '#', keeping the identifier;
 *   - transform mode, identifier equal to the current rule name: replace
 *     the text by "(*_result)";
 *   - transform mode otherwise (no current rule, or some other name): the
 *     text is left as written.
 *
 * The replacement buffer is static, so zzreplstr() is only called when the
 * buffer was filled for this match; otherwise the contents left over from
 * an earlier match would be substituted.  The copy into the buffer is
 * limited to its size.
 */
#token "#[_a-zA-Z][_a-zA-Z0-9]*"
<<
if ( !(strcmp(zzbegexpr, "#ifdef")==0 ||
strcmp(zzbegexpr, "#else")==0 ||
strcmp(zzbegexpr, "#endif")==0 ||
strcmp(zzbegexpr, "#ifndef")==0 ||
strcmp(zzbegexpr, "#if")==0 ||
strcmp(zzbegexpr, "#define")==0 ||
strcmp(zzbegexpr, "#pragma")==0 ||
strcmp(zzbegexpr, "#undef")==0 ||
strcmp(zzbegexpr, "#import")==0 ||
strcmp(zzbegexpr, "#line")==0 ||
strcmp(zzbegexpr, "#include")==0 ||
strcmp(zzbegexpr, "#error")==0) )
{
static char buf[100];
int have_repl = 0; /* set only when buf was written for this match */
if ( !transform ) {
warn("#id used in nontransform mode; # ignored");
/* the precision keeps an over-long identifier from overrunning buf */
sprintf(buf, "%.99s", zzbegexpr+1);
have_repl = 1;
}
else {
if ( CurRule==NULL )
{warn("#id used in action outside of rule; ignored");}
else if ( strcmp(zzbegexpr+1,CurRule)==0 ) {
strcpy(buf, "(*_result)");
have_repl = 1;
}
}
if ( have_repl ) zzreplstr(buf);
}
zzmore();
>>
/* Node-construction shorthands.  The replacement text depends on GenCPP.
 * The two opening forms push the terminator that the ']' and ')' rules
 * look for later: ']' for #[ and '}' for #( . */
/* empty node */
#token "#\[\]" <<
if ( GenCPP ) zzreplstr("new SORAST");
else zzreplstr("ast_empty_node()");
zzmore();
>>
/* empty tree */
#token "#\(\)" << zzreplstr("NULL"); zzmore(); >>
/* start of a node constructor with arguments */
#token "#\[" <<
pushint(']');
if ( GenCPP ) zzreplstr("new SORAST(");
else zzreplstr("ast_node(");
zzmore();
>>
/* start of a tree constructor */
#token "#\(" <<
pushint('}');
if ( GenCPP ) zzreplstr("PCCTS_AST::make(");
else zzreplstr("ast_make(");
zzmore();
>>
/* any other '#' is ordinary action text */
#token "#" << zzmore(); >>
/* A ')' inside an action:
 *   empty stack -> plain text
 *   top is ')'  -> closes an ordinary '(' ; pop
 *   top is '}'  -> closes a tree constructor; pop and emit ", NULL)"
 * Any other top of stack leaves the stack alone.  zzmore() is reached on
 * every path (twice when the stack is empty). */
#token "\)" <<
if ( istackempty() )
zzmore();
else if ( topint()==')' ) {
popint();
}
else if ( topint()=='}' ) {
popint();
/* terminate #(..) */
zzreplstr(", NULL)");
}
zzmore();
>>
/* Openers: remember which terminator is expected. */
#token "\[" <<
pushint('|'); /* look for '|' to terminate simple [...] */
zzmore();
>>
#token "\(" <<
pushint(')');
zzmore();
>>
/* Escapes: the backslash is dropped and the character kept literally. */
#token "\\\]" << zzreplstr("]"); zzmore(); >>
#token "\\\)" << zzreplstr(")"); zzmore(); >>
#token "\\>" << zzreplstr(">"); zzmore(); >>
/* Quotes switch to the matching literal mode. */
#token "'" << zzmode(ACTION_CHARS); zzmore();>>
#token "\"" << zzmode(ACTION_STRINGS); zzmore();>>
#token "\\#" << zzreplstr("#"); zzmore(); >>
/*#token "\\\\" << zzmore(); >> /* need this for some reason */
#token "\\~[\]\)>#]" << zzmore(); >> /* escaped char, always ignore */
/* Comments inside actions are kept as part of the action text. */
#token "/" << zzmore(); >>
#token "/\*" << zzmode(ACTION_COMMENTS); zzmore(); >>
#token "\*/" << err("Missing /*; found dangling */ in action"); zzmore(); >>
#token "//" << zzmode(ACTION_CPP_COMMENTS); zzmore(); >>
/* '@' followed by '(' : hand over to REFVAR_SCARF; the matched text is
 * replaced by the empty string */
#token "\@\(" <<zzmode(REFVAR_SCARF); zzmore(); zzreplstr("");>>
/* a lone '@' becomes "_parser->" unless GenCPP is set */
#token "\@" <<
zzmore(); if ( !GenCPP ) zzreplstr("_parser->");
>>
/* An identifier immediately followed by '(' .  For "ast_scan(" (the same
 * name whether or not GenCPP is set) the rest of the call is read by
 * scarf_to_end_of_func_call() and the whole call is replaced by the result
 * of cvt_token_str().  Any other call just records that a ')' is expected. */
#token "[a-zA-Z_]+\(" <<
if ( strcmp(zzbegexpr,"ast_scan(")!=0 ) {
pushint(')');
zzmore();
}
else {
char *args=scarf_to_end_of_func_call();
zzreplstr(cvt_token_str(zzbegexpr, args));
zzmore();
}
>>
/* identifiers and runs of characters with no special meaning in an action */
#token "[a-zA-Z_]+" << zzmore(); >>
#token "~[a-zA-Z_\n\r\)\(\\#\>\]\[\"'/\@]+" << zzmore(); >> /* MR16a */
/* Lexer mode used outside of actions, strings and comments. */
#lexclass START
#token "[\t\ ]+" << zzskip(); >> /* Ignore White */
/* CR-LF is matched as ONE line break, as in every other lexclass of this
 * file; the previous LF-CR alternative made a CR-LF pair count as two lines. */
#token "\n|\r|\r\n" << zzline++; zzskip(); >> /* Track Line # */ /* MR16a */
/* '[' opens a bracketed action: switch to ACTIONS with a fresh terminator
 * stack holding ']' */
#token "\[" << zzmode(ACTIONS); zzmore();
istackreset();
pushint(']'); >>
/* two '<' open an action: remember file and line where it starts, then
 * switch to ACTIONS with a fresh terminator stack holding '>' */
#token "\<\<" << action_file=CurFile; action_line=zzline;
zzmode(ACTIONS); zzmore();
istackreset();
pushint('>'); >>
/* opening quote of a string */
#token "\"" << zzmode(STRINGS); zzmore(); >>
/* comments outside of actions are skipped */
#token "/\*" << zzmode(COMMENTS); zzskip(); >>
#token "\*/" << err("Missing /*; found dangling */"); zzskip(); >>
#token "//" << zzmode(CPP_COMMENTS); zzskip(); >>
/* an action terminator with no action open */
#token "\>\>" << err("Missing <<; found dangling \>\>"); zzskip(); >>
/* End of the current input.  If NextFile() supplies another stream, the
 * current one is closed, scanning continues on the new one and this Eof is
 * skipped; otherwise Eof is returned to the parser. */
#token Eof "@"
<< /* L o o k F o r A n o t h e r F i l e */
{
FILE *new_input;
new_input = NextFile();
if ( new_input != NULL ) {
fclose( input );
input = new_input;
zzrdstream( input );
/*zzadvance(); ** Get 1st char of this file */
zzskip(); /* Skip the Eof (@) char i.e continue */
}
}
>>
/* Token declarations without lexer actions.  Those declared with no
 * pattern (BLOCK, ALT, RULE, REFVAR) are never produced by the scanner. */
#token Header "#header"
#token Tokdef "#tokdefs"
#token BLOCK /* used only as place-holder in intermediate tree */
#token ALT /* used only as place-holder in intermediate tree */
#token LABEL ":" /* used only as place-holder in intermediate tree */
#token OPT "\{" /* These are labeled so we can ref them in trees */
#token POS_CLOSURE "\+"
#token CLOSURE "\*"
#token WILD "."
#token PRED_OP "?"
#token BT "#\("
#token RULE
#token REFVAR
/* named token groups (error classes) */
#errclass "atomic-element" { WILD NonTerm Token }
#errclass "rule-header" { PassAction LABEL "\<" "\>" }
/*
 * Build trees for a sorcerer description
 *
 * Accepted order: any number of #header / #tokdefs sections, actions, an
 * optional class definition, actions, the rules, actions, an optional
 * closing "}" of the class, actions, end of input.
 *
 * Where each action's text is stored:
 *   before the class definition      -> before_actions
 *   between class_def and closing "}" -> class_actions when a class name is
 *                                        set, else before_actions
 *   after the closing "}"             -> after_actions
 */
sordesc
: <<int he=0,to=0;>>
( header <<he++;>>
| tokdef <<to++;>>
)*
<<
/* he / to count the #header and #tokdefs sections seen above */
if ( he==0 && !Inline && !GenCPP ) warnNoFL("missing #header statement");
if ( he>1 ) warnNoFL("extra #header statement");
if ( to>1 ) warnNoFL("extra #tokdef statement");
>>
( Action!
<<list_add(&before_actions, actiondup(LATEXT(1)));>>
)*
{ class_def }
( Action!
<<
if ( CurClassName[0]!='\0' )
list_add(&class_actions, actiondup(LATEXT(1)));
else
list_add(&before_actions, actiondup(LATEXT(1)));
>>
)*
( rule )*
( Action!
<<
if ( CurClassName[0]!='\0' )
list_add(&class_actions, actiondup(LATEXT(1)));
else
list_add(&before_actions, actiondup(LATEXT(1)));
>>
)*
{ "\}"! // end of class def
<<
if ( CurClassName[0]=='\0' )
err("missing class definition for trailing '}'");
>>
}
( Action!
<<list_add(&after_actions, actiondup(LATEXT(1)));>>
)*
"@"!
;
/* presumably the fail action of sordesc (run on a syntax error in the
 * rule) -- confirm against the PCCTS reference */
<<found_error=1;>>
/* #header section: a copy of the action text is kept in header_action. */
header: "#header"! Action! <<header_action = actiondup(LATEXT(1));>>
;
/* presumably the fail action of header (run on a syntax error in the
 * rule) -- confirm against the PCCTS reference */
<<found_error=1;>>
tokdef: "#tokdefs"! RExpr!
<<{
/* Take the file name from the quoted string just matched, then parse that
 * file with rule enum_file in lexer mode PARSE_ENUM_FILE.  Parser and
 * scanner state are saved before and restored after the nested parse.
 *
 * The name is copied starting after the opening quote, rather than being
 * shifted in place with strcpy(): strcpy() between overlapping objects is
 * undefined behaviour.  The closing quote is only removed when present. */
AST *dumb = NULL;
zzantlr_state st; FILE *f; struct zzdlg_state dst;
char *quoted = LATEXT(1);
size_t n;
if ( *quoted=='"' ) quoted++; /* skip the opening quote */
strcpy(tokdefs_file, quoted);
n = strlen(tokdefs_file);
if ( n>0 && tokdefs_file[n-1]=='"' ) tokdefs_file[n-1] = '\0'; /* drop the closing quote */
zzsave_antlr_state(&st);
zzsave_dlg_state(&dst);
define_num=0;
f = fopen(tokdefs_file, "r");
if ( f==NULL ) {found_error=1; err(eMsg1("cannot open token defs file '%s'", tokdefs_file));}
else {ANTLRm(enum_file(&dumb), f, PARSE_ENUM_FILE);}
zzrestore_antlr_state(&st);
zzrestore_dlg_state(&dst);
UserDefdTokens = 1;
}>>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -