📄 art.c
字号:
/* * art.c * * ACCENT RUNTIME * * Copyright (C) 1984, 1999 Friedrich Wilhelm Schroeer * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//*============================================================================*/#include <stdlib.h>#define PRIVATE static#define PUBLIC/*============================================================================*//* OPTIONS *//*============================================================================*/#define TRACE 0#define PRINTTREE 0#define WALK 1#define CHECKVIABLE 1#define LOOKAHEAD 1#define DETECTAMBIGUITY 1#define DYNAMICCYCLECHECK 1#define HASHING 1#define BIGHASH 0#define STATISTICS 0#define DYNAMICITEMS 1/*============================================================================*//* ITEMS *//*============================================================================*/#if DYNAMICITEMSint ITEMLIMIT;#else#define ITEMLIMIT 285000#endif#define ITEMINCR 285000# if DYNAMICITEMSlong *dot, *back, *left, *sub;#elselong dot[ITEMLIMIT], back[ITEMLIMIT], left[ITEMLIMIT], sub[ITEMLIMIT];#endif/* * An "item" is a quadrupel < D, B, L, S > , where * * D is a rule currently being processed. * A working point ("dot", written as "*") * devides the right hand side into part already being processed (alpha) * and a part that still needs to be processed (beta) * M : alpha * beta * * B (back-pointer) is a reference to a list of items in which the * processing of rule rule * D = M : M : alpha * beta * startet. This list contains a start item * I' = < D', B', L', S' > * where * D' = M : * alpha beta * * L (left-pointer) * * S (sub-pointer) * * [ documentation to be completed in forthcoming release ....... ] * * If I is the index of an item < D, B, L, S > then * dot[I] = D * back[I] = B * left[I] = L * sub[I] = S */long thislist;/* * current itemlist (index of first item of list) */long last_item;/* * index of last item in item list * position last_item+1 is used for sentinel during searching * last_item+1 must not exceed ITEMLIMIT-1 (i.e. last_item < ITEMLIMIT-2) * */long specialitemadded;/* * an item with index i such that back[i]==thislist * has been added to the current item list * This requires a rescan of previous items during closure computation * (which otherwise is not neccessary) *//*============================================================================*//* GRAMMAR ENCODING *//*============================================================================*/extern int yygrammar[];/* * encoded grammar * defined in 'yygrammar.c' * * * structure of encoding * * a rule * [r] M0 : M1 ... Mn * with number r, lhs M0, and rhs M1 .. M1 * is encoded as follows * * +-- chain<-+ * | M1*-------> first rule for M1 * | ... | * | Mn* | * | -M0*---+ * | r * +-> next rule for M0 * * chain : index of start of next rule for M0 (or 0) * * M1* : encoding of rhs member Mi (1 <= i <= n) * ... Mi nonterminal: index of start of encoding of first rule for Mi * Mn* Mi terminal: see below * * -M0* : negative index of start of encoding of first rule for M0 * * r : rule number * * The grammar starts with the rule * [0] YYSTART : UserRoot EOF * which is encoded as follows: * * 1: 0 (chain) * 2: 6 (first rule of user root) * 3: EOF (eof token) * 4: -1 (points to start of rule) * 5: 0 (rule number) */extern int yyannotation[];/* defined in 'yygrammar.c' */extern int yycoordinate[];/* defined in 'yygrammar.c' */extern int c_length; /* * length of yygrammar * defined in 'yygrammar.c' */# define term_base 50000# define max_char 255# define eofsym term_base/* nonterminals are encoded as * 1 ... term_base-1 * the eof terminal is encoded as * termbase * single character terminals are encoded as * term_base+1 ... term_base+max_char * named terminals are encoded as * term_base+max_char+1 ... * * yylex() returns an original token code * that must be incremented by term_base *//*============================================================================*//* DIRECTOR SETS *//*============================================================================*/long sym;/* current input token */long lookaheadsym;/* next input token */extern long yypos;long yypos = 1;/* this variable must be set by the scanner */PRIVATE long lookaheadpos;#if BIGHASHextern long DHITS[];long BHITS[ITEMLIMIT];/* * search optimisation * * DHITS[d] == thislist: * there is an item with dot d in the current itemlist * (if not: no search neccessary) * * BHITS[b] == thislist: * there is an item with backpointer b in the current itemlist * (if not: no search neccessary) */#endif/*============================================================================*//* STATISTICS *//*============================================================================*/#if STATISTICSint itemlistcount;int notviable = 0;int search = 0;int nosearch = 0;int nosearch_b = 0;int nosearch_h = 0;/* no_search == 1: * no need to call SEARCH because no item with same hash code */int foundsame = 0;int foundnew = 0;int completerstep = 0;int truecompleterstep = 0;int calls_additem = 0;int itemsadded = 0;int rules_rej = 0;/* rules rejected by 'lookup_dirset' (predictor) */int tokens_rej = 0;/* tokens rejected by 'is_viable' (kernel/completer) */int nonterms_rej = 0;/* nonterms rejected by 'is_viable' (kernel/completer) */int GOOD = 0;/* number of items with dot preceeding a token *//* that contribute to the next kernel */int BAD = 0;/* number of items with dot preceeding a token *//* that don't contribute to the next kernel */#endif/*============================================================================*//* PRINT ROUTINES *//*============================================================================*/PRIVATE print_item(p) int p;/* * print the item with index p */{ int i, b, e, l, k; i = dot[p]; printf(" %d: ", p); if (dot[p] == 0 && sub[p] == 0) { printf("[ separator-item ]\n"); return; } if (i <= 5) { b = 1; } else { b = i-1; while(yygrammar[b] >= 0) b--; b += 2; } /* b points to the start of the rule */ e = i; while(yygrammar[e] >= 0) e++; /* e points to the end of the rule, i.e. the lhscode */ l = - yygrammar[e]; /* l is the lhs */ printf("%s :", yyprintname(l)); k = b+1; /* k points to the first member */ while(yygrammar[k] > 0) { if (k == i) printf(" *"); /* print member yygrammar[k] */ if (yygrammar[k] == 5000) { printf(" <EOF>"); } else if (yygrammar[k] > 5000) { if (yygrammar[k] < term_base+max_char+1) printf(" '%c'", yygrammar[k]-term_base); else printf(" %s", yyprintname(yygrammar[k])); } else { printf(" %s", yyprintname(yygrammar[k])); } k++; } if (yygrammar[i] <= 0) printf(" *"); printf(" (back:%d sub:%d left:%d)\n", back[p], sub[p], left[p]);}/*----------------------------------------------------------------------------*/PRIVATE print_coordinate(i) int i;/* * print source coordinate (of grammar file) with code i * the number encodes both, line and column information */{ int pos = yycoordinate[i]; int l = pos / 1000; int c = pos % 1000; printf("line %d, col %d of grammar", l, c);}/*----------------------------------------------------------------------------*/PRIVATE print_tree(i) int i;/* * print tree for item with index i */{ static int indent = 0; int k; /* rule number if item at end of rule */ if (yygrammar[dot[i]] < 0 ) { /* end of rule */ for (k = 1; k <= indent; k++) printf(" "); printf("%s alternative at ", yyprintname(-yygrammar[dot[i]])); print_coordinate(dot[i]+1); printf(" {\n"); indent++; } /* left brothers */ if (left[i]) print_tree(left[i]); /* this son */ if (left[i]) { int sym = yygrammar[dot[i]-1]; if (sym > term_base) { for (k = 1; k <= indent; k++) printf(" "); if (sym < term_base+max_char+1) printf("'%c'\n", yygrammar[dot[i]-1]-term_base); else printf("%s\n", yyprintname(sym)); } } /* subtree for this sun */ if (sub[i]) print_tree(sub[i]); if (yygrammar[dot[i]] < 0 ) { /* end of rule */ indent--; for (k = 1; k <= indent; k++) printf(" "); printf("}\n"); }}/*----------------------------------------------------------------------------*/#if STATISTICSPRIVATE print_statistics()/* * print values of statistics counters */{ printf("itemlistcount =%10d\n", itemlistcount); printf("notviable =%10d\n", notviable); printf("search =%10d\n", search); printf("nosearch =%10d\n", nosearch); printf("nosearch_b =%10d\n", nosearch_b); printf("nosearch_h =%10d\n", nosearch_h); printf("foundsame =%10d\n", foundsame); printf("foundnew =%10d\n", foundnew); printf("completerst =%10d\n", completerstep); printf("truecomplst =%10d\n", truecompleterstep); printf("calls_additem =%10d\n", calls_additem); printf("itemsadded =%10d\n", itemsadded); printf("rules_rej =%10d (predictor: lookup_dirset)\n", rules_rej); printf("tokens_rej =%10d (kernel/completer: is_viable)\n", tokens_rej); printf("nonterms_rej =%10d (kernel/completer: is_viable)\n", nonterms_rej); printf("GOOD =%10d\n", GOOD); printf("BAD =%10d\n", BAD);}#endif/*============================================================================*//* DIRECTOR SETS *//*============================================================================*/PRIVATE int lookup_dirset(ruleptr) long ruleptr;/* * Let 'rule' be the rule be the rule into which 'ruleptr' points. * Let 'tkn' be the code of the next token. * Return ('tkn' is in the director set of 'rule'). */{ int p; int rule; int tkn; /* find rule number */ p = ruleptr; while (yygrammar[p] >= 0) p++; p++; rule = yygrammar[p]; tkn = lookaheadsym-term_base; return yydirset(rule, tkn);}/*----------------------------------------------------------------------------*/PRIVATE int is_viable (d) int d;/* * d is a pointer into the encoded grammar * Returns true if the the symbol yygrammar[d] (token or nonterminal) * is a valid continuation of the parse: * can it start with the current lookahead token? * If the symbol is a token it is compared with the lookahead token * If symbol is a nonterminal it is checked whether the lookahead token * appears in the director sets of the rules for the nonterm of the symbol * * NOTE: the union of the director sets should be computed statically */{ if (yygrammar[d] >= term_base) { /* token */ if (yygrammar[d] == lookaheadsym) { return 1; } else {#if STATISTICS tokens_rej++;#endif return 0; } } else if (yygrammar[d] > 0 ) { /* nonterm */ int start; start = yygrammar[d]; /* start points to the first rule for the nonterm */ do { int p; int rule; p = start+1; while (yygrammar[p] >= 0) { p++; } /* p now points to negative lhs encoding */ rule = yygrammar[p+1]; if (yydirset(rule, lookaheadsym-term_base)) { return 1; } start = yygrammar[start]; } while (start);#if STATISTICS nonterms_rej++;#endif return 0; } else { /* end of rule */ return 1; }}/*============================================================================*//* ERROR MESSAGES *//*============================================================================*/PRIVATE syntaxerror()/* * Report syntax error and terminate */{ yyerror("syntax error"); exit(1);}/*----------------------------------------------------------------------------*/PRIVATE Abort(msg) char *msg;/* * emit msg and terminate */{ printf("%s\n", msg); exit(1);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -