📄 lemon.c
字号:
/*** Copyright (c) 1991, 1994, 1997, 1998 D. Richard Hipp**** This file contains all sources (including headers) to the LEMON** LALR(1) parser generator. The sources have been combined into a** single file to make it easy to include LEMON as part of another** program.**** This program is free software; you can redistribute it and/or** modify it under the terms of the GNU General Public** License as published by the Free Software Foundation; either** version 2 of the License, or (at your option) any later version.**** This program is distributed in the hope that it will be useful,** but WITHOUT ANY WARRANTY; without even the implied warranty of** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU** General Public License for more details.** ** You should have received a copy of the GNU General Public** License along with this library; if not, write to the** Free Software Foundation, Inc., 59 Temple Place - Suite 330,** Boston, MA 02111-1307, USA.**** Author contact information:** drh@acm.org** http://www.hwaci.com/drh/**** $Id: lemon.c,v 1.12 2002/01/30 22:55:15 guy Exp $*/#include <stdio.h>#include <stdarg.h>#include <stdlib.h>#include <string.h>#include <ctype.h>/* * Wrapper around "isupper()", "islower()", etc. to cast the argument to * "unsigned char", so that they at least handle non-ASCII 8-bit characters * (and don't provoke a pile of warnings from GCC). */#define safe_isupper(c) isupper((unsigned char)(c))#define safe_islower(c) islower((unsigned char)(c))#define safe_isalpha(c) isalpha((unsigned char)(c))#define safe_isalnum(c) isalnum((unsigned char)(c))#define safe_isspace(c) isspace((unsigned char)(c))extern int access(const char *, int);#ifndef __WIN32__# if defined(_WIN32) || defined(WIN32)# define __WIN32__# endif#endif/* #define PRIVATE static */#define PRIVATE#ifdef TEST#define MAXRHS 5 /* Set low to exercise exception code */#else#define MAXRHS 1000#endifchar *msort(char *, char **, int (*)(const void *, const void *));/********** From the file "struct.h" *************************************//*** Principal data structures for the LEMON parser generator.*/typedef enum {BOOL_FALSE=0, BOOL_TRUE} Boolean;/* Symbols (terminals and nonterminals) of the grammar are stored** in the following: */struct symbol { char *name; /* Name of the symbol */ int index; /* Index number for this symbol */ enum { TERMINAL, NONTERMINAL } type; /* Symbols are all either TERMINALS or NTs */ struct rule *rule; /* Linked list of rules of this (if an NT) */ int prec; /* Precedence if defined (-1 otherwise) */ enum e_assoc { LEFT, RIGHT, NONE, UNK } assoc; /* Associativity if predecence is defined */ char *firstset; /* First-set for all rules of this symbol */ Boolean lambda; /* True if NT and can generate an empty string */ char *destructor; /* Code which executes whenever this symbol is ** popped from the stack during error processing */ int destructorln; /* Line number of destructor code */ char *datatype; /* The data type of information held by this ** object. Only used if type==NONTERMINAL */ int dtnum; /* The data type number. In the parser, the value ** stack is a union. The .yy%d element of this ** union is the correct data type for this object */};/* Each production rule in the grammar is stored in the following** structure. */struct rule { struct symbol *lhs; /* Left-hand side of the rule */ char *lhsalias; /* Alias for the LHS (NULL if none) */ int ruleline; /* Line number for the rule */ int nrhs; /* Number of RHS symbols */ struct symbol **rhs; /* The RHS symbols */ char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ int line; /* Line number at which code begins */ char *code; /* The code executed when this rule is reduced */ struct symbol *precsym; /* Precedence symbol for this rule */ int index; /* An index number for this rule */ Boolean canReduce; /* True if this rule is ever reduced */ struct rule *nextlhs; /* Next rule with the same LHS */ struct rule *next; /* Next rule in the global list */};/* A configuration is a production rule of the grammar together with** a mark (dot) showing how much of that rule has been processed so far.** Configurations also contain a follow-set which is a list of terminal** symbols which are allowed to immediately follow the end of the rule.** Every configuration is recorded as an instance of the following: */struct config { struct rule *rp; /* The rule upon which the configuration is based */ int dot; /* The parse point */ char *fws; /* Follow-set for this configuration only */ struct plink *fplp; /* Follow-set forward propagation links */ struct plink *bplp; /* Follow-set backwards propagation links */ struct state *stp; /* Pointer to state which contains this */ enum { COMPLETE, /* The status is used during followset and */ INCOMPLETE /* shift computations */ } status; struct config *next; /* Next configuration in the state */ struct config *bp; /* The next basis configuration */};/* Every shift or reduce operation is stored as one of the following */struct action { struct symbol *sp; /* The look-ahead symbol */ enum e_action { SHIFT, ACCEPT, REDUCE, ERROR, CONFLICT, /* Was a reduce, but part of a conflict */ SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ NOT_USED /* Deleted by compression */ } type; union { struct state *stp; /* The new state, if a shift */ struct rule *rp; /* The rule, if a reduce */ } x; struct action *next; /* Next action for this state */ struct action *collide; /* Next action with the same hash */};/* Each state of the generated parser's finite state machine** is encoded as an instance of the following structure. */struct state { struct config *bp; /* The basis configurations for this state */ struct config *cfp; /* All configurations in this set */ int index; /* Sequencial number for this state */ struct action *ap; /* Array of actions for this state */ unsigned int naction; /* Number of actions for this state */ int tabstart; /* First index of the action table */ int tabdfltact; /* Default action */};/* A followset propagation link indicates that the contents of one** configuration followset should be propagated to another whenever** the first changes. */struct plink { struct config *cfp; /* The configuration to which linked */ struct plink *next; /* The next propagate link */};/* The state vector for the entire parser generator is recorded as** follows. (LEMON uses no global variables and makes little use of** static variables. Fields in the following structure can be thought** of as begin global variables in the program.) */struct lemon { struct state **sorted; /* Table of states sorted by state number */ struct rule *rule; /* List of all rules */ int nstate; /* Number of states */ int nrule; /* Number of rules */ int nsymbol; /* Number of terminal and nonterminal symbols */ int nterminal; /* Number of terminal symbols */ struct symbol **symbols; /* Sorted array of pointers to symbols */ int errorcnt; /* Number of errors */ struct symbol *errsym; /* The error symbol */ char *name; /* Name of the generated parser */ char *arg; /* Declaration of the 3th argument to parser */ char *tokentype; /* Type of terminal symbols in the parser stack */ char *start; /* Name of the start symbol for the grammar */ char *stacksize; /* Size of the parser stack */ char *include; /* Code to put at the start of the C file */ int includeln; /* Line number for start of include code */ char *error; /* Code to execute when an error is seen */ int errorln; /* Line number for start of error code */ char *overflow; /* Code to execute on a stack overflow */ int overflowln; /* Line number for start of overflow code */ char *failure; /* Code to execute on parser failure */ int failureln; /* Line number for start of failure code */ char *accept; /* Code to execute when the parser excepts */ int acceptln; /* Line number for the start of accept code */ char *extracode; /* Code appended to the generated file */ int extracodeln; /* Line number for the start of the extra code */ char *tokendest; /* Code to execute to destroy token data */ int tokendestln; /* Line number for token destroyer code */ char *filename; /* Name of the input file */ char *basename; /* Basename of inputer file (no directory or path */ char *outname; /* Name of the current output file */ char *outdirname; /* Name of the output directory, specified by user */ char *templatename; /* Name of template file to use, specified by user */ char *tokenprefix; /* A prefix added to token names in the .h file */ int nconflict; /* Number of parsing conflicts */ int tablesize; /* Size of the parse tables */ int basisflag; /* Print only basis configurations */ char *argv0; /* Name of the program */};#define MemoryCheck(X) if((X)==0){ \ extern void memory_error(void); \ memory_error(); \}/******** From the file "action.h" *************************************/struct action *Action_new(void);struct action *Action_sort(struct action *);void Action_add(struct action **, enum e_action, struct symbol *, void *);/********* From the file "assert.h" ************************************/void myassert(char *, int);#ifndef NDEBUG# define assert(X) if(!(X))myassert(__FILE__,__LINE__)#else# define assert(X)#endif/********** From the file "build.h" ************************************/void FindRulePrecedences(struct lemon *);void FindFirstSets(struct lemon *);void FindStates(struct lemon *);void FindLinks(struct lemon *);void FindFollowSets(struct lemon *);void FindActions(struct lemon *);/********* From the file "configlist.h" *********************************/void Configlist_init(void);struct config *Configlist_add(struct rule *, int);struct config *Configlist_addbasis(struct rule *, int);void Configlist_closure(struct lemon *);void Configlist_sort(void);void Configlist_sortbasis(void);struct config *Configlist_return(void);struct config *Configlist_basis(void);void Configlist_eat(struct config *);void Configlist_reset(void);/********* From the file "error.h" ***************************************/#if __GNUC__ >= 2void ErrorMsg( char *, int, char *, ... ) __attribute__((format (printf, 3, 4)));#elsevoid ErrorMsg( char *, int, char *, ... );#endif/****** From the file "option.h" ******************************************/struct s_options { enum { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR} type; char *label; char *arg; char *message;};int optinit(char**,struct s_options*,FILE*);int optnargs(void);char *get_optarg(int);void get_opterr(int);void optprint(void);/******** From the file "parse.h" *****************************************/void Parse(struct lemon *lemp);/********* From the file "plink.h" ***************************************/struct plink *Plink_new(void);void Plink_add(struct plink **, struct config *);void Plink_copy(struct plink **, struct plink *);void Plink_delete(struct plink *);/********** From the file "report.h" *************************************/void Reprint(struct lemon *);void ReportOutput(struct lemon *);void ReportTable(struct lemon *, int);void ReportHeader(struct lemon *);void CompressTables(struct lemon *);/********** From the file "set.h" ****************************************/void SetSize(int N); /* All sets will be of size N */char *SetNew(void); /* A new set for element 0..N */void SetFree(char*); /* Deallocate a set */int SetAdd(char*,int); /* Add element to a set */int SetUnion(char *A,char *B); /* A <- A U B, thru element N */#define SetFind(X,Y) (X[Y]) /* True if Y is in set X *//**************** From the file "table.h" *********************************//*** All code in this file has been automatically generated** from a specification in the file** "table.q"** by the associative array code building program "aagen".** Do not edit this file! Instead, edit the specification** file, then rerun aagen.*//*** Code for processing tables in the LEMON parser generator.*//* Routines for handling a strings */char *Strsafe(char *);void Strsafe_init(void);int Strsafe_insert(char *);char *Strsafe_find(char *);/* Routines for handling symbols of the grammar */struct symbol *Symbol_new(char *x);int Symbolcmpp(const void *, const void *);void Symbol_init(void);int Symbol_insert(struct symbol *, char *);struct symbol *Symbol_find(char *);struct symbol *Symbol_Nth(int);int Symbol_count(void);struct symbol **Symbol_arrayof(void);/* Routines to manage the state table */int Configcmp(const void *, const void *);struct state *State_new(void);void State_init(void);int State_insert(struct state *, struct config *);struct state *State_find(struct config *);struct state **State_arrayof(void);/* Routines used for efficiency in Configlist_add */void Configtable_init(void);int Configtable_insert(struct config *);struct config *Configtable_find(struct config *);void Configtable_clear(int(*)(struct config *));/****************** From the file "action.c" *******************************//*** Routines processing parser actions in the LEMON parser generator.*//* Allocate a new parser action */struct action *Action_new(void){ static struct action *freelist = 0; struct action *new; if( freelist==0 ){ int i; int amt = 100; freelist = (struct action *)malloc( sizeof(struct action)*amt ); if( freelist==0 ){ fprintf(stderr,"Unable to allocate memory for a new parser action."); exit(1); } for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1]; freelist[amt-1].next = 0; } new = freelist; freelist = freelist->next; return new;}/* Compare two actions */static int actioncmp(const void *ap1_arg, const void *ap2_arg){ const struct action *ap1 = ap1_arg, *ap2 = ap2_arg; int rc; rc = ap1->sp->index - ap2->sp->index; if( rc==0 ) rc = (int)ap1->type - (int)ap2->type; if( rc==0 ){ assert( ap1->type==REDUCE && ap2->type==REDUCE ); rc = ap1->x.rp->index - ap2->x.rp->index; } return rc;}/* Sort parser actions */struct action *Action_sort(struct action *ap){ ap = (struct action *)msort((char *)ap,(char **)&ap->next,actioncmp); return ap;}void Action_add(struct action **app, enum e_action type, struct symbol *sp, void *arg){ struct action *new; new = Action_new(); new->next = *app; *app = new; new->type = type; new->sp = sp; if( type==SHIFT ){ new->x.stp = (struct state *)arg; }else{ new->x.rp = (struct rule *)arg;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -