📄 nfa.c
字号:
/*@A (C) 1992 Allen I. Holub */
/* NFA.C---Make an NFA from a LeX input file using Thompson's construction */
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <tools/debug.h>
#include <tools/set.h>
#include <tools/hash.h>
#include <tools/compiler.h>
#include <tools/stack.h>
#include "nfa.h" /* defines for NFA, EPSILON, CCL */
#include "dfa.h" /* prototype for lerror(). */
#include "globals.h" /* externs for Verbose, etc. */
#ifdef DEBUG
int Lev = 0;
# define ENTER(f) printf("%*senter %s [%c][%1.10s] \n", \
Lev++ * 4, "", f, Lexeme, Input)
# define LEAVE(f) printf("%*sleave %s [%c][%1.10s]\n", \
--Lev * 4, "", f, Lexeme, Input)
#else
# define ENTER(f)
# define LEAVE(f)
#endif
/*----------------------------------------------------------------
* Error processing stuff. Note that all errors are fatal.
*----------------------------------------------------------------
*/
typedef enum ERR_NUM
{
E_MEM, /* Out of memory */
E_BADEXPR, /* Malformed regular expression */
E_PAREN, /* Missing close parenthesis */
E_STACK, /* Internal error: Discard stack full */
E_LENGTH, /* Too many regular expressions */
E_BRACKET, /* Missing [ in character class */
E_BOL, /* ^ must be at start of expr or ccl */
E_CLOSE, /* + ? or * must follow expression */
E_STRINGS, /* Too many characters in accept actions */
E_NEWLINE, /* Newline in quoted string */
E_BADMAC, /* Missing } in macro expansion */
E_NOMAC, /* Macro doesn't exist */
E_MACDEPTH /* Macro expansions nested too deeply. */
} ERR_NUM;
PRIVATE char *Errmsgs[] = /* Indexed by ERR_NUM */
{
"Not enough memory for NFA",
"Malformed regular expression",
"Missing close parenthesis",
"Internal error: Discard stack full",
"Too many regular expressions or expression too long",
"Missing [ in character class",
"^ must be at start of expression or after [",
"+ ? or * must follow an expression or subexpression",
"Too many characters in accept actions",
"Newline in quoted string, use \\n to get newline into expression",
"Missing } in macro expansion",
"Macro doesn't exist",
"Macro expansions nested too deeply"
};
typedef enum WARN_NUM
{
W_STARTDASH, /* Dash at start of character class */
W_ENDDASH /* Dash at end of character class */
} WARN_NUM;
PRIVATE char *Warnmsgs[] = /* Indexed by WARN_NUM */
{
"Treating dash in [-...] as a literal dash",
"Treating dash in [...-] as a literal dash"
};
PRIVATE NFA *Nfa_states ; /* State-machine array */
PRIVATE int Nstates = 0 ; /* # of NFA states in machine */
PRIVATE int Next_alloc; /* Index of next element of the array */
#define SSIZE 32
PRIVATE NFA *Sstack[ SSIZE ]; /* Stack used by new() */
PRIVATE NFA **Sp = &Sstack[ -1 ]; /* Stack pointer */
#define STACK_OK() ( INBOUNDS(Sstack, Sp) ) /* true if stack not */
/* full or empty */
#define STACK_USED() ( (int)(Sp-Sstack) + 1 ) /* slots used */
#define CLEAR_STACK() ( Sp = Sstack - 1 ) /* reset the stack */
#define PUSH(x) ( *++Sp = (x) ) /* put x on stack */
#define POP() ( *Sp-- ) /* get x from stack */
/*--------------------------------------------------------------
* MACRO support:
*/
#define MAC_NAME_MAX 34 /* Maximum name length */
#define MAC_TEXT_MAX 80 /* Maximum amount of expansion text */
typedef struct MACRO
{
char name[ MAC_NAME_MAX ];
char text[ MAC_TEXT_MAX ];
} MACRO;
PRIVATE HASH_TAB *Macros; /* Symbol table for macro definitions */
typedef enum TOKEN
{
EOS = 1, /* end of string */
ANY, /* . */
AT_BOL, /* ^ */
AT_EOL, /* $ */
CCL_END, /* ] */
CCL_START, /* [ */
CLOSE_CURLY, /* } */
CLOSE_PAREN, /* ) */
CLOSURE, /* * */
DASH, /* - */
END_OF_INPUT, /* EOF */
L, /* literal character */
OPEN_CURLY, /* { */
OPEN_PAREN, /* ( */
OPTIONAL, /* ? */
OR, /* | */
PLUS_CLOSE /* + */
} TOKEN;
PRIVATE TOKEN Tokmap[] =
{
/* ^@ ^A ^B ^C ^D ^E ^F ^G ^H ^I ^J ^K ^L ^M ^N */
L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
/* ^O ^P ^Q ^R ^S ^T ^U ^V ^W ^X ^Y ^Z ^[ ^\ ^] */
L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
/* ^^ ^_ SPACE ! " # $ % & ' */
L, L, L, L, L, L, AT_EOL, L, L, L,
/* ( ) * + , - . */
OPEN_PAREN, CLOSE_PAREN, CLOSURE, PLUS_CLOSE, L, DASH, ANY,
/* / 0 1 2 3 4 5 6 7 8 9 : ; < = */
L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
/* > ? */
L, OPTIONAL,
/* @ A B C D E F G H I J K L M N */
L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
/* O P Q R S T U V W X Y Z */
L, L, L, L, L, L, L, L, L, L, L, L,
/* [ \ ] ^ */
CCL_START, L, CCL_END, AT_BOL,
/* _ ` a b c d e f g h i j k l m */
L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
/* n o p q r s t u v w x y z */
L, L, L, L, L, L, L, L, L, L, L, L, L,
/* { | } DEL */
OPEN_CURLY, OR, CLOSE_CURLY, L
};
PRIVATE char *(*Ifunct) P((void)) ; /* Input function pointer */
PRIVATE char *Input = "" ; /* Current position in input string */
PRIVATE char *S_input ; /* Beginning of input string */
PRIVATE TOKEN Current_tok ; /* Current token */
PRIVATE int Lexeme ; /* Value associated with LITERAL */
#define MATCH(t) (Current_tok == (t))
PRIVATE void parse_err P(( ERR_NUM type ));
PRIVATE void warning P(( WARN_NUM type ));
PRIVATE void errmsg P(( int type, char **table, char *msgtype ));
PRIVATE NFA *new P(( void ));
PRIVATE void discard P(( NFA *nfa_to_discard ));
PRIVATE char *save P(( char *str ));
PRIVATE char *get_macro P(( char **namep ));
PRIVATE void print_a_macro P(( MACRO *mac ));
PRIVATE TOKEN advance P(( void ));
PRIVATE NFA *machine P(( void ));
PRIVATE NFA *rule P(( void ));
PRIVATE void expr P(( NFA **startp, NFA **endp ));
PRIVATE void cat_expr P(( NFA **startp, NFA **endp ));
PRIVATE int first_in_cat P(( TOKEN tok ));
PRIVATE void factor P(( NFA **startp, NFA **endp ));
PRIVATE void term P(( NFA **startp, NFA **endp ));
PRIVATE void dodash P(( SET *set ));
PRIVATE void warning( type )
WARN_NUM type; /* Print error mesage and arrow to */
{
errmsg( (int)type, Warnmsgs, "WARNING" );
}
PRIVATE void parse_err( type )
ERR_NUM type; /* Print error mesage and arrow to */
{
errmsg( (int)type, Errmsgs, "ERROR" );
exit(1);
}
PRIVATE void errmsg( type, table, msgtype )
int type;
char **table;
char *msgtype;
{ /* highlight point of error. */
char *p;
fprintf(stderr, "%s (line %d) %s\n", msgtype, Actual_lineno, table[type] );
for( p = S_input; ++p <= Input; putc('-', stderr) )
;
fprintf( stderr, "v\n%s\n", S_input );
exit( 1 );
}
/*--------------------------------------------------------------*/
PRIVATE NFA *new() /* NFA management functions */
{
NFA *p;
static int first_time = 1;
if( first_time )
{
if( !( Nfa_states = (NFA *) calloc(NFA_MAX, sizeof(NFA)) ))
parse_err( E_MEM ); /* doesn't return */
first_time = 0;
Sp = &Sstack[ -1 ];
}
if( ++Nstates >= NFA_MAX )
parse_err( E_LENGTH ); /* doesn't return */
/* If the stack is not ok, it's empty */
p = !STACK_OK() ? &Nfa_states[Next_alloc++] : POP();
p->edge = EPSILON;
return p;
}
/*- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
PRIVATE void discard( nfa_to_discard )
NFA *nfa_to_discard;
{
--Nstates;
memset( nfa_to_discard, 0, sizeof(NFA) );
nfa_to_discard->edge = EMPTY ;
PUSH( nfa_to_discard );
if( !STACK_OK() )
parse_err( E_STACK ); /* doesn't return */
}
/*----------------------------------------------------------------------*/
PRIVATE char *save( str ) /* String-management function. */
char *str;
{
char *textp, *startp;
int len;
static int first_time = 1;
static char size[8]; /* Query-mode size */
static int *strings; /* Place to save accepting strings */
static int *savep; /* Current position in strings array. */
if( first_time )
{
if( !(savep = strings = (int *) malloc( STR_MAX )) )
parse_err( E_MEM ); /* doesn't return */
first_time = 0;
}
if( !str ) /* query mode. Return number of bytes in use */
{
sprintf( size, "%ld", (long)(savep - strings) );
return size;
}
if( *str == '|' )
return (char *)( savep + 1 );
*savep++ = Lineno;
for( textp = (char *)savep ; *str ; *textp++ = *str++ )
if( textp >= (char *)strings + (STR_MAX-1) )
parse_err( E_STRINGS ); /* doesn't return */
*textp++ = '\0' ;
/* Increment savep past the text. "len" is initialized to the string length.
* The "len/sizeof(int)" truncates the size down to an even multiple of the
* current int size. The "+(len % sizeof(int) != 0)" adds 1 to the truncated
* size if the string length isn't an even multiple of the int size (the !=
* operator evaluates to 1 or 0). Return a pointer to the string itself.
*/
startp = (char *)savep;
len = textp - startp;
savep += (len / sizeof(int)) + (len % sizeof(int) != 0);
return startp;
}
/*------------------------------------------------------------*/
PUBLIC void new_macro( def )
char *def;
{
/* Add a new macro to the table. If two macros have the same name, the
* second one takes precedence. A definition takes the form:
* name <whitespace> text [<whitespace>]
* whitespace at the end of the line is ignored.
*/
unsigned hash_add();
char *name; /* Name component of macro definition */
char *text; /* text part of macro definition */
char *edef; /* pointer to end of text part */
MACRO *p;
static int first_time = 1;
if( first_time )
{
first_time = 0;
Macros = maketab( 31, hash_add, strcmp );
}
for( name = def; *def && !isspace(*def) ; def++ ) /* Isolate name */
;
if( *def )
*def++ = '\0' ;
/* Isolate the definition text. This process is complicated because you need
* to discard any trailing whitespace on the line. The first while loop
* skips the preceding whitespace. The for loop is looking for end of
* string. If you find a white character (and the \n at the end of string
* is white), remember the position as a potential end of string.
*/
while( isspace( *def ) ) /* skip up to macro body */
++def;
text = def; /* Remember start of replacement text */
edef = NULL; /* strip trailing white space */
while( *def )
{
if( !isspace(*def) )
++def;
else
for(edef = def++; isspace(*def) ; ++def )
;
}
if( edef )
*edef = '\0';
/* Add the macro to the symbol table */
p = (MACRO *) newsym( sizeof(MACRO) );
strncpy( p->name, name, MAC_NAME_MAX );
strncpy( p->text, text, MAC_TEXT_MAX );
addsym( Macros, p );
D( printf("Added macro definition, macro table now is:\n"); )
D( print_macs(); )
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -*/
PRIVATE char *get_macro( namep )
char **namep;
{
/* Return a pointer to the contents of a macro having the indicated
* name. Abort with a message if no macro exists. The macro name includes
* the brackets. *namep is modified to point past the close brace.
*/
char *p;
MACRO *mac;
/* { */
if( !(p = strchr( ++(*namep), '}')) ) /* skip { and find } */
parse_err( E_BADMAC ); /* print msg & abort */
else
{
*p = '\0'; /* Overwrite close brace. { */
if( !(mac = (MACRO *) findsym( Macros, *namep )) )
parse_err( E_NOMAC );
*p++ = '}'; /* Put the brace back. */
*namep = p ; /* Update name pointer. */
return mac->text;
}
return "ERROR"; /* If you get here, it's a bug */
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -*/
PRIVATE void print_a_macro( mac ) /* Workhorse function needed by */
MACRO *mac; /* ptab() call in printmacs(), below */
{
printf( "%-16s--[%s]--\n", mac->name, mac->text );
}
PUBLIC void printmacs( ANSI(void) ) /* Print all the macros to stdout */
{
if( !Macros )
printf("\tThere are no macros\n");
else
{
printf("\nMACROS:\n");
ptab( Macros, (void(*)P((void*,...))) print_a_macro, NULL, 1 );
}
}
/*----------------------------------------------------------------
* Lexical analyzer:
*
* Lexical analysis is trivial because all lexemes are single-character values.
* The only complications are escape sequences and quoted strings, both
* of which are handled by advance(), below. This routine advances past the
* current token, putting the new token into Current_tok and the equivalent
* lexeme into Lexeme. If the character was escaped, Lexeme holds the actual
* value. For example, if a "\s" is encountered, Lexeme will hold a space
* character. The MATCH(x) macro returns true if x matches the current token.
* Advance both modifies Current_tok to the current token and returns it.
*
* Macro expansion is handled by means of a stack (declared at the top
* of the subroutine). When an expansion request is encountered, the
* current input buffer is stacked, and input is read from the macro
* text. This process repeats with nested macros, so SSIZE controls
* the maximum macro-nesting depth.
*/
PRIVATE TOKEN advance()
{
static int inquote = 0; /* Processing quoted string */
int saw_esc; /* Saw a backslash */
static char *stack[ SSIZE ], /* Input-source stack */
**sp = NULL; /* and stack pointer. */
if( !sp ) /* Initialize sp. */
sp = stack - 1; /* Necessary for large model */
if( Current_tok == EOS ) /* Get another line */
{
if( inquote )
parse_err( E_NEWLINE ); /* doesn't return */
do
{
/* Sit in this loop until a non-blank line is read into */
/* the "Input" array. */
if( !(Input = (*Ifunct)()) ) /* then at end of file */
{
Current_tok = END_OF_INPUT;
goto exit;
}
while( isspace( *Input ) ) /* Ignore leading */
Input++; /* white space... */
} while ( !*Input ); /* and blank lines. */
S_input = Input; /* Remember start of line */
} /* for error messages. */
while( *Input == '\0' )
{
if( INBOUNDS(stack, sp) ) /* Restore previous input source */
{
Input = *sp-- ;
continue;
}
Current_tok = EOS; /* No more input sources to restore */
Lexeme = '\0'; /* ie. you're at the real end of */
goto exit; /* string. */
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -