⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nfa.c

📁 一个c语言写做的编译器的源码
💻 C
📖 第 1 页 / 共 2 页
字号:
/*@A (C) 1992 Allen I. Holub                                                */

/* NFA.C---Make an NFA from a LeX input file using Thompson's construction */

#include <stdio.h>
#include <stdlib.h>

#include <ctype.h>
#include <string.h>
#include <tools/debug.h>
#include <tools/set.h>
#include <tools/hash.h>
#include <tools/compiler.h>
#include <tools/stack.h>
#include "nfa.h"			/* defines for NFA, EPSILON, CCL */
#include "dfa.h"			/* prototype for lerror().	 */
#include "globals.h"			/* externs for Verbose, etc.	 */

#ifdef DEBUG
	int    Lev = 0;
#	define ENTER(f) printf("%*senter %s [%c][%1.10s] \n",  		   \
					    Lev++ * 4, "", f, Lexeme, Input)
#	define LEAVE(f) printf("%*sleave %s [%c][%1.10s]\n",   		   \
					    --Lev * 4, "", f, Lexeme, Input)
#else
#	define ENTER(f)
#	define LEAVE(f)
#endif
/*----------------------------------------------------------------
 *    Error processing stuff. Note that all errors are fatal.
 *----------------------------------------------------------------
 */

typedef enum ERR_NUM
{
    E_MEM,	/* Out of memory				*/
    E_BADEXPR,	/* Malformed regular expression			*/
    E_PAREN,	/* Missing close parenthesis			*/
    E_STACK,	/* Internal error: Discard stack full		*/
    E_LENGTH,	/* Too many regular expressions 		*/
    E_BRACKET,	/* Missing [ in character class			*/
    E_BOL,	/* ^ must be at start of expr or ccl		*/
    E_CLOSE,	/* + ? or * must follow expression		*/
    E_STRINGS,	/* Too many characters in accept actions	*/
    E_NEWLINE,	/* Newline in quoted string			*/
    E_BADMAC,	/* Missing } in macro expansion			*/
    E_NOMAC,	/* Macro doesn't exist				*/
    E_MACDEPTH  /* Macro expansions nested too deeply.		*/

} ERR_NUM;


PRIVATE char	*Errmsgs[] =		/* Indexed by ERR_NUM */
{
    "Not enough memory for NFA",
    "Malformed regular expression",
    "Missing close parenthesis",
    "Internal error: Discard stack full",
    "Too many regular expressions or expression too long",
    "Missing [ in character class",
    "^ must be at start of expression or after [",
    "+ ? or * must follow an expression or subexpression",
    "Too many characters in accept actions",
    "Newline in quoted string, use \\n to get newline into expression",
    "Missing } in macro expansion",
    "Macro doesn't exist",
    "Macro expansions nested too deeply"
};

typedef enum WARN_NUM
{
    W_STARTDASH,	/* Dash at start of character class	*/
    W_ENDDASH  		 /* Dash at end   of character class	*/

} WARN_NUM;


PRIVATE char	*Warnmsgs[] =		/* Indexed by WARN_NUM */
{
    "Treating dash in [-...] as a literal dash",
    "Treating dash in [...-] as a literal dash"
};


PRIVATE NFA   *Nfa_states  ; 	     /* State-machine array		   */
PRIVATE int   Nstates = 0  ;  	     /* # of NFA states in machine	   */
PRIVATE int   Next_alloc;  	     /* Index of next element of the array */

#define	SSIZE	32

PRIVATE NFA   *Sstack[ SSIZE ];	     /* Stack used by new()		   */
PRIVATE NFA   **Sp = &Sstack[ -1 ];  /* Stack pointer			   */

#define	STACK_OK()    ( INBOUNDS(Sstack, Sp) )     /* true if stack not  */
						   /* full or empty	 */
#define STACK_USED()  ( (int)(Sp-Sstack) + 1	)  /* slots used         */
#define CLEAR_STACK() ( Sp = Sstack - 1		)  /* reset the stack    */
#define PUSH(x)	      ( *++Sp = (x)		)  /* put x on stack     */
#define POP()	      ( *Sp--			)  /* get x from stack   */


/*--------------------------------------------------------------
 * MACRO support:
 */

#define MAC_NAME_MAX  34	    /* Maximum name length		*/
#define MAC_TEXT_MAX  80	    /* Maximum amount of expansion text	*/

typedef struct MACRO
{
    char name[ MAC_NAME_MAX ];
    char text[ MAC_TEXT_MAX ];

} MACRO;

PRIVATE HASH_TAB *Macros; /* Symbol table for macro definitions */
typedef enum TOKEN
{
    EOS = 1,		/*  end of string	*/
    ANY,		/*  .		 	*/
    AT_BOL,		/*  ^			*/
    AT_EOL,		/*  $			*/
    CCL_END,		/*  ]			*/
    CCL_START,		/*  [			*/
    CLOSE_CURLY,	/*  }			*/
    CLOSE_PAREN,	/*  )			*/
    CLOSURE,		/*  *			*/
    DASH,		/*  -			*/
    END_OF_INPUT,	/*  EOF  		*/
    L,			/*  literal character	*/
    OPEN_CURLY,		/*  {			*/
    OPEN_PAREN,		/*  (			*/
    OPTIONAL,		/*  ?			*/
    OR,			/*  |			*/
    PLUS_CLOSE		/*  +			*/

} TOKEN;


PRIVATE TOKEN	Tokmap[] =
{
/*  ^@  ^A  ^B  ^C  ^D  ^E  ^F  ^G  ^H  ^I  ^J  ^K  ^L  ^M  ^N	*/
     L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,

/*  ^O  ^P  ^Q  ^R  ^S  ^T  ^U  ^V  ^W  ^X  ^Y  ^Z  ^[  ^\  ^]	*/
     L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,

/*  ^^  ^_  SPACE  !   "   #    $        %   &    '		*/
    L,  L,  L,     L,  L,  L,   AT_EOL,  L,  L,   L,

/*  (		 )            *	       +           ,  -     .   */
    OPEN_PAREN,  CLOSE_PAREN, CLOSURE, PLUS_CLOSE, L, DASH, ANY,

/*  /   0   1   2   3   4   5   6   7   8   9   :   ;   <   =	*/
    L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,

/*  >         ?							*/
    L,        OPTIONAL,

/*  @   A   B   C   D   E   F   G   H   I   J   K   L   M   N	*/
    L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,

/*  O   P   Q   R   S   T   U   V   W   X   Y   Z   		*/
    L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,

/*  [		\	]		^			*/
    CCL_START,	L,	CCL_END, 	AT_BOL,

/*  _   `   a   b   c   d   e   f   g   h   i   j   k   l   m	*/
    L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,

/*  n   o   p   q   r   s   t   u   v   w   x   y   z   	*/
    L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,  L,

/*  {            |    }            DEL 				*/
    OPEN_CURLY,  OR,  CLOSE_CURLY, L
};

PRIVATE char  *(*Ifunct) P((void)) ; /* Input function pointer		 */
PRIVATE char  *Input  = "" ;	     /* Current position in input string */
PRIVATE char  *S_input 	   ;  	     /* Beginning of input string	 */
PRIVATE TOKEN Current_tok  ;  	     /* Current token		 	 */
PRIVATE int   Lexeme	   ;  	     /* Value associated with LITERAL	 */

#define	MATCH(t)   (Current_tok == (t))

PRIVATE void 	parse_err	P(( ERR_NUM type		));
PRIVATE	void	warning		P(( WARN_NUM type		));
PRIVATE void	errmsg		P(( int type, char **table, char *msgtype ));
PRIVATE NFA 	*new		P(( void 			));
PRIVATE void	discard		P(( NFA *nfa_to_discard		));
PRIVATE char	*save		P(( char *str			));
PRIVATE char	*get_macro	P(( char **namep		));
PRIVATE void	print_a_macro	P(( MACRO *mac			));
PRIVATE TOKEN	advance		P(( void 			));
PRIVATE NFA	*machine	P(( void 			));
PRIVATE NFA	*rule		P(( void 			));
PRIVATE void	expr		P(( NFA **startp, NFA **endp	));
PRIVATE void	cat_expr	P(( NFA **startp, NFA **endp	));
PRIVATE int	first_in_cat	P(( TOKEN tok			));
PRIVATE void	factor		P(( NFA **startp, NFA **endp	));
PRIVATE void	term		P(( NFA **startp, NFA **endp	));
PRIVATE void	dodash		P(( SET *set			));



PRIVATE	void	warning( type )
WARN_NUM	type;			/* Print error mesage and arrow to */
{
    errmsg( (int)type, Warnmsgs, "WARNING" );
}

PRIVATE	void	parse_err( type )
ERR_NUM	type;				/* Print error mesage and arrow to */
{
    errmsg( (int)type, Errmsgs, "ERROR" );
    exit(1);
}

PRIVATE void errmsg( type, table, msgtype )
int  type;
char **table;
char *msgtype;
{			  	  	/* highlight point of error.       */
    char *p;
    fprintf(stderr, "%s (line %d) %s\n", msgtype, Actual_lineno, table[type] );

    for( p = S_input; ++p <= Input; putc('-', stderr) )
	;
    fprintf( stderr, "v\n%s\n", S_input );
    exit( 1 );
}

/*--------------------------------------------------------------*/

PRIVATE	NFA  *new()			/* NFA management functions */
{
    NFA	*p;
    static int first_time = 1;

    if( first_time )
    {
	if( !( Nfa_states = (NFA *) calloc(NFA_MAX, sizeof(NFA)) ))
	    parse_err( E_MEM );	/* doesn't return */

	first_time = 0;
	Sp = &Sstack[ -1 ];
    }

    if( ++Nstates >= NFA_MAX )
	parse_err( E_LENGTH );	/* doesn't return */

    /* If the stack is not ok, it's empty */

    p = !STACK_OK() ? &Nfa_states[Next_alloc++] : POP();
    p->edge = EPSILON;
    return p;
}

/*- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

PRIVATE	void	discard( nfa_to_discard )
NFA	*nfa_to_discard;
{
    --Nstates;

    memset( nfa_to_discard, 0, sizeof(NFA) );
    nfa_to_discard->edge = EMPTY ;
    PUSH( nfa_to_discard );

    if( !STACK_OK() )
	parse_err( E_STACK );	/* doesn't return */
}

/*----------------------------------------------------------------------*/

PRIVATE char	*save( str )		/* String-management function. */
char	*str;
{
    char	*textp, *startp;
    int		len;
    static int	first_time = 1;
    static char size[8];	/* Query-mode size */
    static int  *strings;  	 /* Place to save accepting strings	*/
    static int  *savep;		 /* Current position in strings array.	*/

    if( first_time )
    {
	if( !(savep = strings = (int *) malloc( STR_MAX )) )
	    parse_err( E_MEM );	/* doesn't return */
	first_time = 0;
    }

    if( !str ) /* query mode. Return number of bytes in use */
    {
	sprintf( size, "%ld", (long)(savep - strings) );
	return size;
    }

    if( *str == '|' )
	return (char *)( savep + 1 );

    *savep++ = Lineno;

    for( textp = (char *)savep ; *str ; *textp++ = *str++ )
	if( textp >=  (char *)strings + (STR_MAX-1) )
	    parse_err( E_STRINGS );	/* doesn't return */

    *textp++ = '\0' ;

    /* Increment savep past the text. "len" is initialized to the string length.
     * The "len/sizeof(int)" truncates the size down to an even multiple of the
     * current int size. The "+(len % sizeof(int) != 0)" adds 1 to the truncated
     * size if the string length isn't an even multiple of the int size (the !=
     * operator evaluates to 1 or 0). Return a pointer to the string itself.
     */

    startp  = (char *)savep;
    len     = textp - startp;
    savep  += (len / sizeof(int)) + (len % sizeof(int) != 0);
    return startp;
}

/*------------------------------------------------------------*/

PUBLIC  void	new_macro( def )
char	*def;
{
    /* Add a new macro to the table. If two macros have the same name, the
     * second one takes precedence. A definition takes the form:
     *		 name <whitespace> text [<whitespace>]
     * whitespace at the end of the line is ignored.
     */

    unsigned hash_add();

    char  *name;		/* Name component of macro definition	*/
    char  *text;		/* text part of macro definition	*/
    char  *edef;		/* pointer to end of text part		*/
    MACRO *p;
    static int first_time = 1;

    if( first_time )
    {
	first_time = 0;
	Macros = maketab( 31, hash_add, strcmp );
    }

    for( name = def; *def && !isspace(*def) ; def++ )	   /* Isolate name */
	;
    if( *def )
	*def++ = '\0' ;

    /* Isolate the definition text. This process is complicated because you need
     * to discard any trailing whitespace on the line. The first while loop
     * skips the preceding whitespace. The for loop is looking for end of
     * string. If you find a white character (and the \n at the end of string
     * is white), remember the position as a potential end of string.
     */

    while( isspace( *def ) )	     /* skip up to macro body 		   */
	++def;

    text = def;			     /* Remember start of replacement text */

    edef = NULL;		     /* strip trailing white space	   */
    while(  *def  )
    {
	if( !isspace(*def) )
	    ++def;
	else
	    for(edef = def++; isspace(*def) ; ++def )
		;
    }

    if( edef )
	*edef = '\0';
				     /* Add the macro to the symbol table  */

    p  = (MACRO *) newsym( sizeof(MACRO) );
    strncpy( p->name, name, MAC_NAME_MAX );
    strncpy( p->text, text, MAC_TEXT_MAX );
    addsym( Macros, p );

    D( printf("Added macro definition, macro table now is:\n");	)
    D( print_macs();						)
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -*/

PRIVATE char	*get_macro( namep )
char	**namep;
{
    /* Return a pointer to the contents of a macro having the indicated
     * name. Abort with a message if no macro exists. The macro name includes
     * the brackets. *namep is modified to point past the close brace.
     */

    char   *p;
    MACRO  *mac;
						/* {		     */
    if( !(p = strchr( ++(*namep), '}')) )	/* skip { and find } */
	parse_err( E_BADMAC );			/* print msg & abort */
    else
    {
	*p = '\0';				/* Overwrite close brace. { */
	if( !(mac = (MACRO *) findsym( Macros, *namep )) )
	    parse_err( E_NOMAC );
	*p++   = '}';				/* Put the brace back.	  */

	*namep =  p ;				/* Update name pointer.   */
	return mac->text;
    }

    return "ERROR";			    /* If you get here, it's a bug */
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -*/

PRIVATE	void print_a_macro( mac )	/* Workhorse function needed by      */
MACRO	*mac;				/* ptab() call in printmacs(), below */
{
    printf( "%-16s--[%s]--\n", mac->name, mac->text );
}

PUBLIC void printmacs( ANSI(void) )	/* Print all the macros to stdout */
{
    if( !Macros )
	printf("\tThere are no macros\n");
    else
    {
	printf("\nMACROS:\n");
	ptab( Macros, (void(*)P((void*,...))) print_a_macro, NULL, 1 );
    }
}

/*----------------------------------------------------------------
 * Lexical analyzer:
 *
 * Lexical analysis is trivial because all lexemes are single-character values.
 * The only complications are escape sequences and quoted strings, both
 * of which are handled by advance(), below. This routine advances past the
 * current token, putting the new token into Current_tok and the equivalent
 * lexeme into Lexeme. If the character was escaped, Lexeme holds the actual
 * value. For example, if a "\s" is encountered, Lexeme will hold a space
 * character.  The MATCH(x) macro returns true if x matches the current token.
 * Advance both modifies Current_tok to the current token and returns it.
 *
 * Macro expansion is handled by means of a stack (declared at the top
 * of the subroutine). When an expansion request is encountered, the
 * current input buffer is stacked, and input is read from the macro
 * text. This process repeats with nested macros, so SSIZE controls
 * the maximum macro-nesting depth.
 */

PRIVATE	TOKEN	advance()
{
    static int   inquote = 0;   	/* Processing quoted string	*/
    int	         saw_esc;       	/* Saw a backslash		*/
    static char  *stack[ SSIZE ],	/* Input-source stack		*/
		 **sp = NULL;		/* and stack pointer.		*/

    if( !sp )				/* Initialize sp. 		*/
        sp = stack - 1;			/* Necessary for large model    */

    if( Current_tok == EOS )		/* Get another line	        */
    {
	if( inquote )
	    parse_err( E_NEWLINE );	/* doesn't return */
	do
	{
	    /* Sit in this loop until a non-blank line is read into	*/
	    /* the "Input" array.					*/

	    if( !(Input = (*Ifunct)()) )	/* then at end of file  */
	    {
		Current_tok = END_OF_INPUT;
		goto exit;
	    }
	    while( isspace( *Input ) )	  	/* Ignore leading	  */
		Input++;		  	/* white space...	  */

	} while ( !*Input );		  	/* and blank lines.	  */

	S_input = Input;		  	/* Remember start of line */
    }				          	/* for error messages.    */

    while( *Input == '\0' )
    {
	if( INBOUNDS(stack, sp) )	 /* Restore previous input source */
	{
	    Input = *sp-- ;
	    continue;
	}

	Current_tok = EOS;	       /* No more input sources to restore */
	Lexeme = '\0';		       /* ie. you're at the real end of    */
	goto exit;		       /* string.			   */
    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -