⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nfa.c

📁 compiler
💻 C
📖 第 1 页 / 共 2 页
字号:

    if( !inquote )
    {
	while( *Input == '{' )		  /* Macro expansion required     }*/
	{
	    *++sp = Input ;		  /* Stack current input string.    */
	    Input = get_macro( sp );	  /* Use macro body as input string */

	    if( TOOHIGH(stack,sp) )
		parse_err(E_MACDEPTH);	  /* Stack overflow, abort */
	}
    }

    if( *Input == '"' )
    {				  /* At either start and end of a quoted      */
	inquote = ~inquote;	  /* string. All characters are treated  as   */
	if( !*++Input )		  /* literals while inquote is true).         */
	{
	    Current_tok = EOS ;
	    Lexeme = '\0';
	    goto exit;
	}
    }

    saw_esc = (*Input == '\\');

    if( !inquote )
    {
	if( isspace(*Input) )
	{
	    Current_tok = EOS ;
	    Lexeme	= '\0';
	    goto exit;
	}
	Lexeme = esc( &Input );
    }
    else
    {
	if( saw_esc && Input[1] == '"' )
	{
	    Input  += 2;	/* Skip the escaped character */
	    Lexeme = '"';
	}
	else
	    Lexeme = *Input++ ;
    }

    Current_tok = (inquote || saw_esc) ? L : Tokmap[Lexeme] ;
exit:
    return Current_tok;
}


/*--------------------------------------------------------------
 * The Parser:
 *	A simple recursive descent parser that creates a Thompson NFA for
 * 	a regular expression. The access routine [thompson()] is at the
 *	bottom. The NFA is created as a directed graph, with each node
 *	containing pointer's to the next node. Since the structures are
 *	allocated from an array, the machine can also be considered
 *	as an array where the state number is the array index.
 */

PRIVATE  NFA	*machine()
{
    NFA	*start;
    NFA	*p;

    ENTER("machine");

    p = start = new();
    p->next   = rule();

    while( !MATCH(END_OF_INPUT) )
    {
	p->next2 = new();
	p        = p->next2;
	p->next  = rule();
    }

    LEAVE("machine");
    return start;
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -*/

PRIVATE NFA  *rule()
{
    /*	rule	--> expr  EOS action
     *		    ^expr EOS action
     *		    expr$ EOS action
     *
     *	action	--> <tabs> <string of characters>
     *		    epsilon
     */

    NFA	*start = NULL;
    NFA	*end   = NULL;
    int	anchor = NONE;

    ENTER("rule");

    if( MATCH( AT_BOL ) )
    {
    	start 	    =  new() ;
	start->edge =  '\n'  ;
	anchor      |= START ;
	advance();
	expr( &start->next, &end );
    }
    else
	expr( &start, &end );

    if( MATCH( AT_EOL ) )
    {
	/*  pattern followed by a carriage-return or linefeed (use a
	 *  character class).
	 */

	advance();
    	end->next =  new()     ;
	end->edge =  CCL       ;

	if( !( end->bitset = newset()) )
	    parse_err( E_MEM );		/* doesn't return */

	ADD( end->bitset, '\n' );

	if( !Unix )
	    ADD( end->bitset, '\r' );

	end       =  end->next ;
	anchor    |= END       ;
    }

    while( isspace(*Input) )
	Input++ ;

    end->accept = save( Input );
    end->anchor = anchor;
    advance();				/* skip past EOS */

    LEAVE("rule");
    return start;
}

PRIVATE	void expr( startp,  endp )
NFA	**startp, **endp ;
{
    /* Because a recursive descent compiler can't handle left recursion,
     * the productions:
     *
     *	expr	-> expr OR cat_expr
     *		|  cat_expr
     *
     * must be translated into:
     *
     *	expr	-> cat_expr expr'
     *	expr'	-> OR cat_expr expr'
     *		   epsilon
     *
     * which can be implemented with this loop:
     *
     *	cat_expr
     *	while( match(OR) )
     *		cat_expr
     *		do the OR
     */

    NFA	*e2_start = NULL; /* expression to right of | */
    NFA *e2_end   = NULL;
    NFA *p;

    ENTER("expr");

    cat_expr( startp, endp );

    while( MATCH( OR ) )
    {
	advance();
	cat_expr( &e2_start, &e2_end );

	p = new();
	p->next2 = e2_start ;
	p->next  = *startp  ;
	*startp  = p;

	p = new();
	(*endp)->next = p;
	e2_end ->next = p;
	*endp = p;
    }
    LEAVE("expr");
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -*/

PRIVATE	void cat_expr( startp,  endp )
NFA	**startp, **endp ;
{
    /* The same translations that were needed in the expr rules are needed again
     * here:
     *
     *	cat_expr  -> cat_expr | factor
     *		     factor
     *
     * is translated to:
     *
     *	cat_expr  -> factor cat_expr'
     *	cat_expr' -> | factor cat_expr'
     *		     epsilon
     */

    NFA	 *e2_start, *e2_end;

    ENTER("cat_expr");

    if( first_in_cat( Current_tok ) )
	factor( startp, endp );

    while(  first_in_cat( Current_tok )  )
    {
	factor( &e2_start, &e2_end );

	memcpy( *endp, e2_start, sizeof(NFA));
	discard( e2_start );

	*endp = e2_end;
    }

    LEAVE("cat_expr");
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -*/

PRIVATE	int	first_in_cat( tok )
TOKEN	tok;
{
    switch( tok )
    {
    case CLOSE_PAREN:
    case AT_EOL:
    case OR:
    case EOS:		return 0;


    case CLOSURE:
    case PLUS_CLOSE:
    case OPTIONAL:	parse_err( E_CLOSE   );	return 0;

    case CCL_END:	parse_err( E_BRACKET );	return 0;
    case AT_BOL:	parse_err( E_BOL     );	return 0;
    }

    return 1;
}

PRIVATE	void factor( startp, endp )
NFA	**startp, **endp;
{
     /*		factor	--> term*  | term+  | term?
     */

    NFA	*start, *end;

    ENTER("factor");

    term( startp, endp );

    if( MATCH(CLOSURE) || MATCH(PLUS_CLOSE) || MATCH(OPTIONAL) )
    {
	start 	  = new()   ;
	end   	  = new()   ;
	start->next   = *startp ;
	(*endp)->next = end     ;

	if( MATCH(CLOSURE) || MATCH(OPTIONAL) )		  /*   * or ?   */
	    start->next2 = end;

	if( MATCH(CLOSURE) || MATCH(PLUS_CLOSE) )	  /*   * or +   */
	    (*endp)->next2 = *startp;

	*startp  = start ;
	*endp    = end   ;
	advance();
    }

    LEAVE("factor");
}

PRIVATE	void term( startp, endp )
NFA	**startp, **endp;
{
    /* Process the term productions:
     *
     * term  --> [...]  |  [^...]  |  []  |  [^] |  .  | (expr) | <character>
     *
     * The [] is nonstandard. It matches a space, tab, formfeed, or newline,
     * but not a carriage return (\r). All of these are single nodes in the
     * NFA.
     */

    NFA	*start;
    int c;

    ENTER("term");

    if( MATCH( OPEN_PAREN ) )
    {
	advance();
	expr( startp, endp );
	if( MATCH( CLOSE_PAREN ) )
	    advance();
	else
	    parse_err( E_PAREN );	/* doesn't return */
    }
    else
    {
	*startp = start 	  = new();
	*endp   = start->next = new();

	if( !( MATCH( ANY ) || MATCH( CCL_START) ))
	{
	    start->edge = Lexeme;
	    advance();
	}
	else
	{
	    start->edge = CCL;

	    if( !( start->bitset = newset()) )
		parse_err( E_MEM );		/* doesn't return */

	    if( MATCH( ANY ) )			/* 	dot (.)	   */
	    {
		ADD( start->bitset, '\n' );
		if( !Unix )
		    ADD( start->bitset, '\r' );

		COMPLEMENT( start->bitset );
	    }
	    else
	    {
		advance();
		if( MATCH( AT_BOL ) )		/* Negative character class */
		{
		    advance();

		    ADD( start->bitset, '\n' );	/* Don't include \n in class */
		    if( !Unix )
			ADD( start->bitset, '\r' );

		    COMPLEMENT( start->bitset );
		}
		if( ! MATCH( CCL_END ) )
		    dodash( start->bitset );
		else				/* 	[] or [^]	    */
		    for( c = 0; c <= ' '; ++c )
			ADD( start->bitset, c );
	    }
	    advance();
	}
    }
    LEAVE("term");
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

PRIVATE	void	dodash( set )
SET	*set;				/* Pointer to ccl character set	*/
{
    register int	first;

    if( MATCH( DASH ) )		/* Treat [-...] as a literal dash */
    {				/* But print warning.		  */
	warning( W_STARTDASH );
	ADD( set, Lexeme );
	advance();
    }

    for(; !MATCH( EOS )  &&  !MATCH( CCL_END ) ; advance() )
    {
	if( ! MATCH( DASH ) )
	{
	    first = Lexeme;
	    ADD( set, Lexeme );
	}
	else	/* looking at a dash */
	{
	    advance();
	    if( MATCH( CCL_END ) )	/* Treat [...-] as literal */
	    {
		warning( W_ENDDASH );
		ADD( set, '-' );
	    }
	    else
	    {
		for(; first <= Lexeme ; first++ )
		    ADD( set, first );
	    }
	}
    }
}


PUBLIC  NFA	*thompson( input_function, max_state, start_state )
char	*(*input_function) P((void));
int	*max_state;
NFA	**start_state;
{
    /* Access routine to this module. Return a pointer to a NFA transition
     * table that represents the regular expression pointed to by expr or
     * NULL if there's not enough memory. Modify *max_state to reflect the
     * largest state number used. This number will probably be a larger
     * number than the total number of states. Modify *start_state to point
     * to the start state. This pointer is garbage if thompson() returned 0.
     * The memory for the table is fetched from malloc(); use free() to
     * discard it.
     */

    CLEAR_STACK();

    Ifunct = input_function;

    Current_tok = EOS;		/* Load first token	*/
    advance();

    Nstates    = 0;
    Next_alloc = 0;

    *start_state = machine();	/* Manufacture the NFA	*/
    *max_state   = Next_alloc ;	/* Max state # in NFA   */

    if( Verbose > 1 )
	print_nfa( Nfa_states, *max_state, *start_state );

    if( Verbose )
    {
	printf("%d/%d NFA states used.\n", *max_state, NFA_MAX );
	printf("%s/%d bytes used for accept strings.\n\n", save(NULL), STR_MAX);
    }
    return Nfa_states;
}



#ifdef MAIN

#define ALLOC
#include "globals.h"

/*- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

PRIVATE pnode( nfa )	/* For debugging, print a single NFA structure */
NFA	*nfa;
{
    if( !nfa )
	    printf("(NULL)");
    else
    {
	printf( "+--Structure at 0x%04x-------------------\n", nfa );
	printf( "|edge   = 0x%x (%c)\n", nfa->edge, nfa->edge );
	printf( "|next   = 0x%x\n", nfa->next );
	printf( "|next2  = 0x%x\n", nfa->next2 );
	printf( "|accept = <%s>\n", nfa->accept );
	printf( "|bitset = " );
	printccl( nfa->bitset );
	printf("\n");
	printf( "+----------------------------------------\n" );
    }
}

/*- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

PRIVATE char	*getline()
{
    static char	buf[80];

    printf("%d: ", Lineno++ );
    gets( buf );

    return  *buf ? buf : NULL ;
}

/*- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

main( argc, argv )
char	**argv;
{
    NFA	*nfa, *start_state;
    int	max_state;

    Verbose = 2;

    nfa = thompson(getline, &max_state, &start_state);
}

#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -