⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rexpr.c

📁 本工具提供一个词法分析器和语法分析器的集成开发环境
💻 C
字号:
/* * This file contains code for * *      int rexpr(char *expr, char *s); * * which answers * *      1 if 's' is in the language described by the regular expression 'expr' *      0 if it is not *     -1 if the regular expression is invalid * * Language membership is determined by constructing a non-deterministic * finite automata (NFA) from the regular expression.  A depth- * first-search is performed on the NFA (graph) to check for a match of 's'. * Each non-epsilon arc consumes one character from 's'.  Backtracking is * performed to check all possible paths through the NFA. * * Regular expressions follow the meta-language: * * <regExpr>        ::= <andExpr> ( '|' <andExpr> )* * * <andExpr>        ::= <expr> ( <expr> )* * * <expr>           ::= {'~'} '[' <atomList> ']' <repeatSymbol> *                      | '(' <regExpr> ')' <repeatSymbol> *                      | '{' <regExpr> '}' <repeatSymbol> *                      | <atom> <repeatSymbol> * * <repeatSymbol>   ::= { '*' | '+' } * * <atomList>       ::= <atom> ( <atom> )* *                      | { <atomList> } <atom> '-' <atom> { <atomList> } * * <atom>           ::= Token[Atom] * * Notes: *		~	means complement the set in [..]. i.e. all characters not listed *		*	means match 0 or more times (can be on expression or atom) *		+	means match 1 or more times (can be on expression or atom) *		{}	optional *		()	grouping *		[]	set of atoms *		x-y	all characters from x to y (found only in [..]) *		\xx the character with value xx * * Examples: *		[a-z]+ *			match 1 or more lower-case letters (e.g. variable) * *		0x[0-9A-Fa-f]+ *			match a hex number with 0x on front (e.g. 0xA1FF) * *		[0-9]+.[0-9]+{e[0-9]+} *			match a floating point number (e.g. 3.14e21) * * Code example: *		if ( rexpr("[a-zA-Z][a-zA-Z0-9]+", str) ) then str is keyword * * Terence Parr * Purdue University * April 1991 */#include <stdio.h>#include <ctype.h>#ifdef __STDC__#include <stdlib.h>#else#include <malloc.h>#endif#include "rexpr.h"#ifdef __STDC__static int regExpr( GraphPtr g );static int andExpr( GraphPtr g );static int expr( GraphPtr g );static int repeatSymbol( GraphPtr g );static int atomList( char *p, int complement );static int next( void );static ArcPtr newGraphArc( void );static NodePtr newNode( void );static int ArcBetweenGraphNode( NodePtr i, NodePtr j, int label );static Graph BuildNFA_atom( int label );static Graph BuildNFA_AB( Graph A, Graph B );static Graph BuildNFA_AorB( Graph A, Graph B );static Graph BuildNFA_set( char *s );static Graph BuildNFA_Astar( Graph A );static Graph BuildNFA_Aplus( Graph A );static Graph BuildNFA_Aoptional( Graph A );#elsestatic int regExpr();static int andExpr();static int expr();static int repeatSymbol();static int atomList();static int next();static ArcPtr newGraphArc();static NodePtr newNode();static int ArcBetweenGraphNode();static Graph BuildNFA_atom();static Graph BuildNFA_AB();static Graph BuildNFA_AorB();static Graph BuildNFA_set();static Graph BuildNFA_Astar();static Graph BuildNFA_Aplus();static Graph BuildNFA_Aoptional();#endifstatic char *_c;static int token, tokchar;static NodePtr accept;static NodePtr freelist = NULL;/* * return 1 if s in language described by expr *        0 if s is not *       -1 if expr is an invalid regular expression */rexpr(expr, s)char *expr, *s;{	NodePtr p,q;	Graph nfa;	int result;	fprintf(stderr, "rexpr(%s,%s);\n", expr,s);	freelist = NULL;	_c = expr;	next();	if ( regExpr(&nfa) == -1 ) return -1;	accept = nfa.right;	result = match(nfa.left, s);	/* free all your memory */	p = q = freelist;	while ( p!=NULL ) { q = p->track; free(p); p = q; }	return result;}/* * do a depth-first-search on the NFA looking for a path from start to * accept state labelled with the characters of 's'. */match(automaton, s)NodePtr automaton;char *s;{	ArcPtr p;		if ( automaton == accept && *s == '\0' ) return 1;	/* match */	for (p=automaton->arcs; p!=NULL; p=p->next)			/* try all arcs */	{		if ( p->label == Epsilon )		{			if ( match(p->target, s) ) return 1;		}		else if ( p->label == *s )				if ( match(p->target, s+1) ) return 1;	}	return 0;}/* * <regExpr>        ::= <andExpr> ( '|' {<andExpr>} )* * * Return -1 if syntax error * Return  0 if none found * Return  1 if a regExrp was found */staticregExpr(g)GraphPtr g;{	Graph g1, g2;		if ( andExpr(&g1) == -1 )	{		return -1;	}		while ( token == '|' )	{		int a;		next();		a = andExpr(&g2);		if ( a == -1 ) return -1;	/* syntax error below */		else if ( !a ) return 1;	/* empty alternative */		g1 = BuildNFA_AorB(g1, g2);	}		if ( token!='\0' ) return -1;	*g = g1;	return 1;}/* * <andExpr>        ::= <expr> ( <expr> )* */staticandExpr(g)GraphPtr g;{	Graph g1, g2;		if ( expr(&g1) == -1 )	{		return -1;	}		while ( token==Atom || token=='{' || token=='(' || token=='~' || token=='[' )	{		if (expr(&g2) == -1) return -1;		g1 = BuildNFA_AB(g1, g2);	}		*g = g1;	return 1;}/* * <expr>           ::=    {'~'} '[' <atomList> ']' <repeatSymbol> *                      | '(' <regExpr> ')' <repeatSymbol> *                      | '{' <regExpr> '}' <repeatSymbol> *                      | <atom> <repeatSymbol> */staticexpr(g)GraphPtr g;{	int complement = 0;	char s[257];    /* alloc space for string of char in [] */		if ( token == '~' || token == '[' )	{		if ( token == '~' ) {complement = 1; next();}		if ( token != '[' ) return -1;		next();		if ( atomList( s, complement ) == -1 ) return -1;		*g = BuildNFA_set( s );		if ( token != ']' ) return -1;		next();		repeatSymbol( g );		return 1;	}	if ( token == '(' )	{		next();		if ( regExpr( g ) == -1 ) return -1;		if ( token != ')' ) return -1;		next();		repeatSymbol( g );		return 1;	}	if ( token == '{' )	{		next();		if ( regExpr( g ) == -1 ) return -1;		if ( token != '}' ) return -1;		next();		/* S p e c i a l  C a s e   O p t i o n a l  {  } */		if ( token != '*' && token != '+' )		{			*g = BuildNFA_Aoptional( *g );		}		repeatSymbol( g );		return 1;	}	if ( token == Atom )	{		*g = BuildNFA_atom( tokchar );		next();		repeatSymbol( g );		return 1;	}		return -1;}/* * <repeatSymbol>   ::= { '*' | '+' } */staticrepeatSymbol(g)GraphPtr g;{	switch ( token )	{		case '*' : *g = BuildNFA_Astar( *g ); next(); break;		case '+' : *g = BuildNFA_Aplus( *g ); next(); break;	}	return 1;}/* * <atomList>       ::= <atom> { <atom> }* *                      { <atomList> } <atom> '-' <atom> { <atomList> } * * a-b is same as ab * q-a is same as q */staticatomList(p, complement)char *p;int complement;{	static unsigned char set[256];		/* no duplicates */	int first, last, i;	char *s = p;		if ( token != Atom ) return -1;		for (i=0; i<256; i++) set[i] = 0;	while ( token == Atom )	{		if ( !set[tokchar] ) *s++ = tokchar;		set[tokchar] = 1;    			/* Add atom to set */		next();		if ( token == '-' )         	/* have we found '-' */		{			first = *(s-1);             /* Get last char */			next();			if ( token != Atom ) return -1;			else			{				last = tokchar;			}			for (i = first+1; i <= last; i++)			{				if ( !set[tokchar] ) *s++ = i;				set[i] = 1;    			/* Add atom to set */			}			next();		}	}	*s = '\0';	if ( complement )	{		for (i=0; i<256; i++) set[i] = !set[i];		for (i=1,s=p; i<256; i++) if ( set[i] ) *s++ = i;		*s = '\0';	}	return 1;}/* a somewhat stupid lexical analyzer */staticnext(){	while ( *_c==' ' || *_c=='\t' || *_c=='\n' ) _c++;	if ( *_c=='\\' )	{		_c++;		if ( isdigit(*_c) )		{			int n=0;			while ( isdigit(*_c) )			{				n = n*10 + (*_c++ - '0');			}			if ( n>255 ) n=255;			tokchar = n;		}		else		{			switch (*_c)			{				case 'n' : tokchar = '\n'; break;				case 't' : tokchar = '\t'; break;				case 'r' : tokchar = '\r'; break;				default  : tokchar = *_c;			}			_c++;		}		token = Atom;	}	else if ( isgraph(*_c) && *_c!='[' && *_c!='(' && *_c!='{' &&			  *_c!='-' && *_c!='}' && *_c!=')' && *_c!=']' &&			  *_c!='+' && *_c!='*' && *_c!='~' && *_c!='|' )	{		token = Atom;		tokchar = *_c++;	}	else	{		token = tokchar = *_c++;	}}/* N F A  B u i l d i n g  R o u t i n e s */staticArcPtrnewGraphArc(){	ArcPtr p;	p = (ArcPtr) calloc(1, sizeof(Arc));	if ( p==NULL ) {fprintf(stderr,"rexpr: out of memory\n"); exit(-1);}	if ( freelist != NULL ) p->track = (ArcPtr) freelist;	freelist = (NodePtr) p;	return p;}staticNodePtrnewNode(){	NodePtr p;	p = (NodePtr) calloc(1, sizeof(Node));	if ( p==NULL ) {fprintf(stderr,"rexpr: out of memory\n"); exit(-1);}	if ( freelist != NULL ) p->track = freelist;	freelist = p;	return p;}staticArcBetweenGraphNodes(i, j, label)NodePtr i, j;int label;{	ArcPtr a;		a = newGraphArc();	if ( i->arcs == NULL ) i->arctail = i->arcs = a;	else {(i->arctail)->next = a; i->arctail = a;}	a->label = label;	a->target = j;}static GraphBuildNFA_atom(label)int label;{	Graph g;		g.left = newNode();	g.right = newNode();	ArcBetweenGraphNodes(g.left, g.right, label);	return( g );}static GraphBuildNFA_AB(A, B)Graph A, B;{	Graph g;		ArcBetweenGraphNodes(A.right, B.left, Epsilon);	g.left = A.left;	g.right = B.right;	return( g );}static GraphBuildNFA_AorB(A, B)Graph A, B;{	Graph g;		g.left = newNode();	ArcBetweenGraphNodes(g.left, A.left, Epsilon);	ArcBetweenGraphNodes(g.left, B.left, Epsilon);	g.right = newNode();	ArcBetweenGraphNodes(A.right, g.right, Epsilon);	ArcBetweenGraphNodes(B.right, g.right, Epsilon);	return( g );}static GraphBuildNFA_set( s )char *s;{	Graph g;		if ( s == NULL ) return g;		g.left = newNode();	g.right = newNode();	while ( *s != '\0' )	{		ArcBetweenGraphNodes(g.left, g.right, *s++);	}	return g;}static GraphBuildNFA_Astar( A )Graph A;{	Graph g;	g.left = newNode();	g.right = newNode();		ArcBetweenGraphNodes(g.left, A.left, Epsilon);	ArcBetweenGraphNodes(g.left, g.right, Epsilon);	ArcBetweenGraphNodes(A.right, g.right, Epsilon);	ArcBetweenGraphNodes(A.right, A.left, Epsilon);		return( g );}static GraphBuildNFA_Aplus( A )Graph A;{	ArcBetweenGraphNodes(A.right, A.left, Epsilon);		return( A );}static GraphBuildNFA_Aoptional( A )Graph A;{	Graph g;		g.left = newNode();	g.right = newNode();		ArcBetweenGraphNodes(g.left, A.left, Epsilon);	ArcBetweenGraphNodes(g.left, g.right, Epsilon);	ArcBetweenGraphNodes(A.right, g.right, Epsilon);		return( g );}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -