⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dfa.c

📁 早期freebsd实现
💻 C
📖 第 1 页 / 共 4 页
字号:
      if (!regcopy)	reg_error("out of memory");            /* This is a complete kludge and could potentially break	 \<letter> escapes . . . */      case_fold = 0;      for (i = 0; i < len; ++i)	if (ISUPPER(s[i]))	  regcopy[i] = tolower(s[i]);	else	  regcopy[i] = s[i];      reginit(r);      r->mustn = 0;      r->must[0] = '\0';      regparse(regcopy, len, r);      free(regcopy);      regmust(r);      reganalyze(r, searchflag);      case_fold = 1;      reginit(r);      regparse(s, len, r);      reganalyze(r, searchflag);    }  else    {        reginit(r);        regparse(s, len, r);        regmust(r);        reganalyze(r, searchflag);    }}/* Free the storage held by the components of a regexp. */voidreg_free(r)     struct regexp *r;{  int i;  free((ptr_t) r->charsets);  free((ptr_t) r->tokens);  for (i = 0; i < r->sindex; ++i)    free((ptr_t) r->states[i].elems.elems);  free((ptr_t) r->states);  for (i = 0; i < r->tindex; ++i)    if (r->follows[i].elems)      free((ptr_t) r->follows[i].elems);  free((ptr_t) r->follows);  for (i = 0; i < r->tralloc; ++i)    if (r->trans[i])      free((ptr_t) r->trans[i]);    else if (r->fails[i])      free((ptr_t) r->fails[i]);  if (r->realtrans)    free((ptr_t) r->realtrans);  if (r->fails)    free((ptr_t) r->fails);  if (r->newlines)    free((ptr_t) r->newlines);}/*Having found the postfix representation of the regular expression,try to find a long sequence of characters that must appear in any linecontaining the r.e.Finding a "longest" sequence is beyond the scope here;we take an easy way out and hope for the best.(Take "(ab|a)b"--please.)We do a bottom-up calculation of sequences of characters that must appearin matches of r.e.'s represented by trees rooted at the nodes of the postfixrepresentation:	sequences that must appear at the left of the match ("left")	sequences that must appear at the right of the match ("right")	lists of sequences that must appear somewhere in the match ("in")	sequences that must constitute the match ("is")When we get to the root of the tree, we use one of the longest of itscalculated "in" sequences as our answer.  The sequence we find is returned inr->must (where "r" is the single argument passed to "regmust");the length of the sequence is returned in r->mustn.The sequences calculated for the various types of node (in pseudo ANSI c)are shown below.  "p" is the operand of unary operators (and the left-handoperand of binary operators); "q" is the right-hand operand of binary operators."ZERO" means "a zero-length sequence" below.Type	left		right		is		in----	----		-----		--		--char c	# c		# c		# c		# cSET	ZERO		ZERO		ZERO		ZEROSTAR	ZERO		ZERO		ZERO		ZEROQMARK	ZERO		ZERO		ZERO		ZEROPLUS	p->left		p->right	ZERO		p->inCAT	(p->is==ZERO)?	(q->is==ZERO)?	(p->is!=ZERO &&	p->in plus	p->left :	q->right :	q->is!=ZERO) ?	q->in plus	p->is##q->left	p->right##q->is	p->is##q->is :	p->right##q->left					ZEROOR	longest common	longest common	(do p->is and	substrings common to	leading		trailing	q->is have same	p->in and q->in	(sub)sequence	(sub)sequence	length and		of p->left	of p->right	content) ?		and q->left	and q->right	p->is : NULL	If there's anything else we recognize in the tree, all four sequences get setto zero-length sequences.  If there's something we don't recognize in the tree,we just return a zero-length sequence.Break ties in favor of infrequent letters (choosing 'zzz' in preference to'aaa')?And. . .is it here or someplace that we might ponder "optimizations" such as	egrep 'psi|epsilon'	->	egrep 'psi'	egrep 'pepsi|epsilon'	->	egrep 'epsi'					(Yes, we now find "epsi" as a "string					that must occur", but we might also					simplify the *entire* r.e. being sought)	grep '[c]'		->	grep 'c'	grep '(ab|a)b'		->	grep 'ab'	grep 'ab*'		->	grep 'a'	grep 'a*b'		->	grep 'b'There are several issues:	Is optimization easy (enough)?	Does optimization actually accomplish anything,	or is the automaton you get from "psi|epsilon" (for example)	the same as the one you get from "psi" (for example)?	Are optimizable r.e.'s likely to be used in real-life situations	(something like 'ab*' is probably unlikely; something like is	'psi|epsilon' is likelier)?*/static char *icatalloc(old, new)char *	old;const char *	new;{	register char *	result;	register int	oldsize, newsize;	newsize = (new == NULL) ? 0 : strlen(new);	if (old == NULL)		oldsize = 0;	else if (newsize == 0)		return old;	else	oldsize = strlen(old);	if (old == NULL)		result = (char *) malloc(newsize + 1);	else	result = (char *) realloc((void *) old, oldsize + newsize + 1);	if (result != NULL && new != NULL)		(void) strcpy(result + oldsize, new);	return result;}static char *icpyalloc(string)const char *	string;{	return icatalloc((char *) NULL, string);}static char *istrstr(lookin, lookfor)char *		lookin;register char *	lookfor;{	register char *	cp;	register int	len;	len = strlen(lookfor);	for (cp = lookin; *cp != '\0'; ++cp)		if (strncmp(cp, lookfor, len) == 0)			return cp;	return NULL;}static voidifree(cp)char *	cp;{	if (cp != NULL)		free(cp);}static voidfreelist(cpp)register char **	cpp;{	register int	i;	if (cpp == NULL)		return;	for (i = 0; cpp[i] != NULL; ++i) {		free(cpp[i]);		cpp[i] = NULL;	}}static char **enlist(cpp, new, len)register char **	cpp;register char *		new;#ifdef __STDC__size_t                  len;#elseint                     len;#endif{	register int	i, j;	if (cpp == NULL)		return NULL;	if ((new = icpyalloc(new)) == NULL) {		freelist(cpp);		return NULL;	}	new[len] = '\0';	/*	** Is there already something in the list that's new (or longer)?	*/	for (i = 0; cpp[i] != NULL; ++i)		if (istrstr(cpp[i], new) != NULL) {			free(new);			return cpp;		}	/*	** Eliminate any obsoleted strings.	*/	j = 0;	while (cpp[j] != NULL)		if (istrstr(new, cpp[j]) == NULL)			++j;		else {			free(cpp[j]);			if (--i == j)				break;			cpp[j] = cpp[i];		}	/*	** Add the new string.	*/	cpp = (char **) realloc((char *) cpp, (i + 2) * sizeof *cpp);	if (cpp == NULL)		return NULL;	cpp[i] = new;	cpp[i + 1] = NULL;	return cpp;}/*** Given pointers to two strings,** return a pointer to an allocated list of their distinct common substrings.** Return NULL if something seems wild.*/static char **comsubs(left, right)char *	left;char *	right;{	register char **	cpp;	register char *		lcp;	register char *		rcp;	register int		i, len;	if (left == NULL || right == NULL)		return NULL;	cpp = (char **) malloc(sizeof *cpp);	if (cpp == NULL)		return NULL;	cpp[0] = NULL;	for (lcp = left; *lcp != '\0'; ++lcp) {		len = 0;		rcp = strchr(right, *lcp);		while (rcp != NULL) {			for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i)				;			if (i > len)				len = i;			rcp = strchr(rcp + 1, *lcp);		}		if (len == 0)			continue;#ifdef __STDC__		if ((cpp = enlist(cpp, lcp, (size_t)len)) == NULL)#else		if ((cpp = enlist(cpp, lcp, len)) == NULL)#endif			break;	}	return cpp;}static char **addlists(old, new)char **	old;char **	new;{	register int	i;	if (old == NULL || new == NULL)		return NULL;	for (i = 0; new[i] != NULL; ++i) {		old = enlist(old, new[i], strlen(new[i]));		if (old == NULL)			break;	}	return old;}/*** Given two lists of substrings,** return a new list giving substrings common to both.*/static char **inboth(left, right)char **	left;char **	right;{	register char **	both;	register char **	temp;	register int		lnum, rnum;	if (left == NULL || right == NULL)		return NULL;	both = (char **) malloc(sizeof *both);	if (both == NULL)		return NULL;	both[0] = NULL;	for (lnum = 0; left[lnum] != NULL; ++lnum) {		for (rnum = 0; right[rnum] != NULL; ++rnum) {			temp = comsubs(left[lnum], right[rnum]);			if (temp == NULL) {				freelist(both);				return NULL;			}			both = addlists(both, temp);			freelist(temp);			if (both == NULL)				return NULL;		}	}	return both;}/*typedef struct {	char **	in;	char *	left;	char *	right;	char *	is;} must; */static voidresetmust(mp)register must *	mp;{	mp->left[0] = mp->right[0] = mp->is[0] = '\0';	freelist(mp->in);}static voidregmust(r)register struct regexp *	r;{	register must *		musts;	register must *		mp;	register char *		result = "";	register int		ri;	register int		i;	register _token		t;	static must		must0;	reg->mustn = 0;	reg->must[0] = '\0';	musts = (must *) malloc((reg->tindex + 1) * sizeof *musts);	if (musts == NULL)		return;	mp = musts;	for (i = 0; i <= reg->tindex; ++i)		mp[i] = must0;	for (i = 0; i <= reg->tindex; ++i) {		mp[i].in = (char **) malloc(sizeof *mp[i].in);		mp[i].left = malloc(2);		mp[i].right = malloc(2);		mp[i].is = malloc(2);		if (mp[i].in == NULL || mp[i].left == NULL ||			mp[i].right == NULL || mp[i].is == NULL)				goto done;		mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0';		mp[i].in[0] = NULL;	}	for (ri = 0; ri < reg->tindex; ++ri) {		switch (t = reg->tokens[ri]) {		case _ALLBEGLINE:		case _ALLENDLINE:		case _LPAREN:		case _RPAREN:			goto done;		/* "cannot happen" */		case _EMPTY:		case _BEGLINE:		case _ENDLINE:		case _BEGWORD:		case _ENDWORD:		case _LIMWORD:		case _NOTLIMWORD:		case _BACKREF:			resetmust(mp);			break;		case _STAR:		case _QMARK:			if (mp <= musts)				goto done;	/* "cannot happen" */			--mp;			resetmust(mp);			break;		case _OR:			if (mp < &musts[2])				goto done;	/* "cannot happen" */			{				register char **	new;				register must *		lmp;				register must *		rmp;				register int		j, ln, rn, n;				rmp = --mp;				lmp = --mp;				/* Guaranteed to be.  Unlikely, but. . . */				if (strcmp(lmp->is, rmp->is) != 0)					lmp->is[0] = '\0';				/* Left side--easy */				i = 0;				while (lmp->left[i] != '\0' &&					lmp->left[i] == rmp->left[i])						++i;				lmp->left[i] = '\0';				/* Right side */				ln = strlen(lmp->right);				rn = strlen(rmp->right);				n = ln;				if (n > rn)					n = rn;				for (i = 0; i < n; ++i)					if (lmp->right[ln - i - 1] !=					    rmp->right[rn - i - 1])						break;				for (j = 0; j < i; ++j)					lmp->right[j] =						lmp->right[(ln - i) + j];				lmp->right[j] = '\0';				new = inboth(lmp->in, rmp->in);				if (new == NULL)					goto done;				freelist(lmp->in);				free((char *) lmp->in);				lmp->in = new;			}			break;		case _PLUS:			if (mp <= musts)				goto done;	/* "cannot happen" */			--mp;			mp->is[0] = '\0';			break;		case _END:			if (mp != &musts[1])				goto done;	/* "cannot happen" */			for (i = 0; musts[0].in[i] != NULL; ++i)				if (strlen(musts[0].in[i]) > strlen(result))					result = musts[0].in[i];			goto done;		case _CAT:			if (mp < &musts[2])				goto done;	/* "cannot happen" */			{				register must *	lmp;				register must *	rmp;				rmp = --mp;				lmp = --mp;				/*				** In.  Everything in left, plus everything in				** right, plus catenation of				** left's right and right's left.				*/				lmp->in = addlists(lmp->in, rmp->in);				if (lmp->in == NULL)					goto done;				if (lmp->right[0] != '\0' &&					rmp->left[0] != '\0') {						register char *	tp;						tp = icpyalloc(lmp->right);						if (tp == NULL)							goto done;						tp = icatalloc(tp, rmp->left);						if (tp == NULL)							goto done;						lmp->in = enlist(lmp->in, tp,							strlen(tp));						free(tp);						if (lmp->in == NULL)							goto done;				}				/* Left-hand */				if (lmp->is[0] != '\0') {					lmp->left = icatalloc(lmp->left,						rmp->left);					if (lmp->left == NULL)						goto done;				}				/* Right-hand */				if (rmp->is[0] == '\0')					lmp->right[0] = '\0';				lmp->right = icatalloc(lmp->right, rmp->right);				if (lmp->right == NULL)					goto done;				/* Guaranteed to be */				if (lmp->is[0] != '\0' && rmp->is[0] != '\0') {					lmp->is = icatalloc(lmp->is, rmp->is);					if (lmp->is == NULL)						goto done;				}			}			break;		default:			if (t < _END) {				/* "cannot happen" */				goto done;			} else if (t == '\0') {				/* not on *my* shift */				goto done;			} else if (t >= _SET) {				/* easy enough */				resetmust(mp);			} else {				/* plain character */				resetmust(mp);				mp->is[0] = mp->left[0] = mp->right[0] = t;				mp->is[1] = mp->left[1] = mp->right[1] = '\0';				mp->in = enlist(mp->in, mp->is, 1);				if (mp->in == NULL)					goto done;			}			break;		}		++mp;	}done:	(void) strncpy(reg->must, result, MUST_MAX - 1);	reg->must[MUST_MAX - 1] = '\0';	reg->mustn = strlen(reg->must);	mp = musts;	for (i = 0; i <= reg->tindex; ++i) {		freelist(mp[i].in);		ifree((char *) mp[i].in);		ifree(mp[i].left);		ifree(mp[i].right);		ifree(mp[i].is);	}	free((char *) mp);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -