📄 re.c

📁 早期freebsd实现
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/***************************************************************************
 * This program is Copyright (C) 1986, 1987, 1988 by Jonathan Payne.  JOVE *
 * is provided to you without charge, and with no warranty.  You may give  *
 * away copies of JOVE, including sources, provided that this notice is    *
 * included in all the files.                                              *
 ***************************************************************************/

/* search package */

#include "jove.h"
#include "re.h"
#include "ctype.h"

private void	search proto((int, bool, bool));
private int	do_comp proto((struct RE_block *,int));

/* Search and replace strings. */
private char	searchstr[128];		/* global search string */
char	rep_search[128];		/* replace search string */
char	rep_str[128];			/* contains replacement string */

/* Search options. */
bool	CaseIgnore = OFF;	/* ignore case? */
bool	WrapScan = OFF;		/* wrap at end of buffer? */
bool	UseRE = OFF;		/* use regular expressions */

/* Case-independent comparison of two characters. */
#define cind_cmp(a, b)	(CharUpcase(a) == CharUpcase(b))

/* Pattern reader state: REptr points at the next unread pattern
 * character and REpeekc holds one pushed-back character, or -1 when
 * nothing is pushed back.
 */
private int	REpeekc;
private char	*REptr;

/* Return the next pattern character.  A pushed-back character in
 * REpeekc is delivered (and cleared) first.  Otherwise the character
 * at REptr is consumed; at the terminating NUL, '\0' is returned and
 * REptr is not advanced, so further calls keep returning '\0'.
 */
private int
REgetc()
{
	int	pushed = REpeekc;

	if (pushed != -1) {
		REpeekc = -1;
		return pushed;
	}
	if (*REptr == '\0')
		return '\0';
	return *REptr++;
}

/* Verbs of the compiled pattern.  Every verb value is even, so the
 * low bit is available for the STAR flag.
 */
#define STAR 	01	/* Match any number of last RE. */
#define AT_BOL	2	/* ^ */
#define AT_EOL	4	/* $ */
#define AT_BOW	6	/* \< */
#define AT_EOW	8	/* \> */
#define OPENP	10	/* \( */
#define CLOSEP	12	/* \) */
#define CURLYB	14	/* \{ */

#define NOSTR	14	/* Codes <= NOSTR can't be *'d. */

#define ANYC	(NOSTR+2)		/* . */
#define NORMC	(ANYC+2)		/* normal character */
#define CINDC	(NORMC+2)		/* case independent character */
#define ONE_OF	(CINDC+2)		/* [xxx] */
#define NONE_OF	(ONE_OF+2)	/* [^xxx] */
#define BACKREF	(NONE_OF+2)	/* \# */
#define EOP	(BACKREF+2)	/* end of pattern */

/* A ONE_OF/NONE_OF verb is followed by a bit vector of SETSIZE bytes.
 * For character c, SETBYTE(c) is the byte index and SETBIT(c) the mask
 * within that byte.
 */
#define	BYTESIZE	8
#define	SETSIZE		(NCHARS / BYTESIZE)
#define	SETBYTE(c)	((c) / BYTESIZE)
#define	SETBIT(c)	(1 << ((c) % BYTESIZE))

#define NPAR	10	/* [0-9] - 0th is the entire matched string, i.e.
& */private char	*comp_ptr,		**alt_p,		**alt_endp;voidREcompile(pattern, re, re_blk)char	*pattern;bool	re;struct RE_block	*re_blk;{	REptr = pattern;	REpeekc = -1;	comp_ptr = re_blk->r_compbuf;	alt_p = re_blk->r_alternates;	alt_endp = alt_p + NALTS;	*alt_p++ = comp_ptr;	re_blk->r_nparens = 0;	(void) do_comp(re_blk, re ? OKAY_RE : NORM);	*alt_p = NULL;	re_blk->r_anchored = NO;	re_blk->r_firstc = '\0';	/* do a little post processing */	if (re_blk->r_alternates[1] == NULL) {		char	*p;		p = re_blk->r_alternates[0];		for (;;) {			switch (*p) {			case OPENP:			case CLOSEP:				p += 2;				continue;			case AT_BOW:			case AT_EOW:				p += 1;				continue;			case AT_BOL:				re_blk->r_anchored = YES;				/* don't set firstc -- won't work */				break;			case NORMC:			case CINDC:				re_blk->r_firstc = CharUpcase(p[2]);				break;			default:				break;			}			break;		}	}}/* compile the pattern into an internal code */private intdo_comp(re_blk, kind)struct RE_block	*re_blk;int	kind;{	char	*this_verb,		*prev_verb,		*start_p,		*comp_endp;	int	parens[NPAR],		*parenp,		c,		ret_code;	parenp = parens;	this_verb = NULL;	ret_code = 1;	comp_endp = &re_blk->r_compbuf[COMPSIZE - 6];	/* wrap the whole expression around (implied) parens */	if (kind == OKAY_RE) {		*comp_ptr++ = OPENP;		*comp_ptr++ = re_blk->r_nparens;		*parenp++ = re_blk->r_nparens++;	}	start_p = comp_ptr;	while ((c = REgetc()) != '\0') {		if (comp_ptr > comp_endp) {toolong:			complain("Search string too long/complex.");		}		prev_verb = this_verb;		this_verb = comp_ptr;		if (kind == NORM && strchr(".[*", c) != NULL)			goto defchar;		switch (c) {		case '\\':			switch (c = REgetc()) {			case '\0':				complain("[Premature end of pattern]");				/*NOTREACHED*/			case '{':			    {				char	*wcntp;		/* word count */				*comp_ptr++ = CURLYB;				wcntp = comp_ptr;				*comp_ptr++ = 0;				for (;;) {					int	comp_val;					char	*comp_len;					comp_len = comp_ptr++;					comp_val = do_comp(re_blk, IN_CB);					*comp_len = comp_ptr - comp_len;					
(*wcntp) += 1;					if (comp_val == 0)						break;				}				break;			    }			case '}':				if (kind != IN_CB)					complain("Unexpected \\}.");				ret_code = 0;				goto outahere;			case '(':				if (re_blk->r_nparens >= NPAR)					complain("Too many ('s; max is %d.", NPAR);				*comp_ptr++ = OPENP;				*comp_ptr++ = re_blk->r_nparens;				*parenp++ = re_blk->r_nparens++;				break;			case ')':				if (parenp == parens)					complain("Too many )'s.");				*comp_ptr++ = CLOSEP;				*comp_ptr++ = *--parenp;				break;			case '|':				if (alt_p >= alt_endp)					complain("Too many alternates; max %d.", NALTS);				/* close off previous alternate */				*comp_ptr++ = CLOSEP;				*comp_ptr++ = *--parenp;				*comp_ptr++ = EOP;				*alt_p++ = comp_ptr;				/* start a new one */				re_blk->r_nparens = 0;				*comp_ptr++ = OPENP;				*comp_ptr++ = re_blk->r_nparens;				*parenp++ = re_blk->r_nparens++;				start_p = comp_ptr;				break;			case '1':			case '2':			case '3':			case '4':			case '5':			case '6':			case '7':			case '8':			case '9':				*comp_ptr++ = BACKREF;				*comp_ptr++ = c - '0';				break;			case '<':				*comp_ptr++ = AT_BOW;				break;			case '>':				*comp_ptr++ = AT_EOW;				break;			default:				goto defchar;			}			break;		case ',':			if (kind != IN_CB)				goto defchar;			goto outahere;		case '.':			*comp_ptr++ = ANYC;			break;		case '^':			if (comp_ptr == start_p) {				*comp_ptr++ = AT_BOL;				break;			}			goto defchar;		case '$':			if ((REpeekc = REgetc()) != '\0' && REpeekc != '\\')				goto defchar;			*comp_ptr++ = AT_EOL;			break;		case '[':		    {			int	chrcnt;			*comp_ptr++ = ONE_OF;			if (comp_ptr + SETSIZE >= comp_endp)				goto toolong;			byte_zero(comp_ptr, (size_t) SETSIZE);			if ((REpeekc = REgetc()) == '^') {				*this_verb = NONE_OF;				/* Get it for real this time. 
*/				(void) REgetc();			}			chrcnt = 0;			while ((c = REgetc()) != ']' && c != '\0') {				if (c == '\\') {					c = REgetc();					if (c == '\0')						break;				} else if ((REpeekc = REgetc()) == '-') {					int	i;					i = c;					(void) REgetc();     /* reread '-' */					c = REgetc();					if (c == '\0')						break;					while (i < c) {						comp_ptr[SETBYTE(i)] |= SETBIT(i);						i += 1;					}				}				comp_ptr[SETBYTE(c)] |= SETBIT(c);				chrcnt += 1;			}			if (c == '\0')				complain("Missing ].");			if (chrcnt == 0)				complain("Empty [].");			comp_ptr += SETSIZE;			break;		    }		case '*':			if (prev_verb == NULL || *prev_verb <= NOSTR || (*prev_verb&STAR)!=0)				goto defchar;			if (*prev_verb == NORMC || *prev_verb == CINDC) {				char	lastc = comp_ptr[-1];				/* The * operator applies only to the				 * previous character.  Since we were				 * building a string-matching command				 * (NORMC or CINDC), we must split it				 * up and work with the last character.				 *				 * Note that the STARed versions of these				 * commands do not operate on strings, and				 * so do not need or have character counts.				 */				if (prev_verb[1] == 1) {					/* Only one char in string:					 * delete old command.					 */					this_verb = prev_verb;				} else {					/* Several chars in string:					 * strip off the last.					 * New verb is derived from old.					 */					prev_verb[1] -= 1;					this_verb -= 1;					*this_verb = *prev_verb;				}				comp_ptr = this_verb + 1;				*comp_ptr++ = lastc;			} else {				/* This command is just the previous one,				 * whose verb we will modify.				 */				this_verb = prev_verb;			}			*this_verb |= STAR;			break;		default:defchar:			if ((prev_verb == NULL) ||			    !(*prev_verb == NORMC || *prev_verb == CINDC)) {				/* create new string command */				*comp_ptr++ = (CaseIgnore) ? 
CINDC : NORMC;				*comp_ptr++ = 0;			} else {				/* merge this into previous string command */				this_verb = prev_verb;			}			this_verb[1] += 1;			*comp_ptr++ = c;			break;		}	}outahere:	/* End of pattern, let's do some error checking. */	if (kind == OKAY_RE) {		*comp_ptr++ = CLOSEP;		*comp_ptr++ = *--parenp;	}	if (parenp != parens)		complain("Unmatched ()'s.");	if (kind == IN_CB && c == '\0')	/* end of pattern with missing \}. */		complain("Missing \\}.");	*comp_ptr++ = EOP;	return ret_code;}private char	*pstrtlst[NPAR],	/* index into re_blk->r_lbuf */		*pendlst[NPAR],		*REbolp,	/* begining-of-line pointer */		*locrater,	/* roof of last substitution */		*loc1,	/* start of matched text */		*loc2;	/* roof of matched text */int	REbom,		/* beginning and end columns of match */	REeom,	REdelta;	/* increase in line length due to last re_dosub */private boolbackref(n, linep)int	n;register char	*linep;{	register char	*backsp,			*backep;	backsp = pstrtlst[n];	backep = pendlst[n];	while (*backsp++ == *linep++)		if (backsp >= backep)			return YES;	return NO;}private boolmember(comp_ptr, c, af)register char	*comp_ptr;register int	c;bool		af;{	if (c == '\0')		return NO;	/* try to match EOL always fails */	if (comp_ptr[SETBYTE(c)] & SETBIT(c))		return af;	return !af;}private boolREmatch(linep, comp_ptr)register char	*linep,		*comp_ptr;{	char	*first_p;	register int	n;	for (;;) switch (*comp_ptr++) {	case NORMC:		n = *comp_ptr++;		while (--n >= 0)			if (*linep++ != *comp_ptr++)				return NO;		continue;	case CINDC:	/* case independent comparison */		n = *comp_ptr++;		while (--n >= 0)			if (!cind_cmp(*linep++, *comp_ptr++))				return NO;		continue;	case EOP:		loc2 = linep;		REeom = (loc2 - REbolp);		return YES;	/* Success! */	case AT_BOL:		if (linep == REbolp && linep != locrater)			continue;		return NO;	case AT_EOL:		if (*linep == '\0')			continue;		return NO;	case ANYC:		if (*linep++ != '\0')			continue;		return NO;
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -